Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-03 03:02:38 -05:00)

Compare commits: 56 commits, revert-205...dave-gray1
Commit SHAs in this range:

2c97440be5, 4ae4e44506, 2ada13b1ad, 5d170e9264, 1b0a64aa46, aa8e1c63d5, 60690c9fc4, 758b0c9042,
48d0aa2f6d, b664edde29, e16658b7ec, d30280ed23, 9dbd217c59, 23eac98b3c, 4fffc47e77, d65214a234,
2fb34b00b5, f718a391c0, ac56ac2b2d, 34c3f563fd, d2bea6f9e3, a09fe1b9ba, 55778b35ff, 8b169f1dac,
d344daf129, 3411e072ca, 8e36fe9b6f, 0d8bf91699, bd507678be, b6f0e80d54, 729378ca98, 220958a87c,
f3f6535aad, 228bc4903f, 38c9abed8b, 66b002458d, 39814cab32, 180cd4ccda, 284ad026b1, afa1bca1e3,
03adc1f60d, b319ed58b0, 8d30b39811, 1038f7469c, b9e7708643, 1e37101930, b2772509b4, 27ec84827c,
852316c5a6, e9448005a5, bbea62b907, 13012cfa70, 8f2681f904, f9c75d4878, 502c1eedaa, e9f090257c

@@ -5,4 +5,7 @@ models
examples/chatbot-ui/models
examples/rwkv/models
examples/**/models
Dockerfile*
Dockerfile*

# SonarQube
.scannerwork
.github/workflows/generate_grpc_cache.yaml (vendored, new file, 90 lines)
@@ -0,0 +1,90 @@
name: 'generate and publish GRPC docker caches'

on:
  - workflow_dispatch

concurrency:
  group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
  cancel-in-progress: true

jobs:
  generate_caches:
    strategy:
      matrix:
        include:
          - grpc-base-image: ubuntu:22.04
            runs-on: 'ubuntu-latest'
            platforms: 'linux/amd64'
    runs-on: ${{matrix.runs-on}}
    steps:
      - name: Release space from worker
        if: matrix.runs-on == 'ubuntu-latest'
        run: |
          echo "Listing top largest packages"
          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
          head -n 30 <<< "${pkgs}"
          echo
          df -h
          echo
          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
          sudo apt-get remove --auto-remove android-sdk-platform-tools || true
          sudo apt-get purge --auto-remove android-sdk-platform-tools || true
          sudo rm -rf /usr/local/lib/android
          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
          sudo rm -rf /usr/share/dotnet
          sudo apt-get remove -y '^mono-.*' || true
          sudo apt-get remove -y '^ghc-.*' || true
          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
          sudo apt-get remove -y 'php.*' || true
          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
          sudo apt-get remove -y '^google-.*' || true
          sudo apt-get remove -y azure-cli || true
          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
          sudo apt-get remove -y '^gfortran-.*' || true
          sudo apt-get remove -y microsoft-edge-stable || true
          sudo apt-get remove -y firefox || true
          sudo apt-get remove -y powershell || true
          sudo apt-get remove -y r-base-core || true
          sudo apt-get autoremove -y
          sudo apt-get clean
          echo
          echo "Listing top largest packages"
          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
          head -n 30 <<< "${pkgs}"
          echo
          sudo rm -rfv build || true
          sudo rm -rf /usr/share/dotnet || true
          sudo rm -rf /opt/ghc || true
          sudo rm -rf "/usr/local/share/boost" || true
          sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
          df -h

      - name: Set up QEMU
        uses: docker/setup-qemu-action@master
        with:
          platforms: all

      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@master

      - name: Checkout
        uses: actions/checkout@v4

      - name: Cache GRPC
        uses: docker/build-push-action@v5
        with:
          builder: ${{ steps.buildx.outputs.name }}
          # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
          # This means that even the MAKEFLAGS have to be an EXACT match.
          # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
          build-args: |
            GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
            MAKEFLAGS=--jobs=4 --output-sync=target
            GRPC_VERSION=v1.58.0
          context: .
          file: ./Dockerfile
          cache-to: type=gha,ignore-error=true
          target: grpc
          platforms: ${{ matrix.platforms }}
          push: false
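
Because `workflow_dispatch` is the only trigger, the cache has to be (re)generated by hand. A minimal sketch of doing that with the GitHub CLI, assuming `gh` is installed and authenticated with push access to the repository:

```bash
# Manually dispatch the GRPC cache generation workflow
gh workflow run generate_grpc_cache.yaml --repo mudler/LocalAI
```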
.github/workflows/image-pr.yml (vendored, 9 lines changed)
@@ -22,6 +22,7 @@ jobs:
      platforms: ${{ matrix.platforms }}
      runs-on: ${{ matrix.runs-on }}
      base-image: ${{ matrix.base-image }}
      grpc-base-image: ${{ matrix.grpc-base-image }}
      makeflags: ${{ matrix.makeflags }}
    secrets:
      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
@@ -61,12 +62,14 @@
            ffmpeg: 'false'
            image-type: 'extras'
            base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
            grpc-base-image: "ubuntu:22.04"
            runs-on: 'arc-runner-set'
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'sycl_f16'
            platforms: 'linux/amd64'
            tag-latest: 'false'
            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
            grpc-base-image: "ubuntu:22.04"
            tag-suffix: 'sycl-f16-ffmpeg'
            ffmpeg: 'true'
            image-type: 'extras'
@@ -85,6 +88,7 @@
      platforms: ${{ matrix.platforms }}
      runs-on: ${{ matrix.runs-on }}
      base-image: ${{ matrix.base-image }}
      grpc-base-image: ${{ matrix.grpc-base-image }}
      makeflags: ${{ matrix.makeflags }}
    secrets:
      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
@@ -102,11 +106,12 @@
            image-type: 'core'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=5 --output-sync=target"
            makeflags: "--jobs=4 --output-sync=target"
          - build-type: 'sycl_f16'
            platforms: 'linux/amd64'
            tag-latest: 'false'
            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
            grpc-base-image: "ubuntu:22.04"
            tag-suffix: 'sycl-f16-ffmpeg-core'
            ffmpeg: 'true'
            image-type: 'core'
@@ -122,4 +127,4 @@
            image-type: 'core'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=5 --output-sync=target"
            makeflags: "--jobs=4 --output-sync=target"
.github/workflows/image.yml (vendored, 22 lines changed)
@@ -26,6 +26,7 @@ jobs:
      platforms: ${{ matrix.platforms }}
      runs-on: ${{ matrix.runs-on }}
      base-image: ${{ matrix.base-image }}
      grpc-base-image: ${{ matrix.grpc-base-image }}
      aio: ${{ matrix.aio }}
      makeflags: ${{ matrix.makeflags }}
      latest-image: ${{ matrix.latest-image }}
@@ -129,6 +130,7 @@
            image-type: 'extras'
            aio: "-aio-gpu-hipblas"
            base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
            grpc-base-image: "ubuntu:22.04"
            latest-image: 'latest-gpu-hipblas'
            latest-image-aio: 'latest-aio-gpu-hipblas'
            runs-on: 'arc-runner-set'
@@ -140,12 +142,14 @@
            ffmpeg: 'false'
            image-type: 'extras'
            base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
            grpc-base-image: "ubuntu:22.04"
            runs-on: 'arc-runner-set'
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'sycl_f16'
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
            grpc-base-image: "ubuntu:22.04"
            tag-suffix: '-sycl-f16-ffmpeg'
            ffmpeg: 'true'
            image-type: 'extras'
@@ -158,6 +162,7 @@
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
            grpc-base-image: "ubuntu:22.04"
            tag-suffix: '-sycl-f32-ffmpeg'
            ffmpeg: 'true'
            image-type: 'extras'
@@ -171,6 +176,7 @@
            platforms: 'linux/amd64'
            tag-latest: 'false'
            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
            grpc-base-image: "ubuntu:22.04"
            tag-suffix: '-sycl-f16-core'
            ffmpeg: 'false'
            image-type: 'core'
@@ -180,6 +186,7 @@
            platforms: 'linux/amd64'
            tag-latest: 'false'
            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
            grpc-base-image: "ubuntu:22.04"
            tag-suffix: '-sycl-f32-core'
            ffmpeg: 'false'
            image-type: 'core'
@@ -189,6 +196,7 @@
            platforms: 'linux/amd64'
            tag-latest: 'false'
            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
            grpc-base-image: "ubuntu:22.04"
            tag-suffix: '-sycl-f16-ffmpeg-core'
            ffmpeg: 'true'
            image-type: 'core'
@@ -198,6 +206,7 @@
            platforms: 'linux/amd64'
            tag-latest: 'false'
            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
            grpc-base-image: "ubuntu:22.04"
            tag-suffix: '-sycl-f32-ffmpeg-core'
            ffmpeg: 'true'
            image-type: 'core'
@@ -210,6 +219,7 @@
            ffmpeg: 'true'
            image-type: 'core'
            base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
            grpc-base-image: "ubuntu:22.04"
            runs-on: 'arc-runner-set'
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'hipblas'
@@ -219,6 +229,7 @@
            ffmpeg: 'false'
            image-type: 'core'
            base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
            grpc-base-image: "ubuntu:22.04"
            runs-on: 'arc-runner-set'
            makeflags: "--jobs=3 --output-sync=target"

@@ -236,6 +247,7 @@
      runs-on: ${{ matrix.runs-on }}
      aio: ${{ matrix.aio }}
      base-image: ${{ matrix.base-image }}
      grpc-base-image: ${{ matrix.grpc-base-image }}
      makeflags: ${{ matrix.makeflags }}
      latest-image: ${{ matrix.latest-image }}
      latest-image-aio: ${{ matrix.latest-image-aio }}
@@ -258,7 +270,7 @@
            aio: "-aio-cpu"
            latest-image: 'latest-cpu'
            latest-image-aio: 'latest-aio-cpu'
            makeflags: "--jobs=5 --output-sync=target"
            makeflags: "--jobs=4 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
@@ -269,7 +281,7 @@
            image-type: 'core'
            base-image: "ubuntu:22.04"
            runs-on: 'ubuntu-latest'
            makeflags: "--jobs=5 --output-sync=target"
            makeflags: "--jobs=4 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "1"
@@ -280,7 +292,7 @@
            image-type: 'core'
            base-image: "ubuntu:22.04"
            runs-on: 'ubuntu-latest'
            makeflags: "--jobs=5 --output-sync=target"
            makeflags: "--jobs=4 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
@@ -291,7 +303,7 @@
            image-type: 'core'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=5 --output-sync=target"
            makeflags: "--jobs=4 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "1"
@@ -302,4 +314,4 @@
            image-type: 'core'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=5 --output-sync=target"
            makeflags: "--jobs=4 --output-sync=target"
.github/workflows/image_build.yml (vendored, 15 lines changed)
@@ -6,6 +6,10 @@ on:
    inputs:
      base-image:
        description: 'Base image'
        required: true
        type: string
      grpc-base-image:
        description: 'GRPC Base image, must be a compatible image with base-image'
        required: false
        default: ''
        type: string
@@ -57,7 +61,7 @@ on:
      makeflags:
        description: 'Make Flags'
        required: false
        default: '--jobs=3 --output-sync=target'
        default: '--jobs=4 --output-sync=target'
        type: string
      aio:
        description: 'AIO Image Name'
@@ -201,15 +205,16 @@ jobs:
        uses: docker/build-push-action@v5
        with:
          builder: ${{ steps.buildx.outputs.name }}
          # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
          # This means that even the MAKEFLAGS have to be an EXACT match.
          # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
          build-args: |
            IMAGE_TYPE=${{ inputs.image-type }}
            BASE_IMAGE=${{ inputs.base-image }}
            MAKEFLAGS=${{ inputs.makeflags }}
            GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
            MAKEFLAGS=--jobs=4 --output-sync=target
            GRPC_VERSION=v1.58.0
          context: .
          file: ./Dockerfile
          cache-from: type=gha
          cache-to: type=gha,ignore-error=true
          target: grpc
          platforms: ${{ inputs.platforms }}
          push: false
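
The comments in this step are the key constraint: the cache producer and every consumer must pass byte-identical build-args, otherwise the `cache-from: type=gha` lookup misses and gRPC is rebuilt from scratch. A sketch of the equivalent local invocation, assuming Docker Buildx with the GitHub Actions cache backend configured:

```bash
# Build only the grpc stage with the same build-args the cache was published with;
# any difference (even in MAKEFLAGS) produces a different cache key.
docker buildx build \
  --target grpc \
  --build-arg GRPC_BASE_IMAGE=ubuntu:22.04 \
  --build-arg MAKEFLAGS="--jobs=4 --output-sync=target" \
  --build-arg GRPC_VERSION=v1.58.0 \
  --cache-from type=gha \
  .
```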
.github/workflows/test-extra.yml (vendored, 31 lines changed)
@@ -74,6 +74,37 @@
          make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
          make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test

  tests-rerankers:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          submodules: true
      - name: Dependencies
        run: |
          sudo apt-get update
          sudo apt-get install build-essential ffmpeg
          curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
          sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
          gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
          sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
          sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
          sudo apt-get update && \
          sudo apt-get install -y conda
          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
          sudo apt-get install -y libopencv-dev
          pip install --user grpcio-tools

          sudo rm -rfv /usr/bin/conda || true

      - name: Test rerankers
        run: |
          export PATH=$PATH:/opt/conda/bin
          make --jobs=5 --output-sync=target -C backend/python/rerankers
          make --jobs=5 --output-sync=target -C backend/python/rerankers test

  tests-diffusers:
    runs-on: ubuntu-latest
    steps:
.github/workflows/test.yml (vendored, 21 lines changed)
@@ -121,8 +121,11 @@
          PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
        uses: mxschmitt/action-tmate@v3
        timeout-minutes: 5
        uses: mxschmitt/action-tmate@v3.18
        with:
          detached: true
          connect-timeout-seconds: 180
          limit-access-to-actor: true

  tests-aio-container:
    runs-on: ubuntu-latest
@@ -173,8 +176,11 @@
          make run-e2e-aio
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
        uses: mxschmitt/action-tmate@v3
        timeout-minutes: 5
        uses: mxschmitt/action-tmate@v3.18
        with:
          detached: true
          connect-timeout-seconds: 180
          limit-access-to-actor: true

  tests-apple:
    runs-on: macOS-14
@@ -207,5 +213,8 @@
          BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
        uses: mxschmitt/action-tmate@v3
        timeout-minutes: 5
        uses: mxschmitt/action-tmate@v3.18
        with:
          detached: true
          connect-timeout-seconds: 180
          limit-access-to-actor: true
.github/workflows/update_swagger.yaml (vendored, new file, 31 lines)
@@ -0,0 +1,31 @@
name: Update swagger
on:
  schedule:
    - cron: 0 20 * * *
  workflow_dispatch:
jobs:
  swagger:
    strategy:
      fail-fast: false
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version: 'stable'
      - run: |
          go install github.com/swaggo/swag/cmd/swag@latest
      - name: Bump swagger 🔧
        run: |
          make swagger
      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v6
        with:
          token: ${{ secrets.UPDATE_BOT_TOKEN }}
          push-to-fork: ci-forks/LocalAI
          commit-message: 'feat(swagger): update swagger'
          title: 'feat(swagger): update swagger'
          branch: "update/swagger"
          body: Update swagger
          signoff: true
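
The same regeneration can be reproduced locally before opening a PR; this is just the workflow's two steps combined, assuming the Go bin directory is on `PATH` so the installed `swag` binary is found:

```bash
go install github.com/swaggo/swag/cmd/swag@latest
make swagger   # runs: swag init -g core/http/app.go --output swagger
```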
.github/workflows/yaml-check.yml (vendored, new file, 18 lines)
@@ -0,0 +1,18 @@
name: 'Yamllint GitHub Actions'
on:
  - pull_request
jobs:
  yamllint:
    name: 'Yamllint'
    runs-on: ubuntu-latest
    steps:
      - name: 'Checkout'
        uses: actions/checkout@master
      - name: 'Yamllint'
        uses: karancode/yamllint-github-action@master
        with:
          yamllint_file_or_dir: 'gallery'
          yamllint_strict: false
          yamllint_comment: true
        env:
          GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
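
The action lints only the `gallery` directory and is non-strict, so PR comments are advisory. A rough local approximation, assuming `yamllint` is installed from PyPI:

```bash
pip install --user yamllint
yamllint gallery
```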
.gitignore (vendored, 3 lines changed)
@@ -44,3 +44,6 @@ prepare
*.pb.go
*pb2.py
*pb2_grpc.py

# SonarQube
.scannerwork
Dockerfile (29 lines changed)
@@ -1,8 +1,9 @@
ARG IMAGE_TYPE=extras
ARG BASE_IMAGE=ubuntu:22.04
ARG GRPC_BASE_IMAGE=${BASE_IMAGE}

# extras or core
FROM ${BASE_IMAGE} as requirements-core
FROM ${BASE_IMAGE} AS requirements-core

USER root

@@ -15,7 +16,7 @@ ARG TARGETVARIANT

ENV BUILD_TYPE=${BUILD_TYPE}
ENV DEBIAN_FRONTEND=noninteractive
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"

ARG GO_TAGS="stablediffusion tinydream tts"

@@ -23,7 +24,7 @@ RUN apt-get update && \
    apt-get install -y ca-certificates curl python3-pip unzip && apt-get clean

# Install Go
RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -C /usr/local -xz
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH $PATH:/usr/local/go/bin

# Install grpc compilers
@@ -79,7 +80,7 @@ RUN test -n "$TARGETARCH" \
###################################
###################################

FROM requirements-core as requirements-extras
FROM requirements-core AS requirements-extras

RUN apt install -y gpg && \
    curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
@@ -104,7 +105,7 @@ RUN if [ ! -e /usr/bin/python ]; then \
###################################
###################################

FROM ${BASE_IMAGE} as grpc
FROM ${GRPC_BASE_IMAGE} AS grpc

ARG MAKEFLAGS
ARG GRPC_VERSION=v1.58.0
@@ -120,16 +121,15 @@ RUN apt-get update && \

RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc

RUN cd grpc && \
    mkdir -p cmake/build && \
    cd cmake/build && \
    cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \
WORKDIR /build/grpc/cmake/build

RUN cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \
    make

###################################
###################################

FROM requirements-${IMAGE_TYPE} as builder
FROM requirements-${IMAGE_TYPE} AS builder

ARG GO_TAGS="stablediffusion tts"
ARG GRPC_BACKENDS
@@ -167,9 +167,11 @@ RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build

COPY --from=grpc /build/grpc ./grpc/

RUN cd /build/grpc/cmake/build && make install
WORKDIR /build/grpc/cmake/build
RUN make install

# Rebuild with defaults backends
WORKDIR /build
RUN make build

RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
@@ -257,6 +259,9 @@ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
    make -C backend/python/sentencetransformers \
    ; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
    make -C backend/python/rerankers \
    ; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
    make -C backend/python/transformers \
    ; fi
@@ -287,7 +292,7 @@ RUN mkdir -p /build/models

# Define the health check command
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
  CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
  CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1

VOLUME /build/models
EXPOSE 8080
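
The `requirements-${IMAGE_TYPE}` stage selects between the `core` and `extras` dependency sets, and `GRPC_BASE_IMAGE` now lets the grpc stage build on a different base than the final image. A hedged sketch of a local build exercising both arguments (the image tag is illustrative, not an official one):

```bash
# Build an extras image whose grpc stage uses a plain Ubuntu base
docker build \
  --build-arg IMAGE_TYPE=extras \
  --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \
  --build-arg GRPC_BASE_IMAGE=ubuntu:22.04 \
  -t local-ai:extras-sycl .
```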
Makefile (79 lines changed)
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai

# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=7593639ce335e8d7f89aa9a54d616951f273af60
CPPLLAMA_VERSION?=784e11dea1f5ce9638851b2b0dddb107e2a609c8

# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

# whisper.cpp version
WHISPER_CPP_VERSION?=b0c3cbf2e851cf232e432b590dcc514a689ec028
WHISPER_CPP_VERSION?=858452d58dba3acdc3431c9bced2bb8cfd9bf418

# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
@@ -179,20 +179,20 @@ endif
all: help

## BERT embeddings
sources/go-bert:
	git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert
	cd sources/go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
sources/go-bert.cpp:
	git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert.cpp
	cd sources/go-bert.cpp && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1

sources/go-bert/libgobert.a: sources/go-bert
	$(MAKE) -C sources/go-bert libgobert.a
sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
	$(MAKE) -C sources/go-bert.cpp libgobert.a

## go-llama-ggml
sources/go-llama-ggml:
	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama-ggml
	cd sources/go-llama-ggml && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
## go-llama.cpp
sources/go-llama.cpp:
	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama.cpp
	cd sources/go-llama.cpp && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1

sources/go-llama-ggml/libbinding.a: sources/go-llama-ggml
	$(MAKE) -C sources/go-llama-ggml BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
	$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a

## go-piper
sources/go-piper:
@@ -211,12 +211,12 @@ sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
	$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a

## RWKV
sources/go-rwkv:
	git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv
	cd sources/go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
sources/go-rwkv.cpp:
	git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv.cpp
	cd sources/go-rwkv.cpp && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1

sources/go-rwkv/librwkv.a: sources/go-rwkv
	cd sources/go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
	cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..

## stable diffusion
sources/go-stable-diffusion:
@@ -236,23 +236,24 @@ sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream

## whisper
sources/whisper.cpp:
	git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp
	git clone https://github.com/ggerganov/whisper.cpp sources/whisper.cpp
	cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1

sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
	cd sources/whisper.cpp && make libwhisper.a

get-sources: sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream
get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream

replace:
	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv
	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
	$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert
	$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
	$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
	$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
	$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
	$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp

dropreplace:
	$(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp
@@ -271,12 +272,12 @@ prepare-sources: get-sources replace
## GENERIC
rebuild: ## Rebuilds the project
	$(GOCMD) clean -cache
	$(MAKE) -C sources/go-llama-ggml clean
	$(MAKE) -C sources/go-llama.cpp clean
	$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
	$(MAKE) -C sources/go-rwkv clean
	$(MAKE) -C sources/go-rwkv.cpp clean
	$(MAKE) -C sources/whisper.cpp clean
	$(MAKE) -C sources/go-stable-diffusion clean
	$(MAKE) -C sources/go-bert clean
	$(MAKE) -C sources/go-bert.cpp clean
	$(MAKE) -C sources/go-piper clean
	$(MAKE) -C sources/go-tiny-dream clean
	$(MAKE) build
@@ -436,10 +437,10 @@ protogen-go-clean:
	$(RM) bin/*

.PHONY: protogen-python
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen

.PHONY: protogen-python-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean

.PHONY: autogptq-protogen
autogptq-protogen:
@@ -505,6 +506,14 @@ petals-protogen:
petals-protogen-clean:
	$(MAKE) -C backend/python/petals protogen-clean

.PHONY: rerankers-protogen
rerankers-protogen:
	$(MAKE) -C backend/python/rerankers protogen

.PHONY: rerankers-protogen-clean
rerankers-protogen-clean:
	$(MAKE) -C backend/python/rerankers protogen-clean

.PHONY: sentencetransformers-protogen
sentencetransformers-protogen:
	$(MAKE) -C backend/python/sentencetransformers protogen
@@ -563,6 +572,7 @@ prepare-extra-conda-environments: protogen-python
	$(MAKE) -C backend/python/vllm
	$(MAKE) -C backend/python/mamba
	$(MAKE) -C backend/python/sentencetransformers
	$(MAKE) -C backend/python/rerankers
	$(MAKE) -C backend/python/transformers
	$(MAKE) -C backend/python/transformers-musicgen
	$(MAKE) -C backend/python/parler-tts
@@ -598,8 +608,8 @@ backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/
backend-assets/grpc: protogen-go replace
	mkdir -p backend-assets/grpc

backend-assets/grpc/bert-embeddings: sources/go-bert sources/go-bert/libgobert.a backend-assets/grpc
	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert LIBRARY_PATH=$(CURDIR)/sources/go-bert \
backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/

backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
@@ -641,17 +651,16 @@ ifeq ($(BUILD_TYPE),metal)
	cp backend/cpp/llama/llama.cpp/build/bin/default.metallib backend-assets/grpc/
endif

backend-assets/grpc/llama-ggml: sources/go-llama-ggml sources/go-llama-ggml/libbinding.a backend-assets/grpc
	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml
	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/

backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
	CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/

backend-assets/grpc/rwkv: sources/go-rwkv sources/go-rwkv/librwkv.a backend-assets/grpc
	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \
backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv

backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
@@ -714,4 +723,4 @@ docker-image-intel-xpu:

.PHONY: swagger
swagger:
	swag init -g core/http/api.go --output swagger
	swag init -g core/http/app.go --output swagger
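
With the new targets wired into `protogen-python` and `prepare-extra-conda-environments`, the rerankers backend can also be generated and built on its own; a short sketch using only the targets added above:

```bash
# Regenerate the gRPC stubs for the new backend, then build its conda environment
make rerankers-protogen
make -C backend/python/rerankers
```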
@@ -44,18 +44,19 @@

[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)

**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU.
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).

## 🔥🔥 Hot topics / Roadmap

[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)

- Reranker API: https://github.com/mudler/LocalAI/pull/2121
- Gallery WebUI: https://github.com/mudler/LocalAI/pull/2104
- llama3: https://github.com/mudler/LocalAI/discussions/2076
- Parler-TTS: https://github.com/mudler/LocalAI/pull/2027
- Landing page: https://github.com/mudler/LocalAI/pull/1922
- Openvino support: https://github.com/mudler/LocalAI/pull/1892
- Vector store: https://github.com/mudler/LocalAI/pull/1795
- All-in-one container image: https://github.com/mudler/LocalAI/issues/1855
- Parallel function calling: https://github.com/mudler/LocalAI/pull/1726 / Tools API support: https://github.com/mudler/LocalAI/pull/1715

Hot topics (looking for contributors):
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
aio/cpu/rerank.yaml (new file, 27 lines)
@@ -0,0 +1,27 @@
name: jina-reranker-v1-base-en
backend: rerankers
parameters:
  model: cross-encoder

usage: |
    You can test this model with curl like this:

    curl http://localhost:8080/v1/rerank \
      -H "Content-Type: application/json" \
      -d '{
      "model": "jina-reranker-v1-base-en",
      "query": "Organic skincare products for sensitive skin",
      "documents": [
        "Eco-friendly kitchenware for modern homes",
        "Biodegradable cleaning supplies for eco-conscious consumers",
        "Organic cotton baby clothes for sensitive skin",
        "Natural organic skincare range for sensitive skin",
        "Tech gadgets for smart homes: 2024 edition",
        "Sustainable gardening tools and compost solutions",
        "Sensitive skin-friendly facial cleansers and toners",
        "Organic food wraps and storage solutions",
        "All-natural pet food for dogs with allergies",
        "Yoga mats made from recycled materials"
      ],
      "top_n": 3
    }'
@@ -6,15 +6,22 @@ parameters:
template:
  chat_message: |
    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
    {{- if .FunctionCall }}<tool_call>{{end}}
    {{- if eq .RoleName "tool" }}<tool_result>{{end }}
    {{- if .Content}}
    {{.Content}}
    {{- if .FunctionCall }}
    <tool_call>
    {{- else if eq .RoleName "tool" }}
    <tool_response>
    {{- end }}
    {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
    {{- if .FunctionCall }}</tool_call>{{end }}
    {{- if eq .RoleName "tool" }}</tool_result>{{end }}
    <|im_end|>
    {{- if .Content}}
    {{.Content }}
    {{- end }}
    {{- if .FunctionCall}}
    {{toJson .FunctionCall}}
    {{- end }}
    {{- if .FunctionCall }}
    </tool_call>
    {{- else if eq .RoleName "tool" }}
    </tool_response>
    {{- end }}<|im_end|>
  # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
  function: |
    <|im_start|>system
@@ -29,8 +36,7 @@ template:
    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
    <tool_call>
    {'arguments': <args-dict>, 'name': <function-name>}
    </tool_call>
    <|im_end|>
    </tool_call><|im_end|>
    {{.Input -}}
    <|im_start|>assistant
    <tool_call>
@@ -129,7 +129,7 @@ detect_gpu
detect_gpu_size

PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"

check_vars
aio/gpu-8g/rerank.yaml (new file, 27 lines)
@@ -0,0 +1,27 @@
(identical content to aio/cpu/rerank.yaml above)
@@ -6,15 +6,22 @@ parameters:
@@ -29,8 +36,7 @@ template:
(same chat_message / function template change as shown after aio/cpu/rerank.yaml above)
aio/intel/rerank.yaml (new file, 27 lines)
@@ -0,0 +1,27 @@
(identical content to aio/cpu/rerank.yaml above)
@@ -7,15 +7,22 @@ parameters:
@@ -30,8 +37,7 @@ template:
(same chat_message / function template change as shown after aio/cpu/rerank.yaml above, shifted by one line)
@@ -23,6 +23,30 @@ service Backend {
  rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
  rpc StoresGet(StoresGetOptions) returns (StoresGetResult) {}
  rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}

  rpc Rerank(RerankRequest) returns (RerankResult) {}
}

message RerankRequest {
  string query = 1;
  repeated string documents = 2;
  int32 top_n = 3;
}

message RerankResult {
  Usage usage = 1;
  repeated DocumentResult results = 2;
}

message Usage {
  int32 total_tokens = 1;
  int32 prompt_tokens = 2;
}

message DocumentResult {
  int32 index = 1;
  string text = 2;
  float relevance_score = 3;
}

message StoresKey {
@@ -177,6 +201,7 @@ message ModelOptions {
  bool EnforceEager = 52;
  int32 SwapSpace = 53;
  int32 MaxModelLen = 54;
  int32 TensorParallelSize = 55;

  string MMProj = 41;
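
The new `Rerank` RPC carries the same fields that the HTTP layer exposes on `/v1/rerank` (see the `usage` block in the aio `rerank.yaml` files above). A minimal request against a running instance, assuming the `jina-reranker-v1-base-en` model from those files is configured and the server listens on port 8080:

```bash
curl http://localhost:8080/v1/rerank \
  -H "Content-Type: application/json" \
  -d '{
    "model": "jina-reranker-v1-base-en",
    "query": "I love you",
    "documents": ["I hate you", "I really like you"],
    "top_n": 2
  }'
```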
@@ -120,4 +120,6 @@ dependencies:
    - transformers>=4.38.2 # Updated Version
    - transformers_stream_generator==0.0.5
    - xformers==0.0.23.post1
    - rerankers[transformers]
    - pydantic
prefix: /opt/conda/envs/transformers
@@ -108,4 +108,6 @@ dependencies:
    - transformers>=4.38.2 # Updated Version
    - transformers_stream_generator==0.0.5
    - xformers==0.0.23.post1
    - rerankers[transformers]
    - pydantic
prefix: /opt/conda/envs/transformers
@@ -111,5 +111,7 @@ dependencies:
    - vllm>=0.4.0
    - transformers>=4.38.2 # Updated Version
    - transformers_stream_generator==0.0.5
    - xformers==0.0.23.post1
    - xformers==0.0.23.post1
    - rerankers[transformers]
    - pydantic
prefix: /opt/conda/envs/transformers
@@ -61,4 +61,5 @@ dependencies:
    - urllib3==2.0.6
    - zipp==3.17.0
    - torch
    - opencv-python
prefix: /opt/conda/envs/diffusers
@@ -71,4 +71,5 @@ dependencies:
    - typing-extensions==4.8.0
    - urllib3==2.0.6
    - zipp==3.17.0
    - opencv-python
prefix: /opt/conda/envs/diffusers
backend/python/rerankers/Makefile (new file, 27 lines)
@@ -0,0 +1,27 @@
.PHONY: rerankers
rerankers: protogen
	$(MAKE) -C ../common-env/transformers


.PHONY: run
run: protogen
	@echo "Running rerankers..."
	bash run.sh
	@echo "rerankers run."

# Running this from the command line does not work well; it only works from an IDE like VSCode.
.PHONY: test
test: protogen
	@echo "Testing rerankers..."
	bash test.sh
	@echo "rerankers tested."

.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py

.PHONY: protogen-clean
protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py

backend_pb2_grpc.py backend_pb2.py:
	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
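
Typical local use of these targets, run from `backend/python/rerankers` (the `test` target, as the comment notes, is unreliable outside an IDE):

```bash
make protogen   # generate backend_pb2.py and backend_pb2_grpc.py from backend.proto
make run        # launch the rerankers gRPC server via run.sh
```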
backend/python/rerankers/README.md (new file, 5 lines)
@@ -0,0 +1,5 @@
# Creating a separate environment for the reranker project

```
make reranker
```
backend/python/rerankers/reranker.py (new executable file, 123 lines)
@@ -0,0 +1,123 @@
#!/usr/bin/env python3
"""
Extra gRPC server for Rerankers models.
"""
from concurrent import futures

import argparse
import signal
import sys
import os

import time
import backend_pb2
import backend_pb2_grpc

import grpc

from rerankers import Reranker

_ONE_DAY_IN_SECONDS = 60 * 60 * 24

# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))

# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    A gRPC servicer for the backend service.

    This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
    """
    def Health(self, request, context):
        """
        A gRPC method that returns the health status of the backend service.

        Args:
            request: A HealthRequest object that contains the request parameters.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Reply object that contains the health status of the backend service.
        """
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """
        A gRPC method that loads a model into memory.

        Args:
            request: A LoadModelRequest object that contains the request parameters.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Result object that contains the result of the LoadModel operation.
        """
        model_name = request.Model
        try:
            kwargs = {}
            if request.Type != "":
                kwargs['model_type'] = request.Type
            if request.PipelineType != "": # Reuse the PipelineType field for language
                kwargs['lang'] = request.PipelineType
            self.model_name = model_name
            self.model = Reranker(model_name, **kwargs)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

        # Implement your logic here for the LoadModel service
        # Replace this with your desired response
        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def Rerank(self, request, context):
        documents = []
        for idx, doc in enumerate(request.documents):
            documents.append(doc)
        ranked_results=self.model.rank(query=request.query, docs=documents, doc_ids=list(range(len(request.documents))))
        # Prepare results to return
        results = [
            backend_pb2.DocumentResult(
                index=res.doc_id,
                text=res.text,
                relevance_score=res.score
            ) for res in ranked_results.results
        ]

        # Calculate the usage and total tokens
        # TODO: Implement the usage calculation with reranker
        total_tokens = sum(len(doc.split()) for doc in request.documents) + len(request.query.split())
        prompt_tokens = len(request.query.split())
        usage = backend_pb2.Usage(total_tokens=total_tokens, prompt_tokens=prompt_tokens)
        return backend_pb2.RerankResult(usage=usage, results=results)

def serve(address):
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)

    # Define the signal handler function
    def signal_handler(sig, frame):
        print("Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    # Set the signal handlers for SIGINT and SIGTERM
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run the gRPC server.")
    parser.add_argument(
        "--addr", default="localhost:50051", help="The address to bind the server to."
    )
    args = parser.parse_args()

    serve(args.addr)
backend/python/rerankers/run.sh (new executable file, 14 lines)
@@ -0,0 +1,14 @@
#!/bin/bash

##
## A bash script wrapper that runs the reranker server with conda

export PATH=$PATH:/opt/conda/bin

# Activate conda environment
source activate transformers

# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

python $DIR/reranker.py $@
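
Outside of the `EXTERNAL_GRPC_BACKENDS` wiring in the Dockerfile, the server can also be started by hand for debugging; a sketch assuming the shared `transformers` conda environment from `backend/python/common-env` has already been built:

```bash
bash backend/python/rerankers/run.sh --addr localhost:50051
```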
backend/python/rerankers/test.sh (new executable file, 11 lines)
@@ -0,0 +1,11 @@
#!/bin/bash
##
## A bash script wrapper that runs the reranker tests with conda

# Activate conda environment
source activate transformers

# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

python -m unittest $DIR/test_reranker.py
90
backend/python/rerankers/test_reranker.py
Executable file
90
backend/python/rerankers/test_reranker.py
Executable file
@@ -0,0 +1,90 @@
"""
A test script to test the gRPC service
"""
import unittest
import subprocess
import time
import backend_pb2
import backend_pb2_grpc

import grpc


class TestBackendServicer(unittest.TestCase):
    """
    TestBackendServicer is the class that tests the gRPC service
    """
    def setUp(self):
        """
        This method sets up the gRPC service by starting the server
        """
        self.service = subprocess.Popen(["python3", "reranker.py", "--addr", "localhost:50051"])
        time.sleep(10)

    def tearDown(self) -> None:
        """
        This method tears down the gRPC service by terminating the server
        """
        self.service.kill()
        self.service.wait()

    def test_server_startup(self):
        """
        This method tests if the server starts up successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.Health(backend_pb2.HealthMessage())
                self.assertEqual(response.message, b'OK')
        except Exception as err:
            print(err)
            self.fail("Server failed to start")
        finally:
            self.tearDown()

    def test_load_model(self):
        """
        This method tests if the model is loaded successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder"))
                self.assertTrue(response.success)
                self.assertEqual(response.message, "Model loaded successfully")
        except Exception as err:
            print(err)
            self.fail("LoadModel service failed")
        finally:
            self.tearDown()

    def test_rerank(self):
        """
        This method tests if documents are reranked successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                request = backend_pb2.RerankRequest(
                    query="I love you",
                    documents=["I hate you", "I really like you"],
                    top_n=2
                )
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder"))
                self.assertTrue(response.success)

                rerank_response = stub.Rerank(request)
                print(rerank_response.results[0])
                self.assertIsNotNone(rerank_response.results)
                self.assertEqual(len(rerank_response.results), 2)
                self.assertEqual(rerank_response.results[0].text, "I really like you")
                self.assertEqual(rerank_response.results[1].text, "I hate you")
        except Exception as err:
            print(err)
            self.fail("Reranker service failed")
        finally:
            self.tearDown()
@@ -148,7 +148,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
else:
|
||||
device_map="CPU"
|
||||
self.model = OVModelForCausalLM.from_pretrained(model_name,
|
||||
compile=True,
|
||||
compile=True,
|
||||
trust_remote_code=request.TrustRemoteCode,
|
||||
ov_config={"PERFORMANCE_HINT": "LATENCY"},
|
||||
device=device_map)
|
||||
self.OV = True
|
||||
else:
|
||||
@@ -158,6 +160,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
quantization_config=quantization,
|
||||
device_map=device_map,
|
||||
torch_dtype=compute)
|
||||
if request.ContextSize > 0:
|
||||
self.max_tokens = request.ContextSize
|
||||
else:
|
||||
self.max_tokens = self.model.config.max_position_embeddings
|
||||
|
||||
self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
|
||||
self.XPU = False
|
||||
|
||||
@@ -212,12 +219,27 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
set_seed(request.Seed)
|
||||
if request.TopP == 0:
|
||||
request.TopP = 0.9
|
||||
|
||||
if request.TopK == 0:
|
||||
request.TopK = 40
|
||||
|
||||
prompt = request.Prompt
|
||||
if not request.Prompt and request.UseTokenizerTemplate and request.Messages:
|
||||
prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True)
|
||||
|
||||
eos_token_id = self.tokenizer.eos_token_id
|
||||
if request.StopPrompts:
|
||||
eos_token_id = []
|
||||
for word in request.StopPrompts:
|
||||
eos_token_id.append(self.tokenizer.convert_tokens_to_ids(word))
|
||||
|
||||
inputs = self.tokenizer(prompt, return_tensors="pt")
|
||||
|
||||
max_tokens = 200
|
||||
if request.Tokens > 0:
|
||||
max_tokens = request.Tokens
|
||||
else:
|
||||
max_tokens = self.max_tokens - inputs["input_ids"].size()[inputs["input_ids"].dim()-1]
|
||||
|
||||
inputs = self.tokenizer(request.Prompt, return_tensors="pt")
|
||||
if self.CUDA:
|
||||
inputs = inputs.to("cuda")
|
||||
if XPU and self.OV == False:
|
||||
@@ -235,7 +257,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
top_k=request.TopK,
|
||||
do_sample=True,
|
||||
attention_mask=inputs["attention_mask"],
|
||||
eos_token_id=self.tokenizer.eos_token_id,
|
||||
eos_token_id=eos_token_id,
|
||||
pad_token_id=self.tokenizer.eos_token_id,
|
||||
streamer=streamer)
|
||||
thread=Thread(target=self.model.generate, kwargs=config)
|
||||
@@ -264,7 +286,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
top_k=request.TopK,
|
||||
do_sample=True,
|
||||
attention_mask=inputs["attention_mask"],
|
||||
eos_token_id=self.tokenizer.eos_token_id,
|
||||
eos_token_id=eos_token_id,
|
||||
pad_token_id=self.tokenizer.eos_token_id)
|
||||
generated_text = self.tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0]
|
||||
|
||||
|
||||
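The hunk above adds two behaviours to the transformers backend: rendering request.Messages through the tokenizer's chat template when no raw prompt is supplied, and turning request.StopPrompts into extra eos_token_id values for generation. Below is a standalone sketch of the same idea using the Hugging Face tokenizer API; it is not part of the diff, and the model name and stop token are only illustrative.

# Sketch of the prompt-building logic: when no raw prompt is given, messages are rendered
# through the tokenizer's chat template, and stop words are mapped to token ids that
# generation treats as additional EOS tokens.
from transformers import AutoTokenizer

# Illustrative model; any Hugging Face model that ships a chat template works the same way.
tokenizer = AutoTokenizer.from_pretrained("teknium/OpenHermes-2.5-Mistral-7B")

messages = [
    {"role": "user", "content": "What is the weather like in San Francisco?"},
]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

stop_prompts = ["<|im_end|>"]  # illustrative stop word
eos_token_id = [tokenizer.convert_tokens_to_ids(word) for word in stop_prompts]

print(prompt)
print(eos_token_id)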
@@ -95,6 +95,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
engine_args.trust_remote_code = request.TrustRemoteCode
|
||||
if request.EnforceEager:
|
||||
engine_args.enforce_eager = request.EnforceEager
|
||||
if request.TensorParallelSize:
|
||||
engine_args.tensor_parallel_size = request.TensorParallelSize
|
||||
if request.SwapSpace != 0:
|
||||
engine_args.swap_space = request.SwapSpace
|
||||
if request.MaxModelLen != 0:
|
||||
|
||||
@@ -74,6 +74,7 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
||||
EnforceEager: c.EnforceEager,
|
||||
SwapSpace: int32(c.SwapSpace),
|
||||
MaxModelLen: int32(c.MaxModelLen),
|
||||
TensorParallelSize: int32(c.TensorParallelSize),
|
||||
MMProj: c.MMProj,
|
||||
YarnExtFactor: c.YarnExtFactor,
|
||||
YarnAttnFactor: c.YarnAttnFactor,
|
||||
|
||||
39 core/backend/rerank.go (Normal file)
@@ -0,0 +1,39 @@
package backend

import (
	"context"
	"fmt"

	"github.com/go-skynet/LocalAI/core/config"
	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
	model "github.com/go-skynet/LocalAI/pkg/model"
)

func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
	bb := backend
	if bb == "" {
		return nil, fmt.Errorf("backend is required")
	}

	grpcOpts := gRPCModelOpts(backendConfig)

	opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
		model.WithBackendString(bb),
		model.WithModel(modelFile),
		model.WithContext(appConfig.Context),
		model.WithAssetDir(appConfig.AssetsDestination),
		model.WithLoadGRPCLoadModelOpts(grpcOpts),
	})
	rerankModel, err := loader.BackendLoader(opts...)
	if err != nil {
		return nil, err
	}

	if rerankModel == nil {
		return nil, fmt.Errorf("could not load rerank model")
	}

	res, err := rerankModel.Rerank(context.Background(), request)

	return res, err
}
@@ -4,7 +4,7 @@ import "embed"
|
||||
|
||||
type Context struct {
|
||||
Debug bool `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"`
|
||||
LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug" help:"Set the level of logs to output [${enum}]"`
|
||||
LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug,trace" help:"Set the level of logs to output [${enum}]"`
|
||||
|
||||
// This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI
|
||||
BackendAssets embed.FS `kong:"-"`
|
||||
|
||||
@@ -25,7 +25,7 @@ type ModelsInstall struct {
|
||||
}
|
||||
|
||||
type ModelsCMD struct {
|
||||
List ModelsList `cmd:"" help:"List the models avaiable in your galleries" default:"withargs"`
|
||||
List ModelsList `cmd:"" help:"List the models available in your galleries" default:"withargs"`
|
||||
Install ModelsInstall `cmd:"" help:"Install a model from the gallery"`
|
||||
}
|
||||
|
||||
|
||||
@@ -2,30 +2,31 @@ package cli
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/http"
|
||||
"github.com/go-skynet/LocalAI/core/startup"
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
type RunCMD struct {
|
||||
ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
|
||||
|
||||
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
|
||||
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
|
||||
ImagePath string `env:"LOCALAI_IMAGE_PATH,IMAGE_PATH" type:"path" default:"/tmp/generated/images" help:"Location for images generated by backends (e.g. stablediffusion)" group:"storage"`
|
||||
AudioPath string `env:"LOCALAI_AUDIO_PATH,AUDIO_PATH" type:"path" default:"/tmp/generated/audio" help:"Location for audio generated by backends (e.g. piper)" group:"storage"`
|
||||
UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"`
|
||||
ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"`
|
||||
LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"`
|
||||
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
|
||||
BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
|
||||
ImagePath string `env:"LOCALAI_IMAGE_PATH,IMAGE_PATH" type:"path" default:"/tmp/generated/images" help:"Location for images generated by backends (e.g. stablediffusion)" group:"storage"`
|
||||
AudioPath string `env:"LOCALAI_AUDIO_PATH,AUDIO_PATH" type:"path" default:"/tmp/generated/audio" help:"Location for audio generated by backends (e.g. piper)" group:"storage"`
|
||||
UploadPath string `env:"LOCALAI_UPLOAD_PATH,UPLOAD_PATH" type:"path" default:"/tmp/localai/upload" help:"Path to store uploads from files api" group:"storage"`
|
||||
ConfigPath string `env:"LOCALAI_CONFIG_PATH,CONFIG_PATH" default:"/tmp/localai/config" group:"storage"`
|
||||
LocalaiConfigDir string `env:"LOCALAI_CONFIG_DIR" type:"path" default:"${basepath}/configuration" help:"Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json)" group:"storage"`
|
||||
LocalaiConfigDirPollInterval time.Duration `env:"LOCALAI_CONFIG_DIR_POLL_INTERVAL" help:"Typically the config path picks up changes automatically, but if your system has broken fsnotify events, set this to an interval to poll the LocalAI Config Dir (example: 1m)" group:"storage"`
|
||||
// The alias on this option is there to preserve functionality with the old `--config-file` parameter
|
||||
ModelsConfigFile string `env:"LOCALAI_MODELS_CONFIG_FILE,CONFIG_FILE" aliases:"config-file" help:"YAML file containing a list of model backend configs" group:"storage"`
|
||||
|
||||
Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models"`
|
||||
Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
|
||||
AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"`
|
||||
RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"`
|
||||
PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"`
|
||||
@@ -60,15 +61,16 @@ func (r *RunCMD) Run(ctx *Context) error {
|
||||
config.WithYAMLConfigPreload(r.PreloadModelsConfig),
|
||||
config.WithModelPath(r.ModelsPath),
|
||||
config.WithContextSize(r.ContextSize),
|
||||
config.WithDebug(*ctx.LogLevel == "debug"),
|
||||
config.WithDebug(zerolog.GlobalLevel() <= zerolog.DebugLevel),
|
||||
config.WithImageDir(r.ImagePath),
|
||||
config.WithAudioDir(r.AudioPath),
|
||||
config.WithUploadDir(r.UploadPath),
|
||||
config.WithConfigsDir(r.ConfigPath),
|
||||
config.WithDynamicConfigDir(r.LocalaiConfigDir),
|
||||
config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval),
|
||||
config.WithF16(r.F16),
|
||||
config.WithStringGalleries(r.Galleries),
|
||||
config.WithModelLibraryURL(r.RemoteLibrary),
|
||||
config.WithDisableMessage(false),
|
||||
config.WithCors(r.CORS),
|
||||
config.WithCorsAllowOrigins(r.CORSAllowOrigins),
|
||||
config.WithThreads(r.Threads),
|
||||
@@ -129,22 +131,10 @@ func (r *RunCMD) Run(ctx *Context) error {
|
||||
}
|
||||
|
||||
cl, ml, options, err := startup.Startup(opts...)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed basic startup tasks with error %s", err.Error())
|
||||
}
|
||||
|
||||
// Watch the configuration directory
|
||||
// If the directory does not exist, we don't watch it
|
||||
if _, err := os.Stat(r.LocalaiConfigDir); err == nil {
|
||||
closeConfigWatcherFn, err := startup.WatchConfigDirectory(r.LocalaiConfigDir, options)
|
||||
defer closeConfigWatcherFn()
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed while watching configuration directory %s", r.LocalaiConfigDir)
|
||||
}
|
||||
}
|
||||
|
||||
appHTTP, err := http.App(cl, ml, options)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("error during HTTP App construction")
|
||||
|
||||
@@ -17,11 +17,13 @@ type ApplicationConfig struct {
|
||||
UploadLimitMB, Threads, ContextSize int
|
||||
DisableWelcomePage bool
|
||||
F16 bool
|
||||
Debug, DisableMessage bool
|
||||
Debug bool
|
||||
ImageDir string
|
||||
AudioDir string
|
||||
UploadDir string
|
||||
ConfigsDir string
|
||||
DynamicConfigsDir string
|
||||
DynamicConfigsDirPollInterval time.Duration
|
||||
CORS bool
|
||||
PreloadJSONModels string
|
||||
PreloadModelsFromPath string
|
||||
@@ -55,12 +57,11 @@ type AppOption func(*ApplicationConfig)
|
||||
|
||||
func NewApplicationConfig(o ...AppOption) *ApplicationConfig {
|
||||
opt := &ApplicationConfig{
|
||||
Context: context.Background(),
|
||||
UploadLimitMB: 15,
|
||||
Threads: 1,
|
||||
ContextSize: 512,
|
||||
Debug: true,
|
||||
DisableMessage: true,
|
||||
Context: context.Background(),
|
||||
UploadLimitMB: 15,
|
||||
Threads: 1,
|
||||
ContextSize: 512,
|
||||
Debug: true,
|
||||
}
|
||||
for _, oo := range o {
|
||||
oo(opt)
|
||||
@@ -234,12 +235,6 @@ func WithDebug(debug bool) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
func WithDisableMessage(disableMessage bool) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.DisableMessage = disableMessage
|
||||
}
|
||||
}
|
||||
|
||||
func WithAudioDir(audioDir string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.AudioDir = audioDir
|
||||
@@ -264,6 +259,18 @@ func WithConfigsDir(configsDir string) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
func WithDynamicConfigDir(dynamicConfigsDir string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.DynamicConfigsDir = dynamicConfigsDir
|
||||
}
|
||||
}
|
||||
|
||||
func WithDynamicConfigDirPollInterval(interval time.Duration) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.DynamicConfigsDirPollInterval = interval
|
||||
}
|
||||
}
|
||||
|
||||
func WithApiKeys(apiKeys []string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.ApiKeys = apiKeys
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
"github.com/go-skynet/LocalAI/pkg/downloader"
|
||||
"github.com/go-skynet/LocalAI/pkg/functions"
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
"github.com/rs/zerolog/log"
|
||||
"gopkg.in/yaml.v3"
|
||||
@@ -39,7 +40,7 @@ type BackendConfig struct {
|
||||
InputToken [][]int `yaml:"-"`
|
||||
functionCallString, functionCallNameString string `yaml:"-"`
|
||||
|
||||
FunctionsConfig Functions `yaml:"function"`
|
||||
FunctionsConfig functions.FunctionsConfig `yaml:"function"`
|
||||
|
||||
FeatureFlag FeatureFlag `yaml:"feature_flags"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
|
||||
// LLM configs (GPT4ALL, Llama.cpp, ...)
|
||||
@@ -139,6 +140,7 @@ type LLMConfig struct {
|
||||
EnforceEager bool `yaml:"enforce_eager"` // vLLM
|
||||
SwapSpace int `yaml:"swap_space"` // vLLM
|
||||
MaxModelLen int `yaml:"max_model_len"` // vLLM
|
||||
TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
|
||||
MMProj string `yaml:"mmproj"`
|
||||
|
||||
RopeScaling string `yaml:"rope_scaling"`
|
||||
@@ -157,13 +159,6 @@ type AutoGPTQ struct {
|
||||
UseFastTokenizer bool `yaml:"use_fast_tokenizer"`
|
||||
}
|
||||
|
||||
type Functions struct {
|
||||
DisableNoAction bool `yaml:"disable_no_action"`
|
||||
NoActionFunctionName string `yaml:"no_action_function_name"`
|
||||
NoActionDescriptionName string `yaml:"no_action_description_name"`
|
||||
ParallelCalls bool `yaml:"parallel_calls"`
|
||||
}
|
||||
|
||||
type TemplateConfig struct {
|
||||
Chat string `yaml:"chat"`
|
||||
ChatMessage string `yaml:"chat_message"`
|
||||
@@ -210,15 +205,15 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
||||
defaultTopP := 0.95
|
||||
defaultTopK := 40
|
||||
defaultTemp := 0.9
|
||||
defaultMaxTokens := 2048
|
||||
defaultMirostat := 2
|
||||
defaultMirostatTAU := 5.0
|
||||
defaultMirostatETA := 0.1
|
||||
defaultTypicalP := 1.0
|
||||
defaultTFZ := 1.0
|
||||
defaultZero := 0
|
||||
|
||||
// Try to offload all GPU layers (if GPU is found)
|
||||
defaultNGPULayers := 99999999
|
||||
defaultHigh := 99999999
|
||||
|
||||
trueV := true
|
||||
falseV := false
|
||||
@@ -259,7 +254,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
||||
}
|
||||
|
||||
if cfg.Maxtokens == nil {
|
||||
cfg.Maxtokens = &defaultMaxTokens
|
||||
cfg.Maxtokens = &defaultZero
|
||||
}
|
||||
|
||||
if cfg.Mirostat == nil {
|
||||
@@ -274,7 +269,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
||||
cfg.MirostatTAU = &defaultMirostatTAU
|
||||
}
|
||||
if cfg.NGPULayers == nil {
|
||||
cfg.NGPULayers = &defaultNGPULayers
|
||||
cfg.NGPULayers = &defaultHigh
|
||||
}
|
||||
|
||||
if cfg.LowVRAM == nil {
|
||||
@@ -517,7 +512,7 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error {
|
||||
for i, config := range cl.configs {
|
||||
|
||||
// Download files and verify their SHA
|
||||
for _, file := range config.DownloadFiles {
|
||||
for i, file := range config.DownloadFiles {
|
||||
log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)
|
||||
|
||||
if err := utils.VerifyPath(file.Filename, modelPath); err != nil {
|
||||
@@ -526,7 +521,7 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error {
|
||||
// Create file path
|
||||
filePath := filepath.Join(modelPath, file.Filename)
|
||||
|
||||
if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil {
|
||||
if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -540,7 +535,7 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error {
|
||||
|
||||
// check if file exists
|
||||
if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
|
||||
err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status)
|
||||
err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", 0, 0, status)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
309
core/http/api.go
309
core/http/api.go
@@ -1,309 +0,0 @@
|
||||
package http
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
"github.com/gofiber/swagger" // swagger handler
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs"
|
||||
"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
|
||||
"github.com/go-skynet/LocalAI/core/http/endpoints/openai"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
"github.com/go-skynet/LocalAI/core/services"
|
||||
"github.com/go-skynet/LocalAI/internal"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/gofiber/fiber/v2/middleware/cors"
|
||||
"github.com/gofiber/fiber/v2/middleware/logger"
|
||||
"github.com/gofiber/fiber/v2/middleware/recover"
|
||||
)
|
||||
|
||||
func readAuthHeader(c *fiber.Ctx) string {
|
||||
authHeader := c.Get("Authorization")
|
||||
|
||||
// elevenlabs
|
||||
xApiKey := c.Get("xi-api-key")
|
||||
if xApiKey != "" {
|
||||
authHeader = "Bearer " + xApiKey
|
||||
}
|
||||
|
||||
// anthropic
|
||||
xApiKey = c.Get("x-api-key")
|
||||
if xApiKey != "" {
|
||||
authHeader = "Bearer " + xApiKey
|
||||
}
|
||||
|
||||
return authHeader
|
||||
}
|
||||
|
||||
// @title LocalAI API
|
||||
// @version 2.0.0
|
||||
// @description The LocalAI Rest API.
|
||||
// @termsOfService
|
||||
// @contact.name LocalAI
|
||||
// @contact.url https://localai.io
|
||||
// @license.name MIT
|
||||
// @license.url https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE
|
||||
// @BasePath /
|
||||
// @securityDefinitions.apikey BearerAuth
|
||||
// @in header
|
||||
// @name Authorization
|
||||
|
||||
func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {
|
||||
// Return errors as JSON responses
|
||||
app := fiber.New(fiber.Config{
|
||||
Views: renderEngine(),
|
||||
BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
|
||||
DisableStartupMessage: appConfig.DisableMessage,
|
||||
// Override default error handler
|
||||
ErrorHandler: func(ctx *fiber.Ctx, err error) error {
|
||||
// Status code defaults to 500
|
||||
code := fiber.StatusInternalServerError
|
||||
|
||||
// Retrieve the custom status code if it's a *fiber.Error
|
||||
var e *fiber.Error
|
||||
if errors.As(err, &e) {
|
||||
code = e.Code
|
||||
}
|
||||
|
||||
// Send custom error page
|
||||
return ctx.Status(code).JSON(
|
||||
schema.ErrorResponse{
|
||||
Error: &schema.APIError{Message: err.Error(), Code: code},
|
||||
},
|
||||
)
|
||||
},
|
||||
})
|
||||
|
||||
if appConfig.Debug {
|
||||
app.Use(logger.New(logger.Config{
|
||||
Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
|
||||
}))
|
||||
}
|
||||
|
||||
// Default middleware config
|
||||
|
||||
if !appConfig.Debug {
|
||||
app.Use(recover.New())
|
||||
}
|
||||
|
||||
metricsService, err := services.NewLocalAIMetricsService()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if metricsService != nil {
|
||||
app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
|
||||
app.Hooks().OnShutdown(func() error {
|
||||
return metricsService.Shutdown()
|
||||
})
|
||||
}
|
||||
|
||||
// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
|
||||
auth := func(c *fiber.Ctx) error {
|
||||
if len(appConfig.ApiKeys) == 0 {
|
||||
return c.Next()
|
||||
}
|
||||
|
||||
// Check for api_keys.json file
|
||||
fileContent, err := os.ReadFile("api_keys.json")
|
||||
if err == nil {
|
||||
// Parse JSON content from the file
|
||||
var fileKeys []string
|
||||
err := json.Unmarshal(fileContent, &fileKeys)
|
||||
if err != nil {
|
||||
return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"})
|
||||
}
|
||||
|
||||
// Add file keys to options.ApiKeys
|
||||
appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...)
|
||||
}
|
||||
|
||||
if len(appConfig.ApiKeys) == 0 {
|
||||
return c.Next()
|
||||
}
|
||||
|
||||
authHeader := readAuthHeader(c)
|
||||
if authHeader == "" {
|
||||
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
|
||||
}
|
||||
|
||||
// If it's a bearer token
|
||||
authHeaderParts := strings.Split(authHeader, " ")
|
||||
if len(authHeaderParts) != 2 || authHeaderParts[0] != "Bearer" {
|
||||
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid Authorization header format"})
|
||||
}
|
||||
|
||||
apiKey := authHeaderParts[1]
|
||||
for _, key := range appConfig.ApiKeys {
|
||||
if apiKey == key {
|
||||
return c.Next()
|
||||
}
|
||||
}
|
||||
|
||||
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"})
|
||||
}
|
||||
|
||||
if appConfig.CORS {
|
||||
var c func(ctx *fiber.Ctx) error
|
||||
if appConfig.CORSAllowOrigins == "" {
|
||||
c = cors.New()
|
||||
} else {
|
||||
c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins})
|
||||
}
|
||||
|
||||
app.Use(c)
|
||||
}
|
||||
|
||||
// LocalAI API endpoints
|
||||
galleryService := services.NewGalleryService(appConfig.ModelPath)
|
||||
galleryService.Start(appConfig.Context, cl)
|
||||
|
||||
app.Get("/version", auth, func(c *fiber.Ctx) error {
|
||||
return c.JSON(struct {
|
||||
Version string `json:"version"`
|
||||
}{Version: internal.PrintableVersion()})
|
||||
})
|
||||
|
||||
// Make sure directories exists
|
||||
os.MkdirAll(appConfig.ImageDir, 0755)
|
||||
os.MkdirAll(appConfig.AudioDir, 0755)
|
||||
os.MkdirAll(appConfig.UploadDir, 0755)
|
||||
os.MkdirAll(appConfig.ConfigsDir, 0755)
|
||||
os.MkdirAll(appConfig.ModelPath, 0755)
|
||||
|
||||
// Load config jsons
|
||||
utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
|
||||
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
|
||||
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
|
||||
|
||||
app.Get("/swagger/*", swagger.HandlerDefault) // default
|
||||
|
||||
welcomeRoute(
|
||||
app,
|
||||
cl,
|
||||
ml,
|
||||
appConfig,
|
||||
auth,
|
||||
)
|
||||
|
||||
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
|
||||
app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
|
||||
app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint())
|
||||
app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint())
|
||||
app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint())
|
||||
app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint())
|
||||
app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint())
|
||||
app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint())
|
||||
|
||||
app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig))
|
||||
|
||||
// Elevenlabs
|
||||
app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig))
|
||||
|
||||
// Stores
|
||||
sl := model.NewModelLoader("")
|
||||
app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig))
|
||||
app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig))
|
||||
app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig))
|
||||
app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig))
|
||||
|
||||
// openAI compatible API endpoint
|
||||
|
||||
// chat
|
||||
app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
|
||||
app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
|
||||
|
||||
// edit
|
||||
app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
|
||||
app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
|
||||
|
||||
// assistant
|
||||
app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
|
||||
app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
|
||||
app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
|
||||
app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
|
||||
app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
|
||||
app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
|
||||
app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
|
||||
app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
|
||||
|
||||
// files
|
||||
app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
|
||||
app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
|
||||
app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig))
|
||||
app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig))
|
||||
app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
|
||||
app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
|
||||
app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
|
||||
app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
|
||||
app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
|
||||
app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
|
||||
|
||||
// completion
|
||||
app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
|
||||
app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
|
||||
|
||||
// embeddings
|
||||
app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
||||
app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
||||
|
||||
// audio
|
||||
app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig))
|
||||
|
||||
// images
|
||||
app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig))
|
||||
|
||||
if appConfig.ImageDir != "" {
|
||||
app.Static("/generated-images", appConfig.ImageDir)
|
||||
}
|
||||
|
||||
if appConfig.AudioDir != "" {
|
||||
app.Static("/generated-audio", appConfig.AudioDir)
|
||||
}
|
||||
|
||||
ok := func(c *fiber.Ctx) error {
|
||||
return c.SendStatus(200)
|
||||
}
|
||||
|
||||
// Kubernetes health checks
|
||||
app.Get("/healthz", ok)
|
||||
app.Get("/readyz", ok)
|
||||
|
||||
// Experimental Backend Statistics Module
|
||||
backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now
|
||||
app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor))
|
||||
app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor))
|
||||
|
||||
// models
|
||||
app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml))
|
||||
app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml))
|
||||
|
||||
app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint())
|
||||
|
||||
// Define a custom 404 handler
|
||||
// Note: keep this at the bottom!
|
||||
app.Use(notFoundHandler)
|
||||
|
||||
return app, nil
|
||||
}
|
||||
204
core/http/app.go
Normal file
204
core/http/app.go
Normal file
@@ -0,0 +1,204 @@
|
||||
package http
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
|
||||
"github.com/go-skynet/LocalAI/core/http/endpoints/openai"
|
||||
"github.com/go-skynet/LocalAI/core/http/routes"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
"github.com/go-skynet/LocalAI/core/services"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
|
||||
"github.com/gofiber/contrib/fiberzerolog"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/gofiber/fiber/v2/middleware/cors"
|
||||
"github.com/gofiber/fiber/v2/middleware/recover"
|
||||
|
||||
// swagger handler
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
func readAuthHeader(c *fiber.Ctx) string {
|
||||
authHeader := c.Get("Authorization")
|
||||
|
||||
// elevenlabs
|
||||
xApiKey := c.Get("xi-api-key")
|
||||
if xApiKey != "" {
|
||||
authHeader = "Bearer " + xApiKey
|
||||
}
|
||||
|
||||
// anthropic
|
||||
xApiKey = c.Get("x-api-key")
|
||||
if xApiKey != "" {
|
||||
authHeader = "Bearer " + xApiKey
|
||||
}
|
||||
|
||||
return authHeader
|
||||
}
|
||||
|
||||
// @title LocalAI API
|
||||
// @version 2.0.0
|
||||
// @description The LocalAI Rest API.
|
||||
// @termsOfService
|
||||
// @contact.name LocalAI
|
||||
// @contact.url https://localai.io
|
||||
// @license.name MIT
|
||||
// @license.url https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE
|
||||
// @BasePath /
|
||||
// @securityDefinitions.apikey BearerAuth
|
||||
// @in header
|
||||
// @name Authorization
|
||||
|
||||
func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {
|
||||
// Return errors as JSON responses
|
||||
app := fiber.New(fiber.Config{
|
||||
Views: renderEngine(),
|
||||
BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
|
||||
// We disable the Fiber startup message as it does not conform to structured logging.
|
||||
// We register a startup log line with connection information in the OnListen hook to keep things user friendly though
|
||||
DisableStartupMessage: true,
|
||||
// Override default error handler
|
||||
ErrorHandler: func(ctx *fiber.Ctx, err error) error {
|
||||
// Status code defaults to 500
|
||||
code := fiber.StatusInternalServerError
|
||||
|
||||
// Retrieve the custom status code if it's a *fiber.Error
|
||||
var e *fiber.Error
|
||||
if errors.As(err, &e) {
|
||||
code = e.Code
|
||||
}
|
||||
|
||||
// Send custom error page
|
||||
return ctx.Status(code).JSON(
|
||||
schema.ErrorResponse{
|
||||
Error: &schema.APIError{Message: err.Error(), Code: code},
|
||||
},
|
||||
)
|
||||
},
|
||||
})
|
||||
|
||||
app.Hooks().OnListen(func(listenData fiber.ListenData) error {
|
||||
scheme := "http"
|
||||
if listenData.TLS {
|
||||
scheme = "https"
|
||||
}
|
||||
log.Info().Str("endpoint", scheme+"://"+listenData.Host+":"+listenData.Port).Msg("LocalAI API is listening! Please connect to the endpoint for API documentation.")
|
||||
return nil
|
||||
})
|
||||
|
||||
// Have Fiber use zerolog like the rest of the application rather than it's built-in logger
|
||||
logger := log.Logger
|
||||
app.Use(fiberzerolog.New(fiberzerolog.Config{
|
||||
Logger: &logger,
|
||||
}))
|
||||
|
||||
// Default middleware config
|
||||
|
||||
if !appConfig.Debug {
|
||||
app.Use(recover.New())
|
||||
}
|
||||
|
||||
metricsService, err := services.NewLocalAIMetricsService()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if metricsService != nil {
|
||||
app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
|
||||
app.Hooks().OnShutdown(func() error {
|
||||
return metricsService.Shutdown()
|
||||
})
|
||||
}
|
||||
|
||||
// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
|
||||
auth := func(c *fiber.Ctx) error {
|
||||
if len(appConfig.ApiKeys) == 0 {
|
||||
return c.Next()
|
||||
}
|
||||
|
||||
// Check for api_keys.json file
|
||||
fileContent, err := os.ReadFile("api_keys.json")
|
||||
if err == nil {
|
||||
// Parse JSON content from the file
|
||||
var fileKeys []string
|
||||
err := json.Unmarshal(fileContent, &fileKeys)
|
||||
if err != nil {
|
||||
return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"})
|
||||
}
|
||||
|
||||
// Add file keys to options.ApiKeys
|
||||
appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...)
|
||||
}
|
||||
|
||||
if len(appConfig.ApiKeys) == 0 {
|
||||
return c.Next()
|
||||
}
|
||||
|
||||
authHeader := readAuthHeader(c)
|
||||
if authHeader == "" {
|
||||
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
|
||||
}
|
||||
|
||||
// If it's a bearer token
|
||||
authHeaderParts := strings.Split(authHeader, " ")
|
||||
if len(authHeaderParts) != 2 || authHeaderParts[0] != "Bearer" {
|
||||
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid Authorization header format"})
|
||||
}
|
||||
|
||||
apiKey := authHeaderParts[1]
|
||||
for _, key := range appConfig.ApiKeys {
|
||||
if apiKey == key {
|
||||
return c.Next()
|
||||
}
|
||||
}
|
||||
|
||||
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"})
|
||||
}
|
||||
|
||||
if appConfig.CORS {
|
||||
var c func(ctx *fiber.Ctx) error
|
||||
if appConfig.CORSAllowOrigins == "" {
|
||||
c = cors.New()
|
||||
} else {
|
||||
c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins})
|
||||
}
|
||||
|
||||
app.Use(c)
|
||||
}
|
||||
|
||||
// Make sure directories exists
|
||||
os.MkdirAll(appConfig.ImageDir, 0755)
|
||||
os.MkdirAll(appConfig.AudioDir, 0755)
|
||||
os.MkdirAll(appConfig.UploadDir, 0755)
|
||||
os.MkdirAll(appConfig.ConfigsDir, 0755)
|
||||
os.MkdirAll(appConfig.ModelPath, 0755)
|
||||
|
||||
// Load config jsons
|
||||
utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
|
||||
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
|
||||
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
|
||||
|
||||
galleryService := services.NewGalleryService(appConfig.ModelPath)
|
||||
galleryService.Start(appConfig.Context, cl)
|
||||
|
||||
routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig, auth)
|
||||
routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService, auth)
|
||||
routes.RegisterOpenAIRoutes(app, cl, ml, appConfig, auth)
|
||||
routes.RegisterPagesRoutes(app, cl, ml, appConfig, auth)
|
||||
routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService, auth)
|
||||
routes.RegisterJINARoutes(app, cl, ml, appConfig, auth)
|
||||
|
||||
// Define a custom 404 handler
|
||||
// Note: keep this at the bottom!
|
||||
app.Use(notFoundHandler)
|
||||
|
||||
return app, nil
|
||||
}
|
||||
@@ -211,7 +211,6 @@ var _ = Describe("API test", func() {
|
||||
|
||||
commonOpts := []config.AppOption{
|
||||
config.WithDebug(true),
|
||||
config.WithDisableMessage(true),
|
||||
}
|
||||
|
||||
Context("API with ephemeral models", func() {
|
||||
@@ -490,11 +489,10 @@ var _ = Describe("API test", func() {
|
||||
if runtime.GOOS != "linux" {
|
||||
Skip("test supported only on linux")
|
||||
}
|
||||
modelName := "codellama"
|
||||
|
||||
modelName := "hermes-2-pro-mistral"
|
||||
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||
URL: "github:go-skynet/model-gallery/codellama-7b-instruct.yaml",
|
||||
Name: modelName,
|
||||
Overrides: map[string]interface{}{"backend": "llama", "mmap": true, "f16": true, "context_size": 128},
|
||||
ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml",
|
||||
})
|
||||
|
||||
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
|
||||
@@ -557,7 +555,7 @@ var _ = Describe("API test", func() {
|
||||
var res map[string]string
|
||||
err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res))
|
||||
Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res))
|
||||
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
|
||||
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
|
||||
})
|
||||
247
core/http/elements/gallery.go
Normal file
247
core/http/elements/gallery.go
Normal file
@@ -0,0 +1,247 @@
|
||||
package elements
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/chasefleming/elem-go"
|
||||
"github.com/chasefleming/elem-go/attrs"
|
||||
"github.com/go-skynet/LocalAI/pkg/gallery"
|
||||
)
|
||||
|
||||
const (
|
||||
NoImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg"
|
||||
)
|
||||
|
||||
func DoneProgress(uid string) string {
|
||||
return elem.Div(
|
||||
attrs.Props{},
|
||||
elem.H3(
|
||||
attrs.Props{
|
||||
"role": "status",
|
||||
"id": "pblabel",
|
||||
"tabindex": "-1",
|
||||
"autofocus": "",
|
||||
},
|
||||
elem.Text("Installation completed"),
|
||||
),
|
||||
).Render()
|
||||
}
|
||||
|
||||
func ErrorProgress(err string) string {
|
||||
return elem.Div(
|
||||
attrs.Props{},
|
||||
elem.H3(
|
||||
attrs.Props{
|
||||
"role": "status",
|
||||
"id": "pblabel",
|
||||
"tabindex": "-1",
|
||||
"autofocus": "",
|
||||
},
|
||||
elem.Text("Error"+err),
|
||||
),
|
||||
).Render()
|
||||
}
|
||||
|
||||
func ProgressBar(progress string) string {
|
||||
return elem.Div(attrs.Props{
|
||||
"class": "progress",
|
||||
"role": "progressbar",
|
||||
"aria-valuemin": "0",
|
||||
"aria-valuemax": "100",
|
||||
"aria-valuenow": "0",
|
||||
"aria-labelledby": "pblabel",
|
||||
},
|
||||
elem.Div(attrs.Props{
|
||||
"id": "pb",
|
||||
"class": "progress-bar",
|
||||
"style": "width:" + progress + "%",
|
||||
}),
|
||||
).Render()
|
||||
}
|
||||
|
||||
func StartProgressBar(uid, progress string) string {
|
||||
if progress == "" {
|
||||
progress = "0"
|
||||
}
|
||||
return elem.Div(attrs.Props{
|
||||
"hx-trigger": "done",
|
||||
"hx-get": "/browse/job/" + uid,
|
||||
"hx-swap": "outerHTML",
|
||||
"hx-target": "this",
|
||||
},
|
||||
elem.H3(
|
||||
attrs.Props{
|
||||
"role": "status",
|
||||
"id": "pblabel",
|
||||
"tabindex": "-1",
|
||||
"autofocus": "",
|
||||
},
|
||||
elem.Text("Installing"),
|
||||
// This is a simple example of how to use the HTMLX library to create a progress bar that updates every 600ms.
|
||||
elem.Div(attrs.Props{
|
||||
"hx-get": "/browse/job/progress/" + uid,
|
||||
"hx-trigger": "every 600ms",
|
||||
"hx-target": "this",
|
||||
"hx-swap": "innerHTML",
|
||||
},
|
||||
elem.Raw(ProgressBar(progress)),
|
||||
),
|
||||
),
|
||||
).Render()
|
||||
}
|
||||
|
||||
func cardSpan(text, icon string) elem.Node {
|
||||
return elem.Span(
|
||||
attrs.Props{
|
||||
"class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2",
|
||||
},
|
||||
elem.I(attrs.Props{
|
||||
"class": icon + " pr-2",
|
||||
}),
|
||||
elem.Text(text),
|
||||
)
|
||||
}
|
||||
|
||||
func ListModels(models []*gallery.GalleryModel) string {
|
||||
modelsElements := []elem.Node{}
|
||||
span := func(s string) elem.Node {
|
||||
return elem.Span(
|
||||
attrs.Props{
|
||||
"class": "float-right inline-block bg-green-500 text-white py-1 px-3 rounded-full text-xs",
|
||||
},
|
||||
elem.Text(s),
|
||||
)
|
||||
}
|
||||
installButton := func(m *gallery.GalleryModel) elem.Node {
|
||||
return elem.Button(
|
||||
attrs.Props{
|
||||
"data-twe-ripple-init": "",
|
||||
"data-twe-ripple-color": "light",
|
||||
"class": "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
|
||||
// post the Model ID as param
|
||||
"hx-post": "/browse/install/model/" + fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name),
|
||||
},
|
||||
elem.I(
|
||||
attrs.Props{
|
||||
"class": "fa-solid fa-download pr-2",
|
||||
},
|
||||
),
|
||||
elem.Text("Install"),
|
||||
)
|
||||
}
|
||||
|
||||
descriptionDiv := func(m *gallery.GalleryModel) elem.Node {
|
||||
|
||||
return elem.Div(
|
||||
attrs.Props{
|
||||
"class": "p-6 text-surface dark:text-white",
|
||||
},
|
||||
elem.H5(
|
||||
attrs.Props{
|
||||
"class": "mb-2 text-xl font-medium leading-tight",
|
||||
},
|
||||
elem.Text(m.Name),
|
||||
),
|
||||
elem.P(
|
||||
attrs.Props{
|
||||
"class": "mb-4 text-base",
|
||||
},
|
||||
elem.Text(m.Description),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
actionDiv := func(m *gallery.GalleryModel) elem.Node {
|
||||
nodes := []elem.Node{
|
||||
cardSpan("Repository: "+m.Gallery.Name, "fa-brands fa-git-alt"),
|
||||
}
|
||||
|
||||
if m.License != "" {
|
||||
nodes = append(nodes,
|
||||
cardSpan("License: "+m.License, "fas fa-book"),
|
||||
)
|
||||
}
|
||||
|
||||
for _, tag := range m.Tags {
|
||||
nodes = append(nodes,
|
||||
cardSpan(tag, "fas fa-tag"),
|
||||
)
|
||||
}
|
||||
|
||||
for i, url := range m.URLs {
|
||||
nodes = append(nodes,
|
||||
elem.A(
|
||||
attrs.Props{
|
||||
"class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2",
|
||||
"href": url,
|
||||
"target": "_blank",
|
||||
},
|
||||
elem.I(attrs.Props{
|
||||
"class": "fas fa-link pr-2",
|
||||
}),
|
||||
elem.Text("Link #"+fmt.Sprintf("%d", i+1)),
|
||||
))
|
||||
}
|
||||
|
||||
return elem.Div(
|
||||
attrs.Props{
|
||||
"class": "px-6 pt-4 pb-2",
|
||||
},
|
||||
elem.P(
|
||||
attrs.Props{
|
||||
"class": "mb-4 text-base",
|
||||
},
|
||||
nodes...,
|
||||
),
|
||||
elem.If(m.Installed, span("Installed"), installButton(m)),
|
||||
)
|
||||
}
|
||||
|
||||
for _, m := range models {
|
||||
|
||||
elems := []elem.Node{}
|
||||
|
||||
if m.Icon == "" {
|
||||
m.Icon = NoImage
|
||||
}
|
||||
|
||||
elems = append(elems,
|
||||
|
||||
elem.Div(attrs.Props{
|
||||
"class": "flex justify-center items-center",
|
||||
},
|
||||
elem.A(attrs.Props{
|
||||
"href": "#!",
|
||||
// "class": "justify-center items-center",
|
||||
},
|
||||
elem.Img(attrs.Props{
|
||||
// "class": "rounded-t-lg object-fit object-center h-96",
|
||||
"class": "rounded-t-lg max-h-48 max-w-96 object-cover mt-3",
|
||||
"src": m.Icon,
|
||||
}),
|
||||
),
|
||||
))
|
||||
|
||||
elems = append(elems, descriptionDiv(m), actionDiv(m))
|
||||
modelsElements = append(modelsElements,
|
||||
elem.Div(
|
||||
attrs.Props{
|
||||
"class": " me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface pb-2",
|
||||
},
|
||||
elem.Div(
|
||||
attrs.Props{
|
||||
// "class": "p-6",
|
||||
},
|
||||
elems...,
|
||||
),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
wrapper := elem.Div(attrs.Props{
|
||||
"class": "dark grid grid-cols-1 grid-rows-1 md:grid-cols-3 block rounded-lg shadow-secondary-1 dark:bg-surface-dark",
|
||||
//"class": "block rounded-lg bg-white shadow-secondary-1 dark:bg-surface-dark",
|
||||
}, modelsElements...)
|
||||
|
||||
return wrapper.Render()
|
||||
}
|
||||
84 core/http/endpoints/jina/rerank.go (Normal file)
@@ -0,0 +1,84 @@
package jina

import (
	"github.com/go-skynet/LocalAI/core/backend"
	"github.com/go-skynet/LocalAI/core/config"

	fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
	"github.com/go-skynet/LocalAI/core/schema"
	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
	"github.com/go-skynet/LocalAI/pkg/model"
	"github.com/gofiber/fiber/v2"
	"github.com/rs/zerolog/log"
)

func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
	return func(c *fiber.Ctx) error {
		req := new(schema.JINARerankRequest)
		if err := c.BodyParser(req); err != nil {
			return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{
				"error": "Cannot parse JSON",
			})
		}

		input := new(schema.TTSRequest)

		// Get input data from the request body
		if err := c.BodyParser(input); err != nil {
			return err
		}

		modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false)
		if err != nil {
			modelFile = input.Model
			log.Warn().Msgf("Model not found in context: %s", input.Model)
		}

		cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
			config.LoadOptionDebug(appConfig.Debug),
			config.LoadOptionThreads(appConfig.Threads),
			config.LoadOptionContextSize(appConfig.ContextSize),
			config.LoadOptionF16(appConfig.F16),
		)

		if err != nil {
			modelFile = input.Model
			log.Warn().Msgf("Model not found in context: %s", input.Model)
		} else {
			modelFile = cfg.Model
		}
		log.Debug().Msgf("Request for model: %s", modelFile)

		if input.Backend != "" {
			cfg.Backend = input.Backend
		}

		request := &proto.RerankRequest{
			Query:     req.Query,
			TopN:      int32(req.TopN),
			Documents: req.Documents,
		}

		results, err := backend.Rerank(cfg.Backend, modelFile, request, ml, appConfig, *cfg)
		if err != nil {
			return err
		}

		response := &schema.JINARerankResponse{
			Model: req.Model,
		}

		for _, r := range results.Results {
			response.Results = append(response.Results, schema.JINADocumentResult{
				Index:          int(r.Index),
				Document:       schema.JINAText{Text: r.Text},
				RelevanceScore: float64(r.RelevanceScore),
			})
		}

		response.Usage.TotalTokens = int(results.Usage.TotalTokens)
		response.Usage.PromptTokens = int(results.Usage.PromptTokens)

		return c.Status(fiber.StatusOK).JSON(response)
	}
}
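The endpoint above only glues the HTTP layer to backend.Rerank; a client talks to it with a plain JSON POST. The sketch below is not part of the diff and rests on assumptions: that the route registered by routes.RegisterJINARoutes is /v1/rerank, that the JSON field names mirror the JINA rerank API (model, query, documents, top_n), and that LocalAI listens on localhost:8080 — none of these are spelled out in this hunk.

# Sketch of an HTTP client for the new rerank endpoint (route path, field casing and port are assumptions).
import requests

payload = {
    "model": "cross-encoder",
    "query": "I love you",
    "documents": ["I hate you", "I really like you"],
    "top_n": 2,
}

resp = requests.post("http://localhost:8080/v1/rerank", json=payload, timeout=60)
resp.raise_for_status()
body = resp.json()

# Each result carries the original document index, its text and a relevance score,
# mirroring schema.JINADocumentResult in the Go endpoint above.
for result in body["results"]:
    print(result["index"], result["relevance_score"], result["document"]["text"])
print("total tokens:", body["usage"]["total_tokens"])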
32 core/http/endpoints/localai/welcome.go (Normal file)
@@ -0,0 +1,32 @@
package localai

import (
	"github.com/go-skynet/LocalAI/core/config"
	"github.com/go-skynet/LocalAI/internal"
	"github.com/go-skynet/LocalAI/pkg/model"
	"github.com/gofiber/fiber/v2"
)

func WelcomeEndpoint(appConfig *config.ApplicationConfig,
	cl *config.BackendConfigLoader, ml *model.ModelLoader) func(*fiber.Ctx) error {
	return func(c *fiber.Ctx) error {
		models, _ := ml.ListModels()
		backendConfigs := cl.GetAllBackendConfigs()

		summary := fiber.Map{
			"Title":             "LocalAI API - " + internal.PrintableVersion(),
			"Version":           internal.PrintableVersion(),
			"Models":            models,
			"ModelsConfig":      backendConfigs,
			"ApplicationConfig": appConfig,
		}

		if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 {
			// The client expects a JSON response
			return c.Status(fiber.StatusOK).JSON(summary)
		} else {
			// Render index
			return c.Render("views/index", summary)
		}
	}
}
@@ -455,21 +455,19 @@ func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
|
||||
for i, assistant := range Assistants {
|
||||
if assistant.ID == assistantID {
|
||||
for j, fileId := range assistant.FileIDs {
|
||||
if fileId == fileId {
|
||||
Assistants[i].FileIDs = append(Assistants[i].FileIDs[:j], Assistants[i].FileIDs[j+1:]...)
|
||||
Assistants[i].FileIDs = append(Assistants[i].FileIDs[:j], Assistants[i].FileIDs[j+1:]...)
|
||||
|
||||
// Check if the file exists in the assistantFiles slice
|
||||
for i, assistantFile := range AssistantFiles {
|
||||
if assistantFile.ID == fileId {
|
||||
// Remove the file from the assistantFiles slice
|
||||
AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
|
||||
utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
|
||||
return c.Status(fiber.StatusOK).JSON(DeleteAssistantFileResponse{
|
||||
ID: fileId,
|
||||
Object: "assistant.file.deleted",
|
||||
Deleted: true,
|
||||
})
|
||||
}
|
||||
// Check if the file exists in the assistantFiles slice
|
||||
for i, assistantFile := range AssistantFiles {
|
||||
if assistantFile.ID == fileId {
|
||||
// Remove the file from the assistantFiles slice
|
||||
AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
|
||||
utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
|
||||
return c.Status(fiber.StatusOK).JSON(DeleteAssistantFileResponse{
|
||||
ID: fileId,
|
||||
Object: "assistant.file.deleted",
|
||||
Deleted: true,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,9 +11,8 @@ import (
|
||||
"github.com/go-skynet/LocalAI/core/backend"
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
"github.com/go-skynet/LocalAI/pkg/grammar"
|
||||
"github.com/go-skynet/LocalAI/pkg/functions"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
"github.com/rs/zerolog/log"
|
||||
@@ -68,8 +67,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
||||
return true
|
||||
})
|
||||
|
||||
results := parseFunctionCall(result, config.FunctionsConfig.ParallelCalls)
|
||||
noActionToRun := len(results) > 0 && results[0].name == noAction
|
||||
results := functions.ParseFunctionCall(result, config.FunctionsConfig)
|
||||
noActionToRun := len(results) > 0 && results[0].Name == noAction || len(results) == 0
|
||||
|
||||
switch {
|
||||
case noActionToRun:
|
||||
@@ -82,7 +81,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
||||
}
|
||||
responses <- initialMessage
|
||||
|
||||
result, err := handleQuestion(config, req, ml, startupOptions, results[0].arguments, prompt)
|
||||
result, err := handleQuestion(config, req, ml, startupOptions, results, prompt)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("error handling question")
|
||||
return
|
||||
@@ -105,7 +104,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
||||
|
||||
default:
|
||||
for i, ss := range results {
|
||||
name, args := ss.name, ss.arguments
|
||||
name, args := ss.Name, ss.Arguments
|
||||
|
||||
initialMessage := schema.OpenAIResponse{
|
||||
ID: id,
|
||||
@@ -156,8 +155,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
||||
}
|
||||
|
||||
return func(c *fiber.Ctx) error {
|
||||
processFunctions := false
|
||||
funcs := grammar.Functions{}
|
||||
modelFile, input, err := readRequest(c, ml, startupOptions, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
@@ -169,6 +166,9 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
||||
}
|
||||
log.Debug().Msgf("Configuration read: %+v", config)
|
||||
|
||||
funcs := input.Functions
|
||||
shouldUseFn := len(input.Functions) > 0 && config.ShouldUseFunctions()
|
||||
|
||||
// Allow the user to set custom actions via config file
|
||||
// to be "embedded" in each model
|
||||
noActionName := "answer"
|
||||
@@ -182,18 +182,18 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
||||
}
|
||||
|
||||
if input.ResponseFormat.Type == "json_object" {
|
||||
input.Grammar = grammar.JSONBNF
|
||||
input.Grammar = functions.JSONBNF
|
||||
}
|
||||
|
||||
config.Grammar = input.Grammar
|
||||
|
||||
// process functions if we have any defined or if we have a function call string
|
||||
if len(input.Functions) > 0 && config.ShouldUseFunctions() {
|
||||
if shouldUseFn {
|
||||
log.Debug().Msgf("Response needs to process functions")
|
||||
}
|
||||
|
||||
processFunctions = true
|
||||
|
||||
noActionGrammar := grammar.Function{
|
||||
switch {
|
||||
case !config.FunctionsConfig.NoGrammar && shouldUseFn:
|
||||
noActionGrammar := functions.Function{
|
||||
Name: noActionName,
|
||||
Description: noActionDescription,
|
||||
Parameters: map[string]interface{}{
|
||||
@@ -206,7 +206,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
||||
}
|
||||
|
||||
// Append the no action function
|
||||
funcs = append(funcs, input.Functions...)
|
||||
if !config.FunctionsConfig.DisableNoAction {
|
||||
funcs = append(funcs, noActionGrammar)
|
||||
}
|
||||
@@ -219,10 +218,17 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
||||
// Update input grammar
|
||||
jsStruct := funcs.ToJSONStructure()
|
||||
config.Grammar = jsStruct.Grammar("", config.FunctionsConfig.ParallelCalls)
|
||||
} else if input.JSONFunctionGrammarObject != nil {
|
||||
case input.JSONFunctionGrammarObject != nil:
|
||||
config.Grammar = input.JSONFunctionGrammarObject.Grammar("", config.FunctionsConfig.ParallelCalls)
|
||||
default:
|
||||
// Force picking one of the functions by the request
|
||||
if config.FunctionToCall() != "" {
|
||||
funcs = funcs.Select(config.FunctionToCall())
|
||||
}
|
||||
}
|
||||
|
||||
// process functions if we have any defined or if we have a function call string
|
||||
|
||||
// functions are not supported in stream mode (yet?)
|
||||
toStream := input.Stream
|
||||
|
||||
@@ -232,8 +238,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
||||
|
||||
// If we are using the tokenizer template, we don't need to process the messages
|
||||
// unless we are processing functions
|
||||
if !config.TemplateConfig.UseTokenizerTemplate || processFunctions {
|
||||
|
||||
if !config.TemplateConfig.UseTokenizerTemplate || shouldUseFn {
|
||||
suppressConfigSystemPrompt := false
|
||||
mess := []string{}
|
||||
for messageIndex, i := range input.Messages {
|
||||
@@ -346,11 +351,11 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
||||
templateFile = config.Model
|
||||
}
|
||||
|
||||
if config.TemplateConfig.Chat != "" && !processFunctions {
|
||||
if config.TemplateConfig.Chat != "" && !shouldUseFn {
|
||||
templateFile = config.TemplateConfig.Chat
|
||||
}
|
||||
|
||||
if config.TemplateConfig.Functions != "" && processFunctions {
|
||||
if config.TemplateConfig.Functions != "" && shouldUseFn {
|
||||
templateFile = config.TemplateConfig.Functions
|
||||
}
|
||||
|
||||
@@ -370,7 +375,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Prompt (after templating): %s", predInput)
|
||||
if processFunctions {
|
||||
if shouldUseFn && config.Grammar != "" {
|
||||
log.Debug().Msgf("Grammar: %+v", config.Grammar)
|
||||
}
|
||||
}
|
||||
@@ -388,7 +393,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
||||
|
||||
responses := make(chan schema.OpenAIResponse)
|
||||
|
||||
if !processFunctions {
|
||||
if !shouldUseFn {
|
||||
go process(predInput, input, config, ml, responses)
|
||||
} else {
|
||||
go processTools(noActionName, predInput, input, config, ml, responses)
|
||||
@@ -446,18 +451,18 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
||||
// no streaming mode
|
||||
default:
|
||||
result, tokenUsage, err := ComputeChoices(input, predInput, config, startupOptions, ml, func(s string, c *[]schema.Choice) {
|
||||
if !processFunctions {
|
||||
if !shouldUseFn {
|
||||
// no function is called, just reply and use stop as finish reason
|
||||
*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
|
||||
return
|
||||
}
|
||||
|
||||
results := parseFunctionCall(s, config.FunctionsConfig.ParallelCalls)
|
||||
noActionsToRun := len(results) > 0 && results[0].name == noActionName
|
||||
results := functions.ParseFunctionCall(s, config.FunctionsConfig)
|
||||
noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0
|
||||
|
||||
switch {
|
||||
case noActionsToRun:
|
||||
result, err := handleQuestion(config, input, ml, startupOptions, results[0].arguments, predInput)
|
||||
result, err := handleQuestion(config, input, ml, startupOptions, results, predInput)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("error handling question")
|
||||
return
|
||||
@@ -476,7 +481,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
||||
}
|
||||
|
||||
for _, ss := range results {
|
||||
name, args := ss.name, ss.arguments
|
||||
name, args := ss.Name, ss.Arguments
|
||||
if len(input.Tools) > 0 {
|
||||
// If we are using tools, we condense the function calls into
|
||||
// a single response choice with all the tools
|
||||
@@ -534,16 +539,20 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
||||
// Return the prediction in the response body
|
||||
return c.JSON(resp)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, args, prompt string) (string, error) {
|
||||
func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, funcResults []functions.FuncCallResults, prompt string) (string, error) {
|
||||
log.Debug().Msgf("nothing to do, computing a reply")
|
||||
|
||||
arg := ""
|
||||
if len(funcResults) > 0 {
|
||||
arg = funcResults[0].Arguments
|
||||
}
|
||||
// If there is a message that the LLM already sends as part of the JSON reply, use it
|
||||
arguments := map[string]interface{}{}
|
||||
json.Unmarshal([]byte(args), &arguments)
|
||||
if err := json.Unmarshal([]byte(arg), &arguments); err != nil {
|
||||
log.Debug().Msg("handleQuestion: function result did not contain a valid JSON object")
|
||||
}
|
||||
m, exists := arguments["message"]
|
||||
if exists {
|
||||
switch message := m.(type) {
|
||||
@@ -580,63 +589,3 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m
|
||||
}
|
||||
return backend.Finetune(*config, prompt, prediction.Response), nil
|
||||
}
|
||||
|
||||
type funcCallResults struct {
|
||||
name string
|
||||
arguments string
|
||||
}
|
||||
|
||||
func parseFunctionCall(llmresult string, multipleResults bool) []funcCallResults {
|
||||
results := []funcCallResults{}
|
||||
|
||||
// TODO: use generics to avoid this code duplication
|
||||
if multipleResults {
|
||||
ss := []map[string]interface{}{}
|
||||
s := utils.EscapeNewLines(llmresult)
|
||||
json.Unmarshal([]byte(s), &ss)
|
||||
log.Debug().Msgf("Function return: %s %+v", s, ss)
|
||||
|
||||
for _, s := range ss {
|
||||
func_name, ok := s["function"]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
args, ok := s["arguments"]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
d, _ := json.Marshal(args)
|
||||
funcName, ok := func_name.(string)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
results = append(results, funcCallResults{name: funcName, arguments: string(d)})
|
||||
}
|
||||
} else {
|
||||
// As we have to change the result before processing, we can't stream the answer token-by-token (yet?)
|
||||
ss := map[string]interface{}{}
|
||||
// This prevent newlines to break JSON parsing for clients
|
||||
s := utils.EscapeNewLines(llmresult)
|
||||
json.Unmarshal([]byte(s), &ss)
|
||||
log.Debug().Msgf("Function return: %s %+v", s, ss)
|
||||
|
||||
// The grammar defines the function name as "function", while OpenAI returns "name"
|
||||
func_name, ok := ss["function"]
|
||||
if !ok {
|
||||
return results
|
||||
}
|
||||
// Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object
|
||||
args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
|
||||
if !ok {
|
||||
return results
|
||||
}
|
||||
d, _ := json.Marshal(args)
|
||||
funcName, ok := func_name.(string)
|
||||
if !ok {
|
||||
return results
|
||||
}
|
||||
results = append(results, funcCallResults{name: funcName, arguments: string(d)})
|
||||
}
|
||||
|
||||
return results
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ import (
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
"github.com/go-skynet/LocalAI/pkg/grammar"
|
||||
"github.com/go-skynet/LocalAI/pkg/functions"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
@@ -70,7 +70,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
|
||||
}
|
||||
|
||||
if input.ResponseFormat.Type == "json_object" {
|
||||
input.Grammar = grammar.JSONBNF
|
||||
input.Grammar = functions.JSONBNF
|
||||
}
|
||||
|
||||
config.Grammar = input.Grammar
|
||||
|
||||
@@ -12,7 +12,7 @@ import (
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
"github.com/go-skynet/LocalAI/pkg/grammar"
|
||||
"github.com/go-skynet/LocalAI/pkg/functions"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/rs/zerolog/log"
|
||||
@@ -145,7 +145,7 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
|
||||
}
|
||||
|
||||
if input.ToolsChoice != nil {
|
||||
var toolChoice grammar.Tool
|
||||
var toolChoice functions.Tool
|
||||
|
||||
switch content := input.ToolsChoice.(type) {
|
||||
case string:
|
||||
|
||||
@@ -7,10 +7,7 @@ import (
|
||||
"net/http"
|
||||
|
||||
"github.com/Masterminds/sprig/v3"
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
"github.com/go-skynet/LocalAI/internal"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
fiberhtml "github.com/gofiber/template/html/v2"
|
||||
"github.com/russross/blackfriday"
|
||||
@@ -33,40 +30,6 @@ func notFoundHandler(c *fiber.Ctx) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func welcomeRoute(
|
||||
app *fiber.App,
|
||||
cl *config.BackendConfigLoader,
|
||||
ml *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig,
|
||||
auth func(*fiber.Ctx) error,
|
||||
) {
|
||||
if appConfig.DisableWelcomePage {
|
||||
return
|
||||
}
|
||||
|
||||
models, _ := ml.ListModels()
|
||||
backendConfigs := cl.GetAllBackendConfigs()
|
||||
|
||||
app.Get("/", auth, func(c *fiber.Ctx) error {
|
||||
summary := fiber.Map{
|
||||
"Title": "LocalAI API - " + internal.PrintableVersion(),
|
||||
"Version": internal.PrintableVersion(),
|
||||
"Models": models,
|
||||
"ModelsConfig": backendConfigs,
|
||||
"ApplicationConfig": appConfig,
|
||||
}
|
||||
|
||||
if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 {
|
||||
// The client expects a JSON response
|
||||
return c.Status(fiber.StatusOK).JSON(summary)
|
||||
} else {
|
||||
// Render index
|
||||
return c.Render("views/index", summary)
|
||||
}
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
func renderEngine() *fiberhtml.Engine {
|
||||
engine := fiberhtml.NewFileSystem(http.FS(viewsfs), ".html")
|
||||
engine.AddFuncMap(sprig.FuncMap())
|
||||
|
||||
19
core/http/routes/elevenlabs.go
Normal file
@@ -0,0 +1,19 @@
|
||||
package routes
|
||||
|
||||
import (
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
)
|
||||
|
||||
func RegisterElevenLabsRoutes(app *fiber.App,
|
||||
cl *config.BackendConfigLoader,
|
||||
ml *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig,
|
||||
auth func(*fiber.Ctx) error) {
|
||||
|
||||
// Elevenlabs
|
||||
app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig))
|
||||
|
||||
}
|
||||
19
core/http/routes/jina.go
Normal file
@@ -0,0 +1,19 @@
|
||||
package routes
|
||||
|
||||
import (
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/http/endpoints/jina"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
)
|
||||
|
||||
func RegisterJINARoutes(app *fiber.App,
|
||||
cl *config.BackendConfigLoader,
|
||||
ml *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig,
|
||||
auth func(*fiber.Ctx) error) {
|
||||
|
||||
// POST endpoint to mimic the reranking
|
||||
app.Post("/v1/rerank", jina.JINARerankEndpoint(cl, ml, appConfig))
|
||||
}
|
||||
63
core/http/routes/localai.go
Normal file
@@ -0,0 +1,63 @@
|
||||
package routes
|
||||
|
||||
import (
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
|
||||
"github.com/go-skynet/LocalAI/core/services"
|
||||
"github.com/go-skynet/LocalAI/internal"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/gofiber/swagger"
|
||||
)
|
||||
|
||||
func RegisterLocalAIRoutes(app *fiber.App,
|
||||
cl *config.BackendConfigLoader,
|
||||
ml *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig,
|
||||
galleryService *services.GalleryService,
|
||||
auth func(*fiber.Ctx) error) {
|
||||
|
||||
app.Get("/swagger/*", swagger.HandlerDefault) // default
|
||||
|
||||
// LocalAI API endpoints
|
||||
|
||||
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
|
||||
app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
|
||||
app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint())
|
||||
app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint())
|
||||
app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint())
|
||||
app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint())
|
||||
app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint())
|
||||
app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint())
|
||||
|
||||
app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig))
|
||||
|
||||
// Stores
|
||||
sl := model.NewModelLoader("")
|
||||
app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig))
|
||||
app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig))
|
||||
app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig))
|
||||
app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig))
|
||||
|
||||
// Kubernetes health checks
|
||||
ok := func(c *fiber.Ctx) error {
|
||||
return c.SendStatus(200)
|
||||
}
|
||||
|
||||
app.Get("/healthz", ok)
|
||||
app.Get("/readyz", ok)
|
||||
|
||||
app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint())
|
||||
|
||||
// Experimental Backend Statistics Module
|
||||
backendMonitor := services.NewBackendMonitor(cl, ml, appConfig) // Split out for now
|
||||
app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitor))
|
||||
app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitor))
|
||||
|
||||
app.Get("/version", auth, func(c *fiber.Ctx) error {
|
||||
return c.JSON(struct {
|
||||
Version string `json:"version"`
|
||||
}{Version: internal.PrintableVersion()})
|
||||
})
|
||||
|
||||
}
|
||||
86
core/http/routes/openai.go
Normal file
@@ -0,0 +1,86 @@
|
||||
package routes
|
||||
|
||||
import (
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
|
||||
"github.com/go-skynet/LocalAI/core/http/endpoints/openai"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
)
|
||||
|
||||
func RegisterOpenAIRoutes(app *fiber.App,
|
||||
cl *config.BackendConfigLoader,
|
||||
ml *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig,
|
||||
auth func(*fiber.Ctx) error) {
|
||||
// openAI compatible API endpoint
|
||||
|
||||
// chat
|
||||
app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
|
||||
app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
|
||||
|
||||
// edit
|
||||
app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
|
||||
app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
|
||||
|
||||
// assistant
|
||||
app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
|
||||
app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
|
||||
app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
|
||||
app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
|
||||
app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
|
||||
app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
|
||||
app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
|
||||
app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
|
||||
app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
|
||||
|
||||
// files
|
||||
app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
|
||||
app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
|
||||
app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig))
|
||||
app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig))
|
||||
app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
|
||||
app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
|
||||
app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
|
||||
app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
|
||||
app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
|
||||
app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
|
||||
|
||||
// completion
|
||||
app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
|
||||
app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
|
||||
|
||||
// embeddings
|
||||
app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
||||
app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
||||
|
||||
// audio
|
||||
app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig))
|
||||
app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig))
|
||||
|
||||
// images
|
||||
app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig))
|
||||
|
||||
if appConfig.ImageDir != "" {
|
||||
app.Static("/generated-images", appConfig.ImageDir)
|
||||
}
|
||||
|
||||
if appConfig.AudioDir != "" {
|
||||
app.Static("/generated-audio", appConfig.AudioDir)
|
||||
}
|
||||
|
||||
// models
|
||||
app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml))
|
||||
app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml))
|
||||
}
|
||||
111
core/http/routes/ui.go
Normal file
@@ -0,0 +1,111 @@
|
||||
package routes
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"html/template"
|
||||
"strings"
|
||||
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/http/elements"
|
||||
"github.com/go-skynet/LocalAI/core/services"
|
||||
"github.com/go-skynet/LocalAI/pkg/gallery"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
func RegisterUIRoutes(app *fiber.App,
|
||||
cl *config.BackendConfigLoader,
|
||||
ml *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig,
|
||||
galleryService *services.GalleryService,
|
||||
auth func(*fiber.Ctx) error) {
|
||||
|
||||
// Show the Models page
|
||||
app.Get("/browse", auth, func(c *fiber.Ctx) error {
|
||||
models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath)
|
||||
|
||||
summary := fiber.Map{
|
||||
"Title": "LocalAI - Models",
|
||||
"Models": template.HTML(elements.ListModels(models)),
|
||||
"Repositories": appConfig.Galleries,
|
||||
// "ApplicationConfig": appConfig,
|
||||
}
|
||||
|
||||
// Render index
|
||||
return c.Render("views/models", summary)
|
||||
})
|
||||
|
||||
// HTMX: return the model details
|
||||
// https://htmx.org/examples/active-search/
|
||||
app.Post("/browse/search/models", auth, func(c *fiber.Ctx) error {
|
||||
form := struct {
|
||||
Search string `form:"search"`
|
||||
}{}
|
||||
if err := c.BodyParser(&form); err != nil {
|
||||
return c.Status(fiber.StatusBadRequest).SendString(err.Error())
|
||||
}
|
||||
|
||||
models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath)
|
||||
|
||||
filteredModels := []*gallery.GalleryModel{}
|
||||
for _, m := range models {
|
||||
if strings.Contains(m.Name, form.Search) ||
|
||||
strings.Contains(m.Description, form.Search) ||
|
||||
strings.Contains(m.Gallery.Name, form.Search) ||
|
||||
strings.Contains(strings.Join(m.Tags, ","), form.Search) {
|
||||
filteredModels = append(filteredModels, m)
|
||||
}
|
||||
}
|
||||
|
||||
return c.SendString(elements.ListModels(filteredModels))
|
||||
})
|
||||
|
||||
// https://htmx.org/examples/progress-bar/
|
||||
app.Post("/browse/install/model/:id", auth, func(c *fiber.Ctx) error {
|
||||
galleryID := strings.Clone(c.Params("id")) // strings.Clone is required!
|
||||
|
||||
id, err := uuid.NewUUID()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
uid := id.String()
|
||||
|
||||
op := gallery.GalleryOp{
|
||||
Id: uid,
|
||||
GalleryName: galleryID,
|
||||
Galleries: appConfig.Galleries,
|
||||
}
|
||||
go func() {
|
||||
galleryService.C <- op
|
||||
}()
|
||||
|
||||
return c.SendString(elements.StartProgressBar(uid, "0"))
|
||||
})
|
||||
|
||||
// https://htmx.org/examples/progress-bar/
|
||||
app.Get("/browse/job/progress/:uid", auth, func(c *fiber.Ctx) error {
|
||||
jobUID := c.Params("uid")
|
||||
|
||||
status := galleryService.GetStatus(jobUID)
|
||||
if status == nil {
|
||||
//fmt.Errorf("could not find any status for ID")
|
||||
return c.SendString(elements.ProgressBar("0"))
|
||||
}
|
||||
|
||||
if status.Progress == 100 {
|
||||
c.Set("HX-Trigger", "done")
|
||||
return c.SendString(elements.ProgressBar("100"))
|
||||
}
|
||||
if status.Error != nil {
|
||||
return c.SendString(elements.ErrorProgress(status.Error.Error()))
|
||||
}
|
||||
|
||||
return c.SendString(elements.ProgressBar(fmt.Sprint(status.Progress)))
|
||||
})
|
||||
|
||||
app.Get("/browse/job/:uid", auth, func(c *fiber.Ctx) error {
|
||||
return c.SendString(elements.DoneProgress(c.Params("uid")))
|
||||
})
|
||||
}
|
||||
19
core/http/routes/welcome.go
Normal file
@@ -0,0 +1,19 @@
|
||||
package routes
|
||||
|
||||
import (
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
)
|
||||
|
||||
func RegisterPagesRoutes(app *fiber.App,
|
||||
cl *config.BackendConfigLoader,
|
||||
ml *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig,
|
||||
auth func(*fiber.Ctx) error) {
|
||||
|
||||
if !appConfig.DisableWelcomePage {
|
||||
app.Get("/", auth, localai.WelcomeEndpoint(appConfig, cl, ml))
|
||||
}
|
||||
}
|
||||
34
core/http/views/models.html
Normal file
@@ -0,0 +1,34 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
{{template "views/partials/head" .}}
|
||||
|
||||
<body class="bg-gray-900 text-gray-200">
|
||||
<div class="flex flex-col min-h-screen">
|
||||
|
||||
{{template "views/partials/navbar" .}}
|
||||
<div class="container mx-auto px-4 flex-grow">
|
||||
|
||||
<div class="models mt-12">
|
||||
<h2 class="text-center text-3xl font-semibold text-gray-100">
|
||||
🖼️ Available models from <i>{{ len .Repositories }}</i> repositories <a href="https://localai.io/models/" target="_blank" >
|
||||
<i class="fas fa-circle-info pr-2"></i>
|
||||
</a></h2>
|
||||
|
||||
|
||||
<span class="htmx-indicator loader"></span>
|
||||
<input class="form-control appearance-none block w-full px-3 py-2 text-base font-normal text-gray-300 pb-2 mb-5 bg-gray-800 bg-clip-padding border border-solid border-gray-600 rounded transition ease-in-out m-0 focus:text-gray-300 focus:bg-gray-900 focus:border-blue-500 focus:outline-none" type="search"
|
||||
name="search" placeholder="Begin Typing To Search models..."
|
||||
hx-post="/browse/search/models"
|
||||
hx-trigger="input changed delay:500ms, search"
|
||||
hx-target="#search-results"
|
||||
hx-indicator=".htmx-indicator">
|
||||
|
||||
<div id="search-results">{{.Models}}</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{{template "views/partials/footer" .}}
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
@@ -3,11 +3,76 @@
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>{{.Title}}</title>
|
||||
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
|
||||
<link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
|
||||
<link
|
||||
href="https://fonts.googleapis.com/css?family=Roboto:300,400,500,700,900&display=swap"
|
||||
rel="stylesheet" />
|
||||
<link
|
||||
rel="stylesheet"
|
||||
href="https://cdn.jsdelivr.net/npm/tw-elements/css/tw-elements.min.css" />
|
||||
<script src="https://cdn.tailwindcss.com/3.3.0"></script>
|
||||
<script>
|
||||
tailwind.config = {
|
||||
darkMode: "class",
|
||||
theme: {
|
||||
fontFamily: {
|
||||
sans: ["Roboto", "sans-serif"],
|
||||
body: ["Roboto", "sans-serif"],
|
||||
mono: ["ui-monospace", "monospace"],
|
||||
},
|
||||
},
|
||||
corePlugins: {
|
||||
preflight: false,
|
||||
},
|
||||
};
|
||||
</script>
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.1.1/css/all.min.css">
|
||||
<script src="https://unpkg.com/htmx.org@1.9.12" integrity="sha384-ujb1lZYygJmzgSwoxRggbCHcjc0rB2XoQrxeTUQyRjrOnlCoYta87iKBWq3EsdM2" crossorigin="anonymous"></script>
|
||||
<style>
|
||||
body {
|
||||
font-family: 'Inter', sans-serif;
|
||||
}
|
||||
/* Loader (https://cssloaders.github.io/) */
|
||||
.loader {
|
||||
width: 12px;
|
||||
height: 12px;
|
||||
border-radius: 50%;
|
||||
display: block;
|
||||
margin:15px auto;
|
||||
position: relative;
|
||||
color: #FFF;
|
||||
box-sizing: border-box;
|
||||
animation: animloader 2s linear infinite;
|
||||
}
|
||||
|
||||
@keyframes animloader {
|
||||
0% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 -2px; }
|
||||
25% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 2px; }
|
||||
50% { box-shadow: 14px 0 0 -2px, 38px 0 0 -2px, -14px 0 0 2px, -38px 0 0 -2px; }
|
||||
75% { box-shadow: 14px 0 0 2px, 38px 0 0 -2px, -14px 0 0 -2px, -38px 0 0 -2px; }
|
||||
100% { box-shadow: 14px 0 0 -2px, 38px 0 0 2px, -14px 0 0 -2px, -38px 0 0 -2px; }
|
||||
}
|
||||
.progress {
|
||||
height: 20px;
|
||||
margin-bottom: 20px;
|
||||
overflow: hidden;
|
||||
background-color: #f5f5f5;
|
||||
border-radius: 4px;
|
||||
box-shadow: inset 0 1px 2px rgba(0,0,0,.1);
|
||||
}
|
||||
.progress-bar {
|
||||
float: left;
|
||||
width: 0%;
|
||||
height: 100%;
|
||||
font-size: 12px;
|
||||
line-height: 20px;
|
||||
color: #fff;
|
||||
text-align: center;
|
||||
background-color: #337ab7;
|
||||
-webkit-box-shadow: inset 0 -1px 0 rgba(0,0,0,.15);
|
||||
box-shadow: inset 0 -1px 0 rgba(0,0,0,.15);
|
||||
-webkit-transition: width .6s ease;
|
||||
-o-transition: width .6s ease;
|
||||
transition: width .6s ease;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
@@ -9,6 +9,7 @@
|
||||
<div>
|
||||
<a href="/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-home pr-2"></i>Home</a>
|
||||
<a href="https://localai.io" class="text-gray-400 hover:text-white px-3 py-2 rounded" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
|
||||
<a href="/browse/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-brain pr-2"></i> Models</a>
|
||||
<a href="/swagger/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-code pr-2"></i> API</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
34
core/schema/jina.go
Normal file
@@ -0,0 +1,34 @@
|
||||
package schema
|
||||
|
||||
// RerankRequest defines the structure of the request payload
|
||||
type JINARerankRequest struct {
|
||||
Model string `json:"model"`
|
||||
Query string `json:"query"`
|
||||
Documents []string `json:"documents"`
|
||||
TopN int `json:"top_n"`
|
||||
}
|
||||
|
||||
// DocumentResult represents a single document result
|
||||
type JINADocumentResult struct {
|
||||
Index int `json:"index"`
|
||||
Document JINAText `json:"document"`
|
||||
RelevanceScore float64 `json:"relevance_score"`
|
||||
}
|
||||
|
||||
// Text holds the text of the document
|
||||
type JINAText struct {
|
||||
Text string `json:"text"`
|
||||
}
|
||||
|
||||
// RerankResponse defines the structure of the response payload
|
||||
type JINARerankResponse struct {
|
||||
Model string `json:"model"`
|
||||
Usage JINAUsageInfo `json:"usage"`
|
||||
Results []JINADocumentResult `json:"results"`
|
||||
}
|
||||
|
||||
// UsageInfo holds information about usage of tokens
|
||||
type JINAUsageInfo struct {
|
||||
TotalTokens int `json:"total_tokens"`
|
||||
PromptTokens int `json:"prompt_tokens"`
|
||||
}
|
||||
@@ -3,7 +3,7 @@ package schema
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grammar"
|
||||
functions "github.com/go-skynet/LocalAI/pkg/functions"
|
||||
)
|
||||
|
||||
// APIError provides error information returned by the OpenAI API.
|
||||
@@ -108,7 +108,7 @@ type ChatCompletionResponseFormat struct {
|
||||
type OpenAIRequest struct {
|
||||
PredictionOptions
|
||||
|
||||
Context context.Context `json:"-"`
|
||||
Context context.Context `json:"-"`
|
||||
Cancel context.CancelFunc `json:"-"`
|
||||
|
||||
// whisper
|
||||
@@ -130,11 +130,11 @@ type OpenAIRequest struct {
|
||||
Messages []Message `json:"messages" yaml:"messages"`
|
||||
|
||||
// A list of available functions to call
|
||||
Functions []grammar.Function `json:"functions" yaml:"functions"`
|
||||
FunctionCall interface{} `json:"function_call" yaml:"function_call"` // might be a string or an object
|
||||
Functions functions.Functions `json:"functions" yaml:"functions"`
|
||||
FunctionCall interface{} `json:"function_call" yaml:"function_call"` // might be a string or an object
|
||||
|
||||
Tools []grammar.Tool `json:"tools,omitempty" yaml:"tools"`
|
||||
ToolsChoice interface{} `json:"tool_choice,omitempty" yaml:"tool_choice"`
|
||||
Tools []functions.Tool `json:"tools,omitempty" yaml:"tools"`
|
||||
ToolsChoice interface{} `json:"tool_choice,omitempty" yaml:"tool_choice"`
|
||||
|
||||
Stream bool `json:"stream"`
|
||||
|
||||
@@ -145,7 +145,7 @@ type OpenAIRequest struct {
|
||||
// A grammar to constrain the LLM output
|
||||
Grammar string `json:"grammar" yaml:"grammar"`
|
||||
|
||||
JSONFunctionGrammarObject *grammar.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"`
|
||||
JSONFunctionGrammarObject *functions.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"`
|
||||
|
||||
Backend string `json:"backend" yaml:"backend"`
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"time"
|
||||
|
||||
"github.com/fsnotify/fsnotify"
|
||||
"github.com/go-skynet/LocalAI/core/config"
|
||||
@@ -12,89 +13,157 @@ import (
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
type WatchConfigDirectoryCloser func() error
|
||||
type fileHandler func(fileContent []byte, appConfig *config.ApplicationConfig) error
|
||||
|
||||
func ReadApiKeysJson(configDir string, appConfig *config.ApplicationConfig) error {
|
||||
fileContent, err := os.ReadFile(path.Join(configDir, "api_keys.json"))
|
||||
if err == nil {
|
||||
// Parse JSON content from the file
|
||||
var fileKeys []string
|
||||
err := json.Unmarshal(fileContent, &fileKeys)
|
||||
if err == nil {
|
||||
appConfig.ApiKeys = append(appConfig.ApiKeys, fileKeys...)
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
return err
|
||||
type configFileHandler struct {
|
||||
handlers map[string]fileHandler
|
||||
|
||||
watcher *fsnotify.Watcher
|
||||
|
||||
configDir string
|
||||
appConfig *config.ApplicationConfig
|
||||
}
|
||||
|
||||
func ReadExternalBackendsJson(configDir string, appConfig *config.ApplicationConfig) error {
|
||||
fileContent, err := os.ReadFile(path.Join(configDir, "external_backends.json"))
|
||||
if err != nil {
|
||||
return err
|
||||
// TODO: This should be a singleton eventually so other parts of the code can register config file handlers,
|
||||
// then we can export it to other packages
|
||||
func newConfigFileHandler(appConfig *config.ApplicationConfig) configFileHandler {
|
||||
c := configFileHandler{
|
||||
handlers: make(map[string]fileHandler),
|
||||
configDir: appConfig.DynamicConfigsDir,
|
||||
appConfig: appConfig,
|
||||
}
|
||||
// Parse JSON content from the file
|
||||
var fileBackends map[string]string
|
||||
err = json.Unmarshal(fileContent, &fileBackends)
|
||||
if err != nil {
|
||||
return err
|
||||
c.Register("api_keys.json", readApiKeysJson(*appConfig), true)
|
||||
c.Register("external_backends.json", readExternalBackendsJson(*appConfig), true)
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *configFileHandler) Register(filename string, handler fileHandler, runNow bool) error {
|
||||
_, ok := c.handlers[filename]
|
||||
if ok {
|
||||
return fmt.Errorf("handler already registered for file %s", filename)
|
||||
}
|
||||
err = mergo.Merge(&appConfig.ExternalGRPCBackends, fileBackends)
|
||||
if err != nil {
|
||||
return err
|
||||
c.handlers[filename] = handler
|
||||
if runNow {
|
||||
c.callHandler(path.Join(c.appConfig.DynamicConfigsDir, filename), handler)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var CONFIG_FILE_UPDATES = map[string]func(configDir string, appConfig *config.ApplicationConfig) error{
|
||||
"api_keys.json": ReadApiKeysJson,
|
||||
"external_backends.json": ReadExternalBackendsJson,
|
||||
func (c *configFileHandler) callHandler(filename string, handler fileHandler) {
|
||||
fileContent, err := os.ReadFile(filename)
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
log.Error().Err(err).Str("filename", filename).Msg("could not read file")
|
||||
}
|
||||
|
||||
if err = handler(fileContent, c.appConfig); err != nil {
|
||||
log.Error().Err(err).Msg("WatchConfigDirectory goroutine failed to update options")
|
||||
}
|
||||
}
|
||||
|
||||
func WatchConfigDirectory(configDir string, appConfig *config.ApplicationConfig) (WatchConfigDirectoryCloser, error) {
|
||||
if len(configDir) == 0 {
|
||||
return nil, fmt.Errorf("configDir blank")
|
||||
}
|
||||
func (c *configFileHandler) Watch() error {
|
||||
configWatcher, err := fsnotify.NewWatcher()
|
||||
c.watcher = configWatcher
|
||||
if err != nil {
|
||||
log.Fatal().Msgf("Unable to create a watcher for the LocalAI Configuration Directory: %+v", err)
|
||||
log.Fatal().Err(err).Str("configdir", c.configDir).Msg("wnable to create a watcher for configuration directory")
|
||||
}
|
||||
ret := func() error {
|
||||
configWatcher.Close()
|
||||
return nil
|
||||
|
||||
if c.appConfig.DynamicConfigsDirPollInterval > 0 {
|
||||
log.Debug().Msg("Poll interval set, falling back to polling for configuration changes")
|
||||
ticker := time.NewTicker(c.appConfig.DynamicConfigsDirPollInterval)
|
||||
go func() {
|
||||
for {
|
||||
<-ticker.C
|
||||
for file, handler := range c.handlers {
|
||||
log.Debug().Str("file", file).Msg("polling config file")
|
||||
c.callHandler(file, handler)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// Start listening for events.
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case event, ok := <-configWatcher.Events:
|
||||
case event, ok := <-c.watcher.Events:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if event.Has(fsnotify.Write) {
|
||||
for targetName, watchFn := range CONFIG_FILE_UPDATES {
|
||||
if event.Name == targetName {
|
||||
err := watchFn(configDir, appConfig)
|
||||
log.Warn().Msgf("WatchConfigDirectory goroutine for %s: failed to update options: %+v", targetName, err)
|
||||
}
|
||||
if event.Has(fsnotify.Write | fsnotify.Create | fsnotify.Remove) {
|
||||
handler, ok := c.handlers[path.Base(event.Name)]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
c.callHandler(event.Name, handler)
|
||||
}
|
||||
case _, ok := <-configWatcher.Errors:
|
||||
case err, ok := <-c.watcher.Errors:
|
||||
log.Error().Err(err).Msg("config watcher error received")
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
log.Error().Err(err).Msg("error encountered while watching config directory")
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Add a path.
|
||||
err = configWatcher.Add(configDir)
|
||||
err = c.watcher.Add(c.appConfig.DynamicConfigsDir)
|
||||
if err != nil {
|
||||
return ret, fmt.Errorf("unable to establish watch on the LocalAI Configuration Directory: %+v", err)
|
||||
return fmt.Errorf("unable to establish watch on the LocalAI Configuration Directory: %+v", err)
|
||||
}
|
||||
|
||||
return ret, nil
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO: When we institute graceful shutdown, this should be called
|
||||
func (c *configFileHandler) Stop() {
|
||||
c.watcher.Close()
|
||||
}
|
||||
|
||||
func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler {
|
||||
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
|
||||
log.Debug().Msg("processing api_keys.json")
|
||||
|
||||
if len(fileContent) > 0 {
|
||||
// Parse JSON content from the file
|
||||
var fileKeys []string
|
||||
err := json.Unmarshal(fileContent, &fileKeys)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
appConfig.ApiKeys = append(startupAppConfig.ApiKeys, fileKeys...)
|
||||
} else {
|
||||
appConfig.ApiKeys = startupAppConfig.ApiKeys
|
||||
}
|
||||
log.Debug().Msg("api keys loaded from api_keys.json")
|
||||
return nil
|
||||
}
|
||||
|
||||
return handler
|
||||
}
|
||||
|
||||
func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHandler {
|
||||
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
|
||||
log.Debug().Msg("processing external_backends.json")
|
||||
|
||||
if len(fileContent) > 0 {
|
||||
// Parse JSON content from the file
|
||||
var fileBackends map[string]string
|
||||
err := json.Unmarshal(fileContent, &fileBackends)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
|
||||
err = mergo.Merge(&appConfig.ExternalGRPCBackends, &fileBackends)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
|
||||
}
|
||||
log.Debug().Msg("external backends loaded from external_backends.json")
|
||||
return nil
|
||||
}
|
||||
return handler
|
||||
}
|
||||
|
||||
@@ -10,18 +10,12 @@ import (
|
||||
"github.com/go-skynet/LocalAI/pkg/assets"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
pkgStartup "github.com/go-skynet/LocalAI/pkg/startup"
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) {
|
||||
options := config.NewApplicationConfig(opts...)
|
||||
|
||||
zerolog.SetGlobalLevel(zerolog.InfoLevel)
|
||||
if options.Debug {
|
||||
zerolog.SetGlobalLevel(zerolog.DebugLevel)
|
||||
}
|
||||
|
||||
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath)
|
||||
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
|
||||
|
||||
@@ -125,6 +119,11 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
|
||||
}()
|
||||
}
|
||||
|
||||
// Watch the configuration directory
|
||||
// If the directory does not exist, we don't watch it
|
||||
configHandler := newConfigFileHandler(options)
|
||||
configHandler.Watch()
|
||||
|
||||
log.Info().Msg("core/startup process completed!")
|
||||
return cl, ml, options, nil
|
||||
}
|
||||
|
||||
@@ -402,6 +402,7 @@ In the help text below, BASEPATH is the location that local-ai is being executed
|
||||
| --upload-path | /tmp/localai/upload | Path to store uploads from files api | $LOCALAI_UPLOAD_PATH |
|
||||
| --config-path | /tmp/localai/config | | $LOCALAI_CONFIG_PATH |
|
||||
| --localai-config-dir | BASEPATH/configuration | Directory for dynamic loading of certain configuration files (currently api_keys.json and external_backends.json; see the sketch below for their formats) | $LOCALAI_CONFIG_DIR |
|
||||
| --localai-config-dir-poll-interval | | Typically the config path picks up changes automatically, but if your system has broken fsnotify events, set this to a time duration to poll the LocalAI Config Dir (example: 1m) | $LOCALAI_CONFIG_DIR_POLL_INTERVAL |
|
||||
| --models-config-file | STRING | YAML file containing a list of model backend configs | $LOCALAI_MODELS_CONFIG_FILE |
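
Going by the loader code in this changeset, `api_keys.json` is a JSON array of key strings and `external_backends.json` is a JSON object mapping backend names to their addresses. Below is a minimal sketch of such a directory; the key values, backend name, and address are placeholders, not defaults:

```bash
# Hypothetical contents for LOCALAI_CONFIG_DIR; all values below are placeholders.
mkdir -p ./configuration

# api_keys.json: a JSON array of API key strings
cat > ./configuration/api_keys.json <<'EOF'
["example-key-1", "example-key-2"]
EOF

# external_backends.json: a JSON object mapping a backend name to its address
cat > ./configuration/external_backends.json <<'EOF'
{ "my-backend": "127.0.0.1:50051" }
EOF

# Point LocalAI at the directory; set the poll interval only if fsnotify
# events are unreliable on your system.
export LOCALAI_CONFIG_DIR="$PWD/configuration"
export LOCALAI_CONFIG_DIR_POLL_INTERVAL=1m
```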
|
||||
|
||||
#### Models Flags
|
||||
|
||||
@@ -12,7 +12,7 @@ Section under construction
|
||||
This section contains instruction on how to use LocalAI with GPU acceleration.
|
||||
|
||||
{{% alert icon="⚡" context="warning" %}}
|
||||
For acceleration for AMD or Metal HW there are no specific container images, see the [build]({{%relref "docs/getting-started/build#Acceleration" %}})
|
||||
Acceleration for AMD or Metal HW is still in development; for additional details see the [build]({{%relref "docs/getting-started/build#Acceleration" %}})
|
||||
{{% /alert %}}
|
||||
|
||||
|
||||
@@ -110,6 +110,143 @@ llama_model_load_internal: total VRAM used: 1598 MB
|
||||
llama_init_from_file: kv self size = 512.00 MB
|
||||
```
|
||||
|
||||
## ROCm (AMD) acceleration
|
||||
|
||||
There are a limited number of tested configurations for ROCm systems; however, most newer dedicated consumer-grade GPU devices seem to be supported under the current ROCm 6 implementation.
|
||||
|
||||
Due to the nature of ROCm it is best to run all implementations in containers, as this limits the number of packages required for installation on the host system. Compatibility and package versions for dependencies across all variations of OS must be tested independently if desired; please refer to the [build]({{%relref "docs/getting-started/build#Acceleration" %}}) documentation.
|
||||
|
||||
### Requirements
|
||||
|
||||
- `ROCm 6.x.x` compatible GPU/accelerator
|
||||
- OS: `Ubuntu` (22.04, 20.04), `RHEL` (9.3, 9.2, 8.9, 8.8), `SLES` (15.5, 15.4)
|
||||
- Installed to host: `amdgpu-dkms` and `rocm` >=6.0.0 as per ROCm documentation.
|
||||
|
||||
### Recommendations
|
||||
|
||||
- Do not use on a system running Wayland.
|
||||
- If running with Xorg, do not use the GPU assigned for compute for desktop rendering.
|
||||
- Ensure at least 100GB of free space on the disk hosting the container runtime and storing images prior to installation.
|
||||
|
||||
### Limitations
|
||||
|
||||
Verification testing of ROCm compatibility with the integrated backends is ongoing.
|
||||
Please note the following list of verified backends and devices.
|
||||
|
||||
### Verified
|
||||
|
||||
The devices in the following list have been tested with `hipblas` images running `ROCm 6.0.0`
|
||||
|
||||
| Backend | Verified | Devices |
|
||||
| ---- | ---- | ---- |
|
||||
| llama.cpp | yes | Radeon VII (gfx906) |
|
||||
| diffusers | yes | Radeon VII (gfx906) |
|
||||
| piper | yes | Radeon VII (gfx906) |
|
||||
| whisper | no | none |
|
||||
| autogptq | no | none |
|
||||
| bark | no | none |
|
||||
| coqui | no | none |
|
||||
| transformers | no | none |
|
||||
| exllama | no | none |
|
||||
| exllama2 | no | none |
|
||||
| mamba | no | none |
|
||||
| petals | no | none |
|
||||
| sentencetransformers | no | none |
|
||||
| transformers-musicgen | no | none |
|
||||
| vall-e-x | no | none |
|
||||
| vllm | no | none |
|
||||
|
||||
**You can help by expanding this list.**
|
||||
|
||||
### System Prep
|
||||
|
||||
1. Check that your GPU LLVM target is compatible with the version of ROCm. This can be found in the [LLVM Docs](https://llvm.org/docs/AMDGPUUsage.html).
|
||||
2. Check which ROCm version is compatible with your LLVM target and your chosen OS (pay special attention to supported kernel versions). See the following for compatibility: ([ROCm 6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/reference/system-requirements.html)) or ([ROCm 6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html))
|
||||
3. Install your chosen version of the `dkms` and `rocm` packages (it is recommended that the native package manager be used for this process on any OS, as version changes are executed more easily via this method if updates are required). Take care to restart after installing `amdgpu-dkms` and before installing `rocm`; for details regarding this see the installation documentation for your chosen OS ([6.0.2](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/native-install/index.html) or [6.0.0](https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.0.0/how-to/native-install/index.html))
|
||||
4. Deploy. Yes it's that easy.
|
||||
|
||||
#### Setup Example (Docker/containerd)
|
||||
|
||||
The following are examples of the ROCm-specific configuration elements required.
|
||||
|
||||
```yaml
|
||||
# docker-compose.yaml
|
||||
# For full functionality select a non-'core' image, version locking the image is recommended for debug purposes.
|
||||
image: quay.io/go-skynet/local-ai:master-aio-gpu-hipblas
|
||||
environment:
|
||||
- DEBUG=true
|
||||
# If your gpu is not already included in the current list of default targets the following build details are required.
|
||||
- REBUILD=true
|
||||
- BUILD_TYPE=hipblas
|
||||
- GPU_TARGETS=gfx906 # Example for Radeon VII
|
||||
devices:
|
||||
# AMD GPU only require the following devices be passed through to the container for offloading to occur.
|
||||
- /dev/dri
|
||||
- /dev/kfd
|
||||
```
|
||||
|
||||
The same can also be executed as a `run` command for your container runtime:
|
||||
|
||||
```
|
||||
docker run \
|
||||
-e DEBUG=true \
|
||||
-e REBUILD=true \
|
||||
-e BUILD_TYPE=hipblas \
|
||||
-e GPU_TARGETS=gfx906 \
|
||||
--device /dev/dri \
|
||||
--device /dev/kfd \
|
||||
quay.io/go-skynet/local-ai:master-aio-gpu-hipblas
|
||||
```
|
||||
|
||||
Please ensure you add all other required environment variables, port forwarding, etc. to your `compose` file or `run` command.
|
||||
|
||||
The rebuild process will take some time to complete when deploying these containers and it is recommended that you `pull` the image prior to deployment as depending on the version these images may be ~20GB in size.
|
||||
|
||||
#### Example (k8s) (Advanced Deployment/WIP)
|
||||
|
||||
For k8s deployments there is an additional step required before deployment: installing the [ROCm/k8s-device-plugin](https://artifacthub.io/packages/helm/amd-gpu-helm/amd-gpu).
|
||||
For any k8s environment, following the documentation provided by AMD for the ROCm project should be sufficient. It is recommended that if you use rke2 or OpenShift you deploy the SUSE or Red Hat provided version of this resource to ensure compatibility.
|
||||
After this has been completed, the [helm chart from go-skynet](https://github.com/go-skynet/helm-charts) can be configured and deployed mostly unedited.
|
||||
|
||||
The following are details of the changes that should be made to ensure proper function.
|
||||
While these details may be configurable in the `values.yaml`, development of this Helm chart is ongoing and they are subject to change.
|
||||
|
||||
The following details indicate the final state of the LocalAI deployment relevant to GPU function.
|
||||
|
||||
```yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {NAME}-local-ai
|
||||
...
|
||||
spec:
|
||||
...
|
||||
template:
|
||||
...
|
||||
spec:
|
||||
containers:
|
||||
- env:
|
||||
- name: HIP_VISIBLE_DEVICES
|
||||
value: '0'
|
||||
# This variable indicates the devices available to the container (0:device1 1:device2 2:device3) etc.
|
||||
# For multiple devices (say device 1 and 3) the value would be equivalent to HIP_VISIBLE_DEVICES="0,2"
|
||||
# Please take note of this when an iGPU is present in the host system, as compatibility is not assured.
|
||||
...
|
||||
resources:
|
||||
limits:
|
||||
amd.com/gpu: '1'
|
||||
requests:
|
||||
amd.com/gpu: '1'
|
||||
```
|
||||
|
||||
This configuration has been tested on a 'custom' cluster managed by SUSE Rancher that was deployed on top of Ubuntu 22.04.4. Certification of other configurations is ongoing and compatibility is not guaranteed.
|
||||
|
||||
### Notes
|
||||
|
||||
- When installing the ROCm kernel driver on your system, ensure that you are installing an equal or newer version than that which is currently implemented in LocalAI (6.0.0 at the time of writing).
|
||||
- AMD documentation indicates that this will ensure functionality; however, your mileage may vary depending on the GPU and distro you are using.
|
||||
- If you encounter an `Error 413` when attempting to upload an audio file or image for whisper or llava/bakllava on a k8s deployment, note that the ingress for your deployment may require the annotation `nginx.ingress.kubernetes.io/proxy-body-size: "25m"` to allow larger uploads. This may be included in future versions of the Helm chart.
|
||||
|
||||
## Intel acceleration (sycl)
|
||||
|
||||
### Requirements
|
||||
|
||||
@@ -257,6 +257,10 @@ parameters:
|
||||
# swap_space: 2
|
||||
# Uncomment to specify the maximum length of a sequence (including prompt and output)
|
||||
# max_model_len: 32768
|
||||
# Uncomment and specify the number of Tensor divisions.
|
||||
# Allows you to partition and run large models. Performance gains are limited.
|
||||
# https://github.com/vllm-project/vllm/issues/1435
|
||||
# tensor_parallel_size: 2
|
||||
```
|
||||
|
||||
The backend will automatically download the required files in order to run the model.
|
||||
@@ -356,4 +360,4 @@ template:
|
||||
|
||||
completion: |
|
||||
{{.Input}}
|
||||
```
|
||||
```
|
||||
|
||||
@@ -15,6 +15,7 @@ The list below is a list of software that integrates with LocalAI.
|
||||
- [AnythingLLM](https://github.com/Mintplex-Labs/anything-llm)
|
||||
- [Logseq GPT3 OpenAI plugin](https://github.com/briansunter/logseq-plugin-gpt3-openai) allows to set a base URL, and works with LocalAI.
|
||||
- https://plugins.jetbrains.com/plugin/21056-codegpt allows for custom OpenAI compatible endpoints since 2.4.0
|
||||
- [Wave Terminal](https://docs.waveterm.dev/features/supportedLLMs/localai) has native support for LocalAI!
|
||||
- https://github.com/longy2k/obsidian-bmo-chatbot
|
||||
- https://github.com/FlowiseAI/Flowise
|
||||
- https://github.com/k8sgpt-ai/k8sgpt
|
||||
|
||||
@@ -56,7 +56,7 @@ icon = "info"
|
||||
|
||||
|
||||
|
||||
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families and architectures. Does not require GPU. It is maintained by [mudler](https://github.com/mudler).
|
||||
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families and architectures. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
|
||||
|
||||
|
||||
## Start LocalAI
|
||||
|
||||
@@ -9,13 +9,14 @@ All-In-One images are images that come pre-configured with a set of models and b
|
||||
|
||||
In the AIO images, models are configured with the names of OpenAI models; however, they are actually backed by Open Source models. You can find the mapping in the table below, followed by a request example.
|
||||
|
||||
| Category | Model name | Real model |
|
||||
| Text Generation | `gpt-4` | `phi-2`(CPU) or `hermes-2-pro-mistral`(GPU) |
|
||||
| Multimodal | `gpt-4-vision-preview` | `bakllava`(CPU) or `llava-1.6-mistral`(GPU) |
|
||||
| Text generation | `stablediffusion` | `stablediffusion`(CPU) `dreamshaper-8` (GPU) |
|
||||
| Audio transcription | `whisper-1` | `whisper` with the `whisper-base` model |
|
||||
| Text to Audio | `tts-1` | the `en-us-amy-low.onnx` model with `rhasspy` |
|
||||
| Embeddings | `text-embedding-ada-002` | |
|
||||
| Category | Model name | Real model (CPU) | Real model (GPU) |
|
||||
| ---- | ---- | ---- | ---- |
|
||||
| Text Generation | `gpt-4` | `phi-2` | `hermes-2-pro-mistral` |
|
||||
| Multimodal Vision | `gpt-4-vision-preview` | `bakllava` | `llava-1.6-mistral` |
|
||||
| Image Generation | `stablediffusion` | `stablediffusion` | `dreamshaper-8` |
|
||||
| Speech to Text | `whisper-1` | `whisper` with `whisper-base` model | <= same |
|
||||
| Text to Speech | `tts-1` | `en-us-amy-low.onnx` from `rhasspy/piper` | <= same |
|
||||
| Embeddings | `text-embedding-ada-002` | `all-MiniLM-L6-v2` in Q4 | `all-MiniLM-L6-v2` |
|
||||
|
||||
## Usage
|
||||
|
||||
|
||||
@@ -6,15 +6,22 @@ parameters:
|
||||
template:
|
||||
chat_message: |
|
||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
||||
{{- if .FunctionCall }}<tool_call>{{end}}
|
||||
{{- if eq .RoleName "tool" }}<tool_result>{{end }}
|
||||
{{- if .Content}}
|
||||
{{.Content}}
|
||||
{{- if .FunctionCall }}
|
||||
<tool_call>
|
||||
{{- else if eq .RoleName "tool" }}
|
||||
<tool_response>
|
||||
{{- end }}
|
||||
{{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
|
||||
{{- if .FunctionCall }}</tool_call>{{end }}
|
||||
{{- if eq .RoleName "tool" }}</tool_result>{{end }}
|
||||
<|im_end|>
|
||||
{{- if .Content}}
|
||||
{{.Content }}
|
||||
{{- end }}
|
||||
{{- if .FunctionCall}}
|
||||
{{toJson .FunctionCall}}
|
||||
{{- end }}
|
||||
{{- if .FunctionCall }}
|
||||
</tool_call>
|
||||
{{- else if eq .RoleName "tool" }}
|
||||
</tool_response>
|
||||
{{- end }}<|im_end|>
|
||||
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
|
||||
function: |
|
||||
<|im_start|>system
|
||||
@@ -29,8 +36,7 @@ template:
|
||||
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
|
||||
<tool_call>
|
||||
{'arguments': <args-dict>, 'name': <function-name>}
|
||||
</tool_call>
|
||||
<|im_end|>
|
||||
</tool_call><|im_end|>
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
<tool_call>
|
||||
|
||||
48
embedded/models/llama3-instruct.yaml
Normal file
@@ -0,0 +1,48 @@
|
||||
name: llama3-8b-instruct
|
||||
mmap: true
|
||||
parameters:
|
||||
model: huggingface://second-state/Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
|
||||
|
||||
template:
|
||||
chat_message: |
|
||||
<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
|
||||
|
||||
{{ if .FunctionCall -}}
|
||||
Function call:
|
||||
{{ else if eq .RoleName "tool" -}}
|
||||
Function response:
|
||||
{{ end -}}
|
||||
{{ if .Content -}}
|
||||
{{.Content -}}
|
||||
{{ else if .FunctionCall -}}
|
||||
{{ toJson .FunctionCall -}}
|
||||
{{ end -}}
|
||||
<|eot_id|>
|
||||
function: |
|
||||
<|start_header_id|>system<|end_header_id|>
|
||||
|
||||
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||
<tools>
|
||||
{{range .Functions}}
|
||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||
{{end}}
|
||||
</tools>
|
||||
Use the following pydantic model json schema for each tool call you will make:
|
||||
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
|
||||
Function call:
|
||||
chat: |
|
||||
<|begin_of_text|>{{.Input }}
|
||||
<|start_header_id|>assistant<|end_header_id|>
|
||||
completion: |
|
||||
{{.Input}}
|
||||
context_size: 8192
|
||||
f16: true
|
||||
stopwords:
|
||||
- <|im_end|>
|
||||
- <dummy32000>
|
||||
- "<|eot_id|>"
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "llama3-8b-instruct",
|
||||
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
|
||||
}'
|
||||
@@ -1,4 +1,4 @@
|
||||
aiohttp==3.9.2
|
||||
aiohttp==3.9.4
|
||||
aiosignal==1.3.1
|
||||
async-timeout==4.0.2
|
||||
attrs==23.1.0
|
||||
@@ -20,7 +20,7 @@ numpy==1.24.3
|
||||
openai==0.27.6
|
||||
openapi-schema-pydantic==1.2.4
|
||||
packaging==23.1
|
||||
pydantic==1.10.7
|
||||
pydantic==1.10.13
|
||||
PyYAML==6.0
|
||||
requests==2.31.0
|
||||
SQLAlchemy==2.0.12
|
||||
|
||||
11
gallery/bert-embeddings.yaml
Normal file
@@ -0,0 +1,11 @@
|
||||
name: "bert-embeddings"
|
||||
|
||||
config_file: |
|
||||
parameters:
|
||||
model: bert-MiniLM-L6-v2q4_0.bin
|
||||
backend: bert-embeddings
|
||||
embeddings: true
|
||||
files:
|
||||
- filename: "bert-MiniLM-L6-v2q4_0.bin"
|
||||
sha256: "a5a174d8772c8a569faf9f3136c441f2c3855b5bf35ed32274294219533feaad"
|
||||
uri: "https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin"
|
||||
19
gallery/cerbero.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
config_file: |
|
||||
backend: llama-cpp
|
||||
context_size: 8192
|
||||
f16: false
|
||||
name: cerbero
|
||||
|
||||
template:
|
||||
completion: "{{.Input}}"
|
||||
chat: "Questa è una conversazione tra un umano ed un assistente AI.\n{{.Input}}\n[|Assistente|] "
|
||||
roles:
|
||||
user: "[|Umano|] "
|
||||
system: "[|Umano|] "
|
||||
assistant: "[|Assistente|] "
|
||||
|
||||
stopwords:
|
||||
- "[|Umano|]"
|
||||
|
||||
trimsuffix:
|
||||
- "\n"
|
||||
7
gallery/codellama.yaml
Normal file
@@ -0,0 +1,7 @@
|
||||
name: "codellama"
|
||||
|
||||
config_file: |
|
||||
backend: llama-cpp
|
||||
context_size: 4096
|
||||
f16: true
|
||||
mmap: true
|
||||
13
gallery/dreamshaper.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
name: "dreamshaper"
|
||||
|
||||
|
||||
config_file: |
|
||||
backend: diffusers
|
||||
step: 25
|
||||
f16: true
|
||||
|
||||
diffusers:
|
||||
pipeline_type: StableDiffusionPipeline
|
||||
cuda: true
|
||||
enable_parameters: "negative_prompt,num_inference_steps"
|
||||
scheduler_type: "k_dpmpp_2m"
|
||||
55
gallery/hermes-2-pro-mistral.yaml
Normal file
@@ -0,0 +1,55 @@
|
||||
name: "hermes-2-pro-mistral"
|
||||
|
||||
|
||||
config_file: |
|
||||
mmap: true
|
||||
template:
|
||||
chat_message: |
|
||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
||||
{{- if .FunctionCall }}
|
||||
<tool_call>
|
||||
{{- else if eq .RoleName "tool" }}
|
||||
<tool_response>
|
||||
{{- end }}
|
||||
{{- if .Content}}
|
||||
{{.Content }}
|
||||
{{- end }}
|
||||
{{- if .FunctionCall}}
|
||||
{{toJson .FunctionCall}}
|
||||
{{- end }}
|
||||
{{- if .FunctionCall }}
|
||||
</tool_call>
|
||||
{{- else if eq .RoleName "tool" }}
|
||||
</tool_response>
|
||||
{{- end }}<|im_end|>
|
||||
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
|
||||
function: |
|
||||
<|im_start|>system
|
||||
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||
<tools>
|
||||
{{range .Functions}}
|
||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||
{{end}}
|
||||
</tools>
|
||||
Use the following pydantic model json schema for each tool call you will make:
|
||||
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
|
||||
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
|
||||
<tool_call>
|
||||
{'arguments': <args-dict>, 'name': <function-name>}
|
||||
</tool_call><|im_end|>
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
<tool_call>
|
||||
chat: |
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
completion: |
|
||||
{{.Input}}
|
||||
context_size: 4096
|
||||
f16: true
|
||||
stopwords:
|
||||
- <|im_end|>
|
||||
- <dummy32000>
|
||||
- "\n</tool_call>"
|
||||
- "\n\n\n"
|
||||
|
||||
1288
gallery/index.yaml
Normal file
File diff suppressed because it is too large
43
gallery/llama3-instruct.yaml
Normal file
@@ -0,0 +1,43 @@
|
||||
name: "llama3-instruct"
|
||||
|
||||
|
||||
config_file: |
|
||||
mmap: true
|
||||
template:
|
||||
chat_message: |
|
||||
<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
|
||||
|
||||
{{ if .FunctionCall -}}
|
||||
Function call:
|
||||
{{ else if eq .RoleName "tool" -}}
|
||||
Function response:
|
||||
{{ end -}}
|
||||
{{ if .Content -}}
|
||||
{{.Content -}}
|
||||
{{ else if .FunctionCall -}}
|
||||
{{ toJson .FunctionCall -}}
|
||||
{{ end -}}
|
||||
<|eot_id|>
|
||||
function: |
|
||||
<|start_header_id|>system<|end_header_id|>
|
||||
|
||||
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||
<tools>
|
||||
{{range .Functions}}
|
||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||
{{end}}
|
||||
</tools>
|
||||
Use the following pydantic model json schema for each tool call you will make:
|
||||
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
|
||||
Function call:
|
||||
chat: |
|
||||
<|begin_of_text|>{{.Input }}
|
||||
<|start_header_id|>assistant<|end_header_id|>
|
||||
completion: |
|
||||
{{.Input}}
|
||||
context_size: 8192
|
||||
f16: true
|
||||
stopwords:
|
||||
- <|im_end|>
|
||||
- <dummy32000>
|
||||
- "<|eot_id|>"
|
||||
19
gallery/llava.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
name: "llava"
|
||||
|
||||
|
||||
config_file: |
|
||||
backend: llama-cpp
|
||||
context_size: 4096
|
||||
f16: true
|
||||
|
||||
mmap: true
|
||||
roles:
|
||||
user: "USER:"
|
||||
assistant: "ASSISTANT:"
|
||||
system: "SYSTEM:"
|
||||
|
||||
template:
|
||||
chat: |
|
||||
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
||||
{{.Input}}
|
||||
ASSISTANT:
|
||||
53
gallery/noromaid.yaml
Normal file
@@ -0,0 +1,53 @@
|
||||
config_file: |
|
||||
mmap: true
|
||||
backend: llama-cpp
|
||||
template:
|
||||
chat_message: |
|
||||
<|im_{{if eq .RoleName "assistant"}}bot{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}|>
|
||||
{{- if .FunctionCall }}
|
||||
<tool_call>
|
||||
{{- else if eq .RoleName "tool" }}
|
||||
<tool_response>
|
||||
{{- end }}
|
||||
{{- if .Content}}
|
||||
{{.Content }}
|
||||
{{- end }}
|
||||
{{- if .FunctionCall}}
|
||||
{{toJson .FunctionCall}}
|
||||
{{- end }}
|
||||
{{- if .FunctionCall }}
|
||||
</tool_call>
|
||||
{{- else if eq .RoleName "tool" }}
|
||||
</tool_response>
|
||||
{{- end }}<|im_end|>
|
||||
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
|
||||
function: |
|
||||
<|im_system|>
|
||||
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||
<tools>
|
||||
{{range .Functions}}
|
||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||
{{end}}
|
||||
</tools>
|
||||
Use the following pydantic model json schema for each tool call you will make:
|
||||
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
|
||||
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
|
||||
<tool_call>
|
||||
{'arguments': <args-dict>, 'name': <function-name>}
|
||||
</tool_call><|im_end|>
|
||||
{{.Input -}}
|
||||
<|im_bot|>
|
||||
<tool_call>
|
||||
chat: |
|
||||
{{.Input -}}
|
||||
<|im_bot|>
|
||||
completion: |
|
||||
{{.Input}}
|
||||
context_size: 4096
|
||||
f16: true
|
||||
stopwords:
|
||||
- <|im_end|>
|
||||
- <dummy32000>
|
||||
- "\n</tool_call>"
|
||||
- "\n\n\n"
|
||||
|
||||
2
gallery/parler-tts.yaml
Normal file
@@ -0,0 +1,2 @@
|
||||
config_file: |
|
||||
backend: parler-tts
|
||||
19
gallery/phi-2-chat.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
name: "phi-2-chatml"
|
||||
|
||||
|
||||
config_file: |
|
||||
mmap: true
|
||||
template:
|
||||
chat_message: |
|
||||
<|im_start|>{{ .RoleName }}
|
||||
{{.Content}}<|im_end|>
|
||||
chat: |
|
||||
{{.Input}}
|
||||
<|im_start|>assistant
|
||||
completion: |
|
||||
{{.Input}}
|
||||
context_size: 4096
|
||||
f16: true
|
||||
stopwords:
|
||||
- <|im_end|>
|
||||
|
||||
18
gallery/phi-2-orange.yaml
Normal file
@@ -0,0 +1,18 @@
|
||||
name: "phi-2-orange"
|
||||
|
||||
config_file: |
|
||||
mmap: true
|
||||
template:
|
||||
chat_message: |
|
||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
|
||||
{{if .Content}}{{.Content}}{{end}}<|im_end|>
|
||||
chat: |
|
||||
{{.Input}}
|
||||
<|im_start|>assistant
|
||||
completion: |
|
||||
{{.Input}}
|
||||
context_size: 4096
|
||||
f16: true
|
||||
stopwords:
|
||||
- <|im_end|>
|
||||
- <dummy32000>
|
||||
18
gallery/phi-3-chat.yaml
Normal file
@@ -0,0 +1,18 @@
|
||||
name: "phi-3-chat"
|
||||
|
||||
config_file: |
|
||||
mmap: true
|
||||
template:
|
||||
chat_message: |
|
||||
<|{{ .RoleName }}|>
|
||||
{{.Content}}<|end|>
|
||||
chat: |
|
||||
{{.Input}}
|
||||
<|assistant|>
|
||||
completion: |
|
||||
{{.Input}}
|
||||
context_size: 4096
|
||||
f16: true
|
||||
stopwords:
|
||||
- <|end|>
|
||||
|
||||
2
gallery/piper.yaml
Normal file
@@ -0,0 +1,2 @@
|
||||
config_file: |
|
||||
backend: piper
|
||||
2
gallery/rerankers.yaml
Normal file
@@ -0,0 +1,2 @@
|
||||
config_file: |
|
||||
backend: rerankers
|
||||
4
gallery/sentencetransformers.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
name: "sentencetransformers"
|
||||
|
||||
config_file: |
|
||||
backend: sentencetransformers
|
||||
48
gallery/stablediffusion.yaml
Normal file
@@ -0,0 +1,48 @@
|
||||
name: "stablediffusion-cpp"
|
||||
|
||||
config_file: |
|
||||
name: stablediffusion-cpp
|
||||
backend: stablediffusion
|
||||
parameters:
|
||||
model: stablediffusion_assets
|
||||
|
||||
files:
|
||||
- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
|
||||
sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
|
||||
- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
|
||||
sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
|
||||
- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
|
||||
sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
|
||||
- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
|
||||
sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
|
||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin"
|
||||
- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin"
|
||||
sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
|
||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin"
|
||||
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
|
||||
sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
|
||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin"
|
||||
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
|
||||
sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
|
||||
- filename: "stablediffusion_assets/log_sigmas.bin"
|
||||
sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
|
||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
|
||||
- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
|
||||
sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
|
||||
- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
|
||||
sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
|
||||
- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
|
||||
sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
|
||||
- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
|
||||
sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
|
||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
|
||||
- filename: "stablediffusion_assets/vocab.txt"
|
||||
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
|
||||
36
gallery/tinydream.yaml
Normal file
@@ -0,0 +1,36 @@
|
||||
name: "tinydream"
|
||||
|
||||
config_file: |
|
||||
name: tinydream
|
||||
backend: tinydream
|
||||
parameters:
|
||||
model: tinydream_assets
|
||||
|
||||
files:
|
||||
- filename: "tinydream_assets/AutoencoderKL-fp16.bin"
|
||||
sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
|
||||
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/AutoencoderKL-fp16.bin"
|
||||
- filename: "tinydream_assets/AutoencoderKL-fp16.param"
|
||||
sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
|
||||
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/AutoencoderKL-fp16.param"
|
||||
- filename: "tinydream_assets/FrozenCLIPEmbedder-fp16.bin"
|
||||
sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
|
||||
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/FrozenCLIPEmbedder-fp16.bin"
|
||||
- filename: "tinydream_assets/FrozenCLIPEmbedder-fp16.param"
|
||||
sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
|
||||
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/FrozenCLIPEmbedder-fp16.param"
|
||||
- filename: "tinydream_assets/RealESRGAN_x4plus_anime.bin"
|
||||
sha256: "fe01c269cfd10cdef8e018ab66ebe750cf79c7af4d1f9c16c737e1295229bacc"
|
||||
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/RealESRGAN_x4plus_anime.bin"
|
||||
- filename: "tinydream_assets/RealESRGAN_x4plus_anime.param"
|
||||
sha256: "2b8fb6e0ae4d2d85704ca08c119a2f5ea40add4f2ecd512eb7f4cd44b6127ed4"
|
||||
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/RealESRGAN_x4plus_anime.param"
|
||||
- filename: "tinydream_assets/UNetModel-fp16.bin"
|
||||
sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
|
||||
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/UNetModel-fp16.bin"
|
||||
- filename: "tinydream_assets/UNetModel-fp16.param"
|
||||
sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
|
||||
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/UNetModel-fp16.param"
|
||||
- filename: "tinydream_assets/vocab.txt"
|
||||
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
|
||||
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/vocab.txt"
|
||||
21
gallery/vicuna-chat.yaml
Normal file
@@ -0,0 +1,21 @@
|
||||
name: "vicuna-chat"
|
||||
|
||||
description: |
|
||||
Vicuna chat
|
||||
|
||||
license: "LLaMA"
|
||||
|
||||
config_file: |
|
||||
backend: llama-cpp
|
||||
context_size: 4096
|
||||
roles:
|
||||
user: "User: "
|
||||
system: "System: "
|
||||
assistant: "Assistant: "
|
||||
f16: true
|
||||
template:
|
||||
completion: |
|
||||
Complete the following sentence: {{.Input}}
|
||||
chat: |
|
||||
{{.Input}}
|
||||
ASSISTANT:
|
||||
6
gallery/virtual.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
name: "virtual"
|
||||
|
||||
description: |
|
||||
A Base model definition
|
||||
|
||||
license: "N/A"
|
||||
12
gallery/whisper-base.yaml
Normal file
@@ -0,0 +1,12 @@
|
||||
name: "whisper-base"
|
||||
|
||||
|
||||
config_file: |
|
||||
backend: whisper
|
||||
parameters:
|
||||
model: ggml-whisper-base.bin
|
||||
|
||||
files:
|
||||
- filename: "ggml-whisper-base.bin"
|
||||
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
|
||||
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
|
||||
8
go.mod
@@ -1,6 +1,8 @@
|
||||
module github.com/go-skynet/LocalAI
|
||||
|
||||
go 1.21
|
||||
go 1.21.1
|
||||
|
||||
toolchain go1.22.2
|
||||
|
||||
require (
|
||||
github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf
|
||||
@@ -29,7 +31,7 @@ require (
|
||||
github.com/otiai10/openaigo v1.6.0
|
||||
github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5
|
||||
github.com/prometheus/client_golang v1.17.0
|
||||
github.com/rs/zerolog v1.31.0
|
||||
github.com/rs/zerolog v1.32.0
|
||||
github.com/russross/blackfriday v1.6.0
|
||||
github.com/sashabaranov/go-openai v1.20.4
|
||||
github.com/schollz/progressbar/v3 v3.13.1
|
||||
@@ -71,6 +73,7 @@ require (
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/cenkalti/backoff/v4 v4.1.3 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.2.0 // indirect
|
||||
github.com/chasefleming/elem-go v0.25.0 // indirect
|
||||
github.com/containerd/continuity v0.3.0 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/dlclark/regexp2 v1.8.1 // indirect
|
||||
@@ -145,6 +148,7 @@ require (
|
||||
github.com/go-audio/riff v1.0.0 // indirect
|
||||
github.com/go-logr/logr v1.2.4 // indirect
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
|
||||
github.com/gofiber/contrib/fiberzerolog v1.0.0
|
||||
github.com/google/go-cmp v0.6.0 // indirect
|
||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
|
||||
github.com/hashicorp/errwrap v1.0.0 // indirect
|
||||
|
||||
6
go.sum
@@ -37,6 +37,8 @@ github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj
|
||||
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/charmbracelet/glamour v0.7.0 h1:2BtKGZ4iVJCDfMF229EzbeR1QRKLWztO9dMtjmqZSng=
|
||||
github.com/charmbracelet/glamour v0.7.0/go.mod h1:jUMh5MeihljJPQbJ/wf4ldw2+yBP59+ctV36jASy7ps=
|
||||
github.com/chasefleming/elem-go v0.25.0 h1:LYzr1auk39Bh3bdKloArOFV7sOBnOfSOKxsg58eWL0Q=
|
||||
github.com/chasefleming/elem-go v0.25.0/go.mod h1:hz73qILBIKnTgOujnSMtEj20/epI+f6vg71RUilJAA4=
|
||||
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
||||
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
|
||||
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
|
||||
@@ -100,6 +102,8 @@ github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg78
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
|
||||
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
github.com/gofiber/contrib/fiberzerolog v1.0.0 h1:IB8q+NO2zPNS4VHKde1x5DqtMJ5vGrvDCydnAjlFw3E=
|
||||
github.com/gofiber/contrib/fiberzerolog v1.0.0/go.mod h1:SOi+Wo7RQlO/HV0jsYTu6uFQy+8ZPTzCZW4fDEKD3l8=
|
||||
github.com/gofiber/fiber/v2 v2.52.4 h1:P+T+4iK7VaqUsq2PALYEfBBo6bJZ4q3FP8cZ84EggTM=
|
||||
github.com/gofiber/fiber/v2 v2.52.4/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ=
|
||||
github.com/gofiber/swagger v1.0.0 h1:BzUzDS9ZT6fDUa692kxmfOjc1DZiloLiPK/W5z1H1tc=
|
||||
@@ -281,6 +285,8 @@ github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUz
|
||||
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
|
||||
github.com/rs/zerolog v1.31.0 h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A=
|
||||
github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
|
||||
github.com/rs/zerolog v1.32.0 h1:keLypqrlIjaFsbmJOBdB/qvyF8KEtCWHwobLp5l/mQ0=
|
||||
github.com/rs/zerolog v1.32.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
|
||||
github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww=
|
||||
github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY=
|
||||
github.com/sashabaranov/go-openai v1.20.4 h1:095xQ/fAtRa0+Rj21sezVJABgKfGPNbyx/sAN/hJUmg=
|
||||
|
||||
12
main.go
@@ -72,6 +72,7 @@ Version: ${version}
|
||||
kong.Vars{
|
||||
"basepath": kong.ExpandPath("."),
|
||||
"remoteLibraryURL": "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml",
|
||||
"galleries": `[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}]`,
|
||||
"version": internal.PrintableVersion(),
|
||||
},
|
||||
)
|
||||
@@ -91,17 +92,20 @@ Version: ${version}
|
||||
|
||||
switch *cli.CLI.LogLevel {
|
||||
case "error":
|
||||
log.Info().Msg("Setting logging to error")
|
||||
zerolog.SetGlobalLevel(zerolog.ErrorLevel)
|
||||
log.Info().Msg("Setting logging to error")
|
||||
case "warn":
|
||||
log.Info().Msg("Setting logging to warn")
|
||||
zerolog.SetGlobalLevel(zerolog.WarnLevel)
|
||||
log.Info().Msg("Setting logging to warn")
|
||||
case "info":
|
||||
log.Info().Msg("Setting logging to info")
|
||||
zerolog.SetGlobalLevel(zerolog.InfoLevel)
|
||||
log.Info().Msg("Setting logging to info")
|
||||
case "debug":
|
||||
log.Info().Msg("Setting logging to debug")
|
||||
zerolog.SetGlobalLevel(zerolog.DebugLevel)
|
||||
log.Debug().Msg("Setting logging to debug")
|
||||
case "trace":
|
||||
zerolog.SetGlobalLevel(zerolog.TraceLevel)
|
||||
log.Trace().Msg("Setting logging to trace")
|
||||
}
|
||||
|
||||
// Populate the application with the embedded backend assets
|
||||
|
||||
@@ -5,6 +5,8 @@ import "hash"
|
||||
type progressWriter struct {
|
||||
fileName string
|
||||
total int64
|
||||
fileNo int
|
||||
totalFiles int
|
||||
written int64
|
||||
downloadStatus func(string, string, string, float64)
|
||||
hash hash.Hash
|
||||
@@ -16,6 +18,17 @@ func (pw *progressWriter) Write(p []byte) (n int, err error) {
|
||||
|
||||
if pw.total > 0 {
|
||||
percentage := float64(pw.written) / float64(pw.total) * 100
|
||||
if pw.totalFiles > 1 {
|
||||
// This is a multi-file download
|
||||
// so we need to adjust the percentage
|
||||
// to reflect the progress of the whole download
|
||||
// This is the file pw.fileNo of pw.totalFiles files. We assume that
|
||||
// the files before this one were downloaded successfully.
|
||||
percentage = percentage / float64(pw.totalFiles)
|
||||
if pw.fileNo > 1 {
|
||||
percentage += float64(pw.fileNo-1) * 100 / float64(pw.totalFiles)
|
||||
}
|
||||
}
|
||||
//log.Debug().Msgf("Downloading %s: %s/%s (%.2f%%)", pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage)
|
||||
pw.downloadStatus(pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage)
|
||||
} else {
|
||||
|
||||
Some files were not shown because too many files have changed in this diff