mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-24 08:38:02 -04:00
Compare commits
76 Commits
v1.30.0
...
enable_gpu
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a8e91345e2 | ||
|
|
ea4ade6b60 | ||
|
|
803a0ac02a | ||
|
|
bde87d00b9 | ||
|
|
0eae727366 | ||
|
|
3b4c5d54d8 | ||
|
|
4e16bc2f13 | ||
|
|
562ac62f59 | ||
|
|
796d0c99aa | ||
|
|
e7fa2e06f8 | ||
|
|
8123f009d0 | ||
|
|
622aaa9f7d | ||
|
|
7b1ee203ce | ||
|
|
f347e51927 | ||
|
|
9b17af18b3 | ||
|
|
23c7fbfe6b | ||
|
|
035fea676a | ||
|
|
6e1a234d15 | ||
|
|
5b596ea605 | ||
|
|
6bd56460de | ||
|
|
6ef7ea2635 | ||
|
|
f8c00fbaf1 | ||
|
|
d9a42cc4c5 | ||
|
|
fc0bc32814 | ||
|
|
c62504ac92 | ||
|
|
f227e918f9 | ||
|
|
c132dbadce | ||
|
|
b839eb80a1 | ||
|
|
23b03a7f03 | ||
|
|
9196583651 | ||
|
|
fd28252e55 | ||
|
|
94f20e2eb7 | ||
|
|
5ced99a8e7 | ||
|
|
c377e61ff0 | ||
|
|
a6fe0a020a | ||
|
|
bf2ed3d752 | ||
|
|
d17a92eef3 | ||
|
|
1a7be035d3 | ||
|
|
004baaa30f | ||
|
|
ef19268418 | ||
|
|
e82470341f | ||
|
|
88fa42de75 | ||
|
|
432513c3ba | ||
|
|
45370c212b | ||
|
|
e91f660eb1 | ||
|
|
3f3162e57c | ||
|
|
208d1fce58 | ||
|
|
128694213f | ||
|
|
8034ed3473 | ||
|
|
d22069c59e | ||
|
|
5a04d32b39 | ||
|
|
ab65f3a17d | ||
|
|
4e23cbebcf | ||
|
|
63418c1afc | ||
|
|
8ca671761a | ||
|
|
81a5ed9f31 | ||
|
|
528b9d9206 | ||
|
|
1a4c57fac2 | ||
|
|
44a7045732 | ||
|
|
8ac7186185 | ||
|
|
975387f7ae | ||
|
|
d793b5af5e | ||
|
|
5188776224 | ||
|
|
07249c0446 | ||
|
|
188301f403 | ||
|
|
e660721a0c | ||
|
|
e029cc66bc | ||
|
|
e34b5f0119 | ||
|
|
c223364816 | ||
|
|
74fd5844ca | ||
|
|
4ebc86df84 | ||
|
|
8cd03eff58 | ||
|
|
46660a16a0 | ||
|
|
27b097309e | ||
|
|
d0fa1f8e94 | ||
|
|
55e38fea0e |
3
.env
3
.env
@@ -67,3 +67,6 @@ MODELS_PATH=/models
|
|||||||
### Default number of workers for GRPC Python backends.
|
### Default number of workers for GRPC Python backends.
|
||||||
### This actually controls whether a backend can process multiple requests or not.
|
### This actually controls whether a backend can process multiple requests or not.
|
||||||
# PYTHON_GRPC_MAX_WORKERS=1
|
# PYTHON_GRPC_MAX_WORKERS=1
|
||||||
|
|
||||||
|
### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
|
||||||
|
# LLAMACPP_PARALLEL=1
|
||||||
5
.github/workflows/bump_deps.yaml
vendored
5
.github/workflows/bump_deps.yaml
vendored
@@ -12,6 +12,9 @@ jobs:
|
|||||||
- repository: "go-skynet/go-llama.cpp"
|
- repository: "go-skynet/go-llama.cpp"
|
||||||
variable: "GOLLAMA_VERSION"
|
variable: "GOLLAMA_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
|
- repository: "ggerganov/llama.cpp"
|
||||||
|
variable: "CPPLLAMA_VERSION"
|
||||||
|
branch: "master"
|
||||||
- repository: "go-skynet/go-ggml-transformers.cpp"
|
- repository: "go-skynet/go-ggml-transformers.cpp"
|
||||||
variable: "GOGGMLTRANSFORMERS_VERSION"
|
variable: "GOGGMLTRANSFORMERS_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
@@ -41,7 +44,7 @@ jobs:
|
|||||||
branch: "master"
|
branch: "master"
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v4
|
||||||
- name: Bump dependencies 🔧
|
- name: Bump dependencies 🔧
|
||||||
run: |
|
run: |
|
||||||
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
|
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
|
||||||
|
|||||||
94
.github/workflows/image.yml
vendored
94
.github/workflows/image.yml
vendored
@@ -14,15 +14,21 @@ concurrency:
|
|||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
docker:
|
image-build:
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- build-type: ''
|
- build-type: ''
|
||||||
platforms: 'linux/amd64,linux/arm64'
|
#platforms: 'linux/amd64,linux/arm64'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
tag-suffix: ''
|
tag-suffix: ''
|
||||||
ffmpeg: ''
|
ffmpeg: ''
|
||||||
|
- build-type: ''
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-ffmpeg'
|
||||||
|
ffmpeg: 'true'
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: 11
|
cuda-major-version: 11
|
||||||
cuda-minor-version: 7
|
cuda-minor-version: 7
|
||||||
@@ -37,11 +43,6 @@ jobs:
|
|||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-cublas-cuda12'
|
tag-suffix: '-cublas-cuda12'
|
||||||
ffmpeg: ''
|
ffmpeg: ''
|
||||||
- build-type: ''
|
|
||||||
platforms: 'linux/amd64,linux/arm64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
tag-suffix: '-ffmpeg'
|
|
||||||
ffmpeg: 'true'
|
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: 11
|
cuda-major-version: 11
|
||||||
cuda-minor-version: 7
|
cuda-minor-version: 7
|
||||||
@@ -57,43 +58,54 @@ jobs:
|
|||||||
tag-suffix: '-cublas-cuda12-ffmpeg'
|
tag-suffix: '-cublas-cuda12-ffmpeg'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
|
|
||||||
runs-on: ubuntu-latest
|
runs-on: arc-runner-set
|
||||||
steps:
|
steps:
|
||||||
- name: Release space from worker
|
- name: Force Install GIT latest
|
||||||
run: |
|
run: |
|
||||||
echo "Listing top largest packages"
|
sudo apt-get update \
|
||||||
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
&& sudo apt-get install -y software-properties-common \
|
||||||
head -n 30 <<< "${pkgs}"
|
&& sudo apt-get update \
|
||||||
echo
|
&& sudo add-apt-repository -y ppa:git-core/ppa \
|
||||||
df -h
|
&& sudo apt-get update \
|
||||||
echo
|
&& sudo apt-get install -y git
|
||||||
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
|
|
||||||
sudo apt-get remove --auto-remove android-sdk-platform-tools || true
|
|
||||||
sudo apt-get purge --auto-remove android-sdk-platform-tools || true
|
|
||||||
sudo rm -rf /usr/local/lib/android
|
|
||||||
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
|
|
||||||
sudo rm -rf /usr/share/dotnet
|
|
||||||
sudo apt-get remove -y '^mono-.*' || true
|
|
||||||
sudo apt-get remove -y '^ghc-.*' || true
|
|
||||||
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
|
|
||||||
sudo apt-get remove -y 'php.*' || true
|
|
||||||
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
|
|
||||||
sudo apt-get remove -y '^google-.*' || true
|
|
||||||
sudo apt-get remove -y azure-cli || true
|
|
||||||
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
|
|
||||||
sudo apt-get remove -y '^gfortran-.*' || true
|
|
||||||
sudo apt-get autoremove -y
|
|
||||||
sudo apt-get clean
|
|
||||||
echo
|
|
||||||
echo "Listing top largest packages"
|
|
||||||
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
|
||||||
head -n 30 <<< "${pkgs}"
|
|
||||||
echo
|
|
||||||
sudo rm -rfv build || true
|
|
||||||
df -h
|
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
|
# - name: Release space from worker
|
||||||
|
# run: |
|
||||||
|
# echo "Listing top largest packages"
|
||||||
|
# pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
||||||
|
# head -n 30 <<< "${pkgs}"
|
||||||
|
# echo
|
||||||
|
# df -h
|
||||||
|
# echo
|
||||||
|
# sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
|
||||||
|
# sudo apt-get remove --auto-remove android-sdk-platform-tools || true
|
||||||
|
# sudo apt-get purge --auto-remove android-sdk-platform-tools || true
|
||||||
|
# sudo rm -rf /usr/local/lib/android
|
||||||
|
# sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
|
||||||
|
# sudo rm -rf /usr/share/dotnet
|
||||||
|
# sudo apt-get remove -y '^mono-.*' || true
|
||||||
|
# sudo apt-get remove -y '^ghc-.*' || true
|
||||||
|
# sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
|
||||||
|
# sudo apt-get remove -y 'php.*' || true
|
||||||
|
# sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
|
||||||
|
# sudo apt-get remove -y '^google-.*' || true
|
||||||
|
# sudo apt-get remove -y azure-cli || true
|
||||||
|
# sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
|
||||||
|
# sudo apt-get remove -y '^gfortran-.*' || true
|
||||||
|
# sudo apt-get remove -y microsoft-edge-stable || true
|
||||||
|
# sudo apt-get remove -y firefox || true
|
||||||
|
# sudo apt-get remove -y powershell || true
|
||||||
|
# sudo apt-get remove -y r-base-core || true
|
||||||
|
# sudo apt-get autoremove -y
|
||||||
|
# sudo apt-get clean
|
||||||
|
# echo
|
||||||
|
# echo "Listing top largest packages"
|
||||||
|
# pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
||||||
|
# head -n 30 <<< "${pkgs}"
|
||||||
|
# echo
|
||||||
|
# sudo rm -rfv build || true
|
||||||
|
# df -h
|
||||||
- name: Docker meta
|
- name: Docker meta
|
||||||
id: meta
|
id: meta
|
||||||
uses: docker/metadata-action@v5
|
uses: docker/metadata-action@v5
|
||||||
|
|||||||
18
.github/workflows/release.yaml
vendored
18
.github/workflows/release.yaml
vendored
@@ -19,7 +19,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- uses: actions/setup-go@v4
|
- uses: actions/setup-go@v4
|
||||||
@@ -29,6 +29,12 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install build-essential ffmpeg
|
sudo apt-get install build-essential ffmpeg
|
||||||
|
|
||||||
|
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||||
|
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||||
|
-DgRPC_BUILD_TESTS=OFF \
|
||||||
|
../.. && sudo make -j12 install
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
id: build
|
id: build
|
||||||
env:
|
env:
|
||||||
@@ -60,18 +66,26 @@ jobs:
|
|||||||
runs-on: macOS-latest
|
runs-on: macOS-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- uses: actions/setup-go@v4
|
- uses: actions/setup-go@v4
|
||||||
with:
|
with:
|
||||||
go-version: '>=1.21.0'
|
go-version: '>=1.21.0'
|
||||||
|
- name: Dependencies
|
||||||
|
run: |
|
||||||
|
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||||
|
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||||
|
-DgRPC_BUILD_TESTS=OFF \
|
||||||
|
../.. && make -j12 install && rm -rf grpc
|
||||||
- name: Build
|
- name: Build
|
||||||
id: build
|
id: build
|
||||||
env:
|
env:
|
||||||
CMAKE_ARGS: "${{ matrix.defines }}"
|
CMAKE_ARGS: "${{ matrix.defines }}"
|
||||||
BUILD_ID: "${{ matrix.build }}"
|
BUILD_ID: "${{ matrix.build }}"
|
||||||
run: |
|
run: |
|
||||||
|
export C_INCLUDE_PATH=/usr/local/include
|
||||||
|
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||||
make dist
|
make dist
|
||||||
- uses: actions/upload-artifact@v3
|
- uses: actions/upload-artifact@v3
|
||||||
with:
|
with:
|
||||||
|
|||||||
63
.github/workflows/test-gpu.yml
vendored
Normal file
63
.github/workflows/test-gpu.yml
vendored
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
---
|
||||||
|
name: 'GPU tests'
|
||||||
|
|
||||||
|
on:
|
||||||
|
pull_request:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
tags:
|
||||||
|
- '*'
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ci-gpu-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
ubuntu-latest:
|
||||||
|
runs-on: gpu
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
go-version: ['1.21.x']
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
submodules: true
|
||||||
|
- name: Setup Go ${{ matrix.go-version }}
|
||||||
|
uses: actions/setup-go@v4
|
||||||
|
with:
|
||||||
|
go-version: ${{ matrix.go-version }}
|
||||||
|
# You can test your matrix by printing the current Go version
|
||||||
|
- name: Display Go version
|
||||||
|
run: go version
|
||||||
|
- name: Dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y make wget
|
||||||
|
- name: Build
|
||||||
|
run: |
|
||||||
|
if [ ! -e /run/systemd/system ]; then
|
||||||
|
sudo mkdir /run/systemd/system
|
||||||
|
fi
|
||||||
|
sudo mkdir -p /host/tests/${{ github.head_ref || github.ref }}
|
||||||
|
sudo chmod -R 777 /host/tests/${{ github.head_ref || github.ref }}
|
||||||
|
make \
|
||||||
|
TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
|
||||||
|
BUILD_TYPE=cublas \
|
||||||
|
prepare-e2e run-e2e-image test-e2e
|
||||||
|
- name: Release space from worker ♻
|
||||||
|
if: always()
|
||||||
|
run: |
|
||||||
|
sudo rm -rf build || true
|
||||||
|
sudo rm -rf bin || true
|
||||||
|
sudo rm -rf dist || true
|
||||||
|
sudo docker logs $(sudo docker ps -q --filter ancestor=localai-tests) > logs.txt
|
||||||
|
sudo cat logs.txt || true
|
||||||
|
sudo rm -rf logs.txt
|
||||||
|
make clean || true
|
||||||
|
make \
|
||||||
|
TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
|
||||||
|
teardown-e2e || true
|
||||||
|
sudo rm -rf /host/tests/${{ github.head_ref || github.ref }} || true
|
||||||
|
docker system prune -f -a --volumes || true
|
||||||
89
.github/workflows/test.yml
vendored
89
.github/workflows/test.yml
vendored
@@ -14,14 +14,46 @@ concurrency:
|
|||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
ubuntu-latest:
|
tests-linux:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
go-version: ['1.21.x']
|
go-version: ['1.21.x']
|
||||||
steps:
|
steps:
|
||||||
|
- name: Release space from worker
|
||||||
|
run: |
|
||||||
|
echo "Listing top largest packages"
|
||||||
|
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
||||||
|
head -n 30 <<< "${pkgs}"
|
||||||
|
echo
|
||||||
|
df -h
|
||||||
|
echo
|
||||||
|
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
|
||||||
|
sudo apt-get remove --auto-remove android-sdk-platform-tools || true
|
||||||
|
sudo apt-get purge --auto-remove android-sdk-platform-tools || true
|
||||||
|
sudo rm -rf /usr/local/lib/android
|
||||||
|
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
|
||||||
|
sudo rm -rf /usr/share/dotnet
|
||||||
|
sudo apt-get remove -y '^mono-.*' || true
|
||||||
|
sudo apt-get remove -y '^ghc-.*' || true
|
||||||
|
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
|
||||||
|
sudo apt-get remove -y 'php.*' || true
|
||||||
|
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
|
||||||
|
sudo apt-get remove -y '^google-.*' || true
|
||||||
|
sudo apt-get remove -y azure-cli || true
|
||||||
|
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
|
||||||
|
sudo apt-get remove -y '^gfortran-.*' || true
|
||||||
|
sudo apt-get autoremove -y
|
||||||
|
sudo apt-get clean
|
||||||
|
echo
|
||||||
|
echo "Listing top largest packages"
|
||||||
|
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
||||||
|
head -n 30 <<< "${pkgs}"
|
||||||
|
echo
|
||||||
|
sudo rm -rfv build || true
|
||||||
|
df -h
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go ${{ matrix.go-version }}
|
- name: Setup Go ${{ matrix.go-version }}
|
||||||
@@ -35,38 +67,43 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install build-essential ffmpeg
|
sudo apt-get install build-essential ffmpeg
|
||||||
|
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||||
|
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||||
|
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||||
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||||
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
|
sudo apt-get update && \
|
||||||
|
sudo apt-get install -y conda
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch
|
sudo apt-get install -y ca-certificates cmake curl patch
|
||||||
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||||
sudo pip install -r extra/requirements.txt
|
|
||||||
|
|
||||||
sudo mkdir /build && sudo chmod -R 777 /build && cd /build && \
|
sudo rm -rfv /usr/bin/conda || true
|
||||||
curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v1.11.0.tar.gz" | \
|
PATH=$PATH:/opt/conda/bin make -C extra/grpc/huggingface
|
||||||
tar -xzvf - && \
|
|
||||||
mkdir -p "spdlog-1.11.0/build" && \
|
# Pre-build piper before we start tests in order to have shared libraries in place
|
||||||
cd "spdlog-1.11.0/build" && \
|
make go-piper && \
|
||||||
cmake .. && \
|
GO_TAGS="tts" make -C go-piper piper.o && \
|
||||||
make -j8 && \
|
sudo cp -rfv go-piper/piper/build/pi/lib/. /usr/lib/ && \
|
||||||
sudo cmake --install . --prefix /usr && mkdir -p "lib/Linux-$(uname -m)" && \
|
|
||||||
cd /build && \
|
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
|
||||||
mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \
|
GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||||
curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v1.0.0/libpiper_phonemize-amd64.tar.gz" | \
|
|
||||||
tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
|
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||||
sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
|
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||||
sudo ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \
|
-DgRPC_BUILD_TESTS=OFF \
|
||||||
sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
|
../.. && sudo make -j12 install
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
ESPEAK_DATA="/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data" GO_TAGS="tts stablediffusion" make test
|
GO_TAGS="stablediffusion tts" make test
|
||||||
|
|
||||||
macOS-latest:
|
tests-apple:
|
||||||
runs-on: macOS-latest
|
runs-on: macOS-latest
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
go-version: ['1.21.x']
|
go-version: ['1.21.x']
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go ${{ matrix.go-version }}
|
- name: Setup Go ${{ matrix.go-version }}
|
||||||
@@ -76,6 +113,14 @@ jobs:
|
|||||||
# You can test your matrix by printing the current Go version
|
# You can test your matrix by printing the current Go version
|
||||||
- name: Display Go version
|
- name: Display Go version
|
||||||
run: go version
|
run: go version
|
||||||
|
- name: Dependencies
|
||||||
|
run: |
|
||||||
|
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||||
|
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||||
|
-DgRPC_BUILD_TESTS=OFF \
|
||||||
|
../.. && make -j12 install && rm -rf grpc
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
|
export C_INCLUDE_PATH=/usr/local/include
|
||||||
|
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||||
CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test
|
CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test
|
||||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -10,6 +10,8 @@ go-ggllm
|
|||||||
__pycache__/
|
__pycache__/
|
||||||
*.a
|
*.a
|
||||||
get-sources
|
get-sources
|
||||||
|
/backend/cpp/llama/grpc-server
|
||||||
|
/backend/cpp/llama/llama.cpp
|
||||||
|
|
||||||
go-ggml-transformers
|
go-ggml-transformers
|
||||||
go-gpt2
|
go-gpt2
|
||||||
|
|||||||
159
Dockerfile
159
Dockerfile
@@ -1,22 +1,27 @@
|
|||||||
ARG GO_VERSION=1.21-bullseye
|
ARG GO_VERSION=1.21-bullseye
|
||||||
|
ARG IMAGE_TYPE=extras
|
||||||
|
# extras or core
|
||||||
|
|
||||||
FROM golang:$GO_VERSION as requirements
|
|
||||||
|
FROM golang:$GO_VERSION as requirements-core
|
||||||
|
|
||||||
ARG BUILD_TYPE
|
ARG BUILD_TYPE
|
||||||
ARG CUDA_MAJOR_VERSION=11
|
ARG CUDA_MAJOR_VERSION=11
|
||||||
ARG CUDA_MINOR_VERSION=7
|
ARG CUDA_MINOR_VERSION=7
|
||||||
ARG SPDLOG_VERSION="1.11.0"
|
|
||||||
ARG PIPER_PHONEMIZE_VERSION='1.0.0'
|
|
||||||
ARG TARGETARCH
|
ARG TARGETARCH
|
||||||
ARG TARGETVARIANT
|
ARG TARGETVARIANT
|
||||||
|
|
||||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||||
ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/extra/grpc/huggingface/huggingface.py,autogptq:/build/extra/grpc/autogptq/autogptq.py,bark:/build/extra/grpc/bark/ttsbark.py,diffusers:/build/extra/grpc/diffusers/backend_diffusers.py,exllama:/build/extra/grpc/exllama/exllama.py,vall-e-x:/build/extra/grpc/vall-e-x/ttsvalle.py,vllm:/build/extra/grpc/vllm/backend_vllm.py"
|
ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/extra/grpc/huggingface/run.sh,autogptq:/build/extra/grpc/autogptq/run.sh,bark:/build/extra/grpc/bark/run.sh,diffusers:/build/extra/grpc/diffusers/run.sh,exllama:/build/extra/grpc/exllama/run.sh,vall-e-x:/build/extra/grpc/vall-e-x/run.sh,vllm:/build/extra/grpc/vllm/run.sh"
|
||||||
ENV GALLERIES='[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]'
|
ENV GALLERIES='[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]'
|
||||||
ARG GO_TAGS="stablediffusion tts"
|
ARG GO_TAGS="stablediffusion tts"
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y ca-certificates cmake curl patch pip
|
apt-get install -y ca-certificates curl patch pip cmake && apt-get clean
|
||||||
|
|
||||||
|
|
||||||
|
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
|
||||||
|
RUN update-ca-certificates
|
||||||
|
|
||||||
# Use the variables in subsequent instructions
|
# Use the variables in subsequent instructions
|
||||||
RUN echo "Target Architecture: $TARGETARCH"
|
RUN echo "Target Architecture: $TARGETARCH"
|
||||||
@@ -30,66 +35,62 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
|
|||||||
dpkg -i cuda-keyring_1.0-1_all.deb && \
|
dpkg -i cuda-keyring_1.0-1_all.deb && \
|
||||||
rm -f cuda-keyring_1.0-1_all.deb && \
|
rm -f cuda-keyring_1.0-1_all.deb && \
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
|
||||||
; fi
|
; fi
|
||||||
ENV PATH /usr/local/cuda/bin:${PATH}
|
ENV PATH /usr/local/cuda/bin:${PATH}
|
||||||
|
|
||||||
|
# OpenBLAS requirements and stable diffusion
|
||||||
|
RUN apt-get install -y \
|
||||||
|
libopenblas-dev \
|
||||||
|
libopencv-dev \
|
||||||
|
&& apt-get clean
|
||||||
|
|
||||||
|
# Set up OpenCV
|
||||||
|
RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||||
|
|
||||||
|
WORKDIR /build
|
||||||
|
|
||||||
|
RUN test -n "$TARGETARCH" \
|
||||||
|
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
|
||||||
|
|
||||||
# Extras requirements
|
# Extras requirements
|
||||||
|
FROM requirements-core as requirements-extras
|
||||||
|
|
||||||
|
RUN curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||||
|
install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||||
|
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||||
|
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \
|
||||||
|
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list && \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y conda
|
||||||
|
|
||||||
COPY extra/requirements.txt /build/extra/requirements.txt
|
COPY extra/requirements.txt /build/extra/requirements.txt
|
||||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||||
RUN pip install --upgrade pip
|
RUN pip install --upgrade pip
|
||||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||||
RUN if [ "${TARGETARCH}" = "amd64" ]; then \
|
#RUN if [ "${TARGETARCH}" = "amd64" ]; then \
|
||||||
pip install git+https://github.com/suno-ai/bark.git diffusers invisible_watermark transformers accelerate safetensors;\
|
# pip install git+https://github.com/suno-ai/bark.git diffusers invisible_watermark transformers accelerate safetensors;\
|
||||||
fi
|
# fi
|
||||||
RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "amd64" ]; then \
|
#RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "amd64" ]; then \
|
||||||
pip install torch vllm && pip install auto-gptq https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}-cp39-cp39-linux_x86_64.whl;\
|
# pip install torch vllm && pip install auto-gptq https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}-cp39-cp39-linux_x86_64.whl;\
|
||||||
fi
|
# fi
|
||||||
RUN pip install -r /build/extra/requirements.txt && rm -rf /build/extra/requirements.txt
|
#RUN pip install -r /build/extra/requirements.txt && rm -rf /build/extra/requirements.txt
|
||||||
|
|
||||||
# Vall-e-X
|
# Vall-e-X
|
||||||
RUN git clone https://github.com/Plachtaa/VALL-E-X.git /usr/lib/vall-e-x && cd /usr/lib/vall-e-x && pip install -r requirements.txt
|
RUN git clone https://github.com/Plachtaa/VALL-E-X.git /usr/lib/vall-e-x && cd /usr/lib/vall-e-x && pip install -r requirements.txt
|
||||||
|
|
||||||
WORKDIR /build
|
|
||||||
|
|
||||||
# OpenBLAS requirements
|
|
||||||
RUN apt-get install -y libopenblas-dev
|
|
||||||
|
|
||||||
# Stable Diffusion requirements
|
|
||||||
RUN apt-get install -y libopencv-dev && \
|
|
||||||
ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
|
||||||
|
|
||||||
|
|
||||||
# piper requirements
|
|
||||||
# Use pre-compiled Piper phonemization library (includes onnxruntime)
|
|
||||||
#RUN if echo "${GO_TAGS}" | grep -q "tts"; then \
|
|
||||||
RUN test -n "$TARGETARCH" \
|
|
||||||
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
|
|
||||||
|
|
||||||
RUN curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v${SPDLOG_VERSION}.tar.gz" | \
|
|
||||||
tar -xzvf - && \
|
|
||||||
mkdir -p "spdlog-${SPDLOG_VERSION}/build" && \
|
|
||||||
cd "spdlog-${SPDLOG_VERSION}/build" && \
|
|
||||||
cmake .. && \
|
|
||||||
make -j8 && \
|
|
||||||
cmake --install . --prefix /usr && mkdir -p "lib/Linux-$(uname -m)" && \
|
|
||||||
cd /build && \
|
|
||||||
mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \
|
|
||||||
curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v${PIPER_PHONEMIZE_VERSION}/libpiper_phonemize-${TARGETARCH:-$(go env GOARCH)}${TARGETVARIANT}.tar.gz" | \
|
|
||||||
tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
|
|
||||||
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
|
|
||||||
ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \
|
|
||||||
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
|
|
||||||
# \
|
# \
|
||||||
# ; fi
|
# ; fi
|
||||||
|
|
||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
FROM requirements as builder
|
FROM requirements-${IMAGE_TYPE} as builder
|
||||||
|
|
||||||
ARG GO_TAGS="stablediffusion tts"
|
ARG GO_TAGS="stablediffusion tts"
|
||||||
|
ARG GRPC_BACKENDS
|
||||||
|
ARG BUILD_GRPC=true
|
||||||
|
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
|
||||||
ENV GO_TAGS=${GO_TAGS}
|
ENV GO_TAGS=${GO_TAGS}
|
||||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||||
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
||||||
@@ -104,21 +105,43 @@ RUN make prepare
|
|||||||
COPY . .
|
COPY . .
|
||||||
COPY .git .
|
COPY .git .
|
||||||
|
|
||||||
RUN ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data make build
|
# stablediffusion does not tolerate a newer version of abseil, build it first
|
||||||
|
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||||
|
|
||||||
|
RUN if [ "${BUILD_GRPC}" = "true" ]; then \
|
||||||
|
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||||
|
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||||
|
-DgRPC_BUILD_TESTS=OFF \
|
||||||
|
../.. && make -j12 install && rm -rf grpc \
|
||||||
|
; fi
|
||||||
|
|
||||||
|
# Rebuild with the default backends
|
||||||
|
RUN make build
|
||||||
|
|
||||||
|
RUN if [ ! -d "/build/go-piper/piper/build/pi/lib/" ]; then \
|
||||||
|
mkdir -p /build/go-piper/piper/build/pi/lib/ \
|
||||||
|
touch /build/go-piper/piper/build/pi/lib/keep \
|
||||||
|
; fi
|
||||||
|
|
||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
FROM requirements
|
FROM requirements-${IMAGE_TYPE}
|
||||||
|
|
||||||
ARG FFMPEG
|
ARG FFMPEG
|
||||||
ARG BUILD_TYPE
|
ARG BUILD_TYPE
|
||||||
ARG TARGETARCH
|
ARG TARGETARCH
|
||||||
|
ARG IMAGE_TYPE=extras
|
||||||
|
|
||||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||||
ENV REBUILD=false
|
ENV REBUILD=false
|
||||||
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
|
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
|
||||||
|
|
||||||
|
ARG CUDA_MAJOR_VERSION=11
|
||||||
|
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||||
|
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
||||||
|
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||||
|
|
||||||
# Add FFmpeg
|
# Add FFmpeg
|
||||||
RUN if [ "${FFMPEG}" = "true" ]; then \
|
RUN if [ "${FFMPEG}" = "true" ]; then \
|
||||||
apt-get install -y ffmpeg \
|
apt-get install -y ffmpeg \
|
||||||
@@ -132,15 +155,49 @@ WORKDIR /build
|
|||||||
# https://github.com/go-skynet/LocalAI/pull/434
|
# https://github.com/go-skynet/LocalAI/pull/434
|
||||||
COPY . .
|
COPY . .
|
||||||
RUN make prepare-sources
|
RUN make prepare-sources
|
||||||
|
|
||||||
|
# Copy the binary
|
||||||
COPY --from=builder /build/local-ai ./
|
COPY --from=builder /build/local-ai ./
|
||||||
|
|
||||||
# Copy VALLE-X as it's not a real "lib"
|
# Copy shared libraries for piper
|
||||||
RUN cp -rfv /usr/lib/vall-e-x/* ./
|
COPY --from=builder /build/go-piper/piper/build/pi/lib/* /usr/lib/
|
||||||
|
|
||||||
# To resolve exllama import error
|
# do not let stablediffusion rebuild (requires an older version of absl)
|
||||||
RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH:-$(go env GOARCH)}" = "amd64" ]; then \
|
COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
|
||||||
|
|
||||||
|
## Duplicated from Makefile to avoid having a big layer that's hard to push
|
||||||
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
|
PATH=$PATH:/opt/conda/bin make -C extra/grpc/autogptq \
|
||||||
|
; fi
|
||||||
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
|
PATH=$PATH:/opt/conda/bin make -C extra/grpc/bark \
|
||||||
|
; fi
|
||||||
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
|
PATH=$PATH:/opt/conda/bin make -C extra/grpc/diffusers \
|
||||||
|
; fi
|
||||||
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
|
PATH=$PATH:/opt/conda/bin make -C extra/grpc/vllm \
|
||||||
|
; fi
|
||||||
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
|
PATH=$PATH:/opt/conda/bin make -C extra/grpc/huggingface \
|
||||||
|
; fi
|
||||||
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
|
PATH=$PATH:/opt/conda/bin make -C extra/grpc/vall-e-x \
|
||||||
|
; fi
|
||||||
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
|
PATH=$PATH:/opt/conda/bin make -C extra/grpc/exllama \
|
||||||
|
; fi
|
||||||
|
|
||||||
|
# Copy VALLE-X as it's not a real "lib"
|
||||||
|
RUN if [ -d /usr/lib/vall-e-x ]; then \
|
||||||
|
cp -rfv /usr/lib/vall-e-x/* ./ ; \
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Also copy the exllama libs over to resolve the exllama import error
|
||||||
|
RUN if [ -d /usr/local/lib/python3.9/dist-packages/exllama ]; then \
|
||||||
cp -rfv /usr/local/lib/python3.9/dist-packages/exllama extra/grpc/exllama/;\
|
cp -rfv /usr/local/lib/python3.9/dist-packages/exllama extra/grpc/exllama/;\
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Define the health check command
|
# Define the health check command
|
||||||
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
|
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
|
||||||
CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
|
CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
|
||||||
|
|||||||
179
Makefile
179
Makefile
@@ -4,10 +4,12 @@ GOVET=$(GOCMD) vet
|
|||||||
BINARY_NAME=local-ai
|
BINARY_NAME=local-ai
|
||||||
|
|
||||||
# llama.cpp versions
|
# llama.cpp versions
|
||||||
GOLLAMA_VERSION?=d9f6176409de0a2b5ce798de502545c6721e346e
|
GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0
|
||||||
|
|
||||||
GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
|
GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
|
||||||
|
|
||||||
|
CPPLLAMA_VERSION?=a75fa576abba9d37f463580c379e4bbf1e1ad03c
|
||||||
|
|
||||||
# gpt4all version
|
# gpt4all version
|
||||||
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
||||||
GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8
|
GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8
|
||||||
@@ -26,24 +28,23 @@ WHISPER_CPP_VERSION?=85ed71aaec8e0612a84c0b67804bde75aa75a273
|
|||||||
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
|
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
|
||||||
|
|
||||||
# go-piper version
|
# go-piper version
|
||||||
PIPER_VERSION?=56b8a81b4760a6fbee1a82e62f007ae7e8f010a7
|
PIPER_VERSION?=736f6fb639ab8e3397356e48eeb6bdcb9da88a78
|
||||||
|
|
||||||
# go-bloomz version
|
|
||||||
BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f
|
|
||||||
|
|
||||||
# stablediffusion version
|
# stablediffusion version
|
||||||
STABLEDIFFUSION_VERSION?=d89260f598afb809279bc72aa0107b4292587632
|
STABLEDIFFUSION_VERSION?=d89260f598afb809279bc72aa0107b4292587632
|
||||||
|
|
||||||
# Go-ggllm
|
|
||||||
GOGGLLM_VERSION?=862477d16eefb0805261c19c9b0d053e3b2b684b
|
|
||||||
|
|
||||||
export BUILD_TYPE?=
|
export BUILD_TYPE?=
|
||||||
|
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
|
||||||
export CMAKE_ARGS?=
|
export CMAKE_ARGS?=
|
||||||
CGO_LDFLAGS?=
|
CGO_LDFLAGS?=
|
||||||
CUDA_LIBPATH?=/usr/local/cuda/lib64/
|
CUDA_LIBPATH?=/usr/local/cuda/lib64/
|
||||||
GO_TAGS?=
|
GO_TAGS?=
|
||||||
BUILD_ID?=git
|
BUILD_ID?=git
|
||||||
|
|
||||||
|
TEST_DIR=/tmp/test
|
||||||
|
|
||||||
|
RANDOM := $(shell bash -c 'echo $$RANDOM')
|
||||||
|
|
||||||
VERSION?=$(shell git describe --always --tags || echo "dev" )
|
VERSION?=$(shell git describe --always --tags || echo "dev" )
|
||||||
# go tool nm ./local-ai | grep Commit
|
# go tool nm ./local-ai | grep Commit
|
||||||
LD_FLAGS?=
|
LD_FLAGS?=
|
||||||
@@ -51,7 +52,6 @@ override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Version=$(VERSION
|
|||||||
override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
|
override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
|
||||||
|
|
||||||
OPTIONAL_TARGETS?=
|
OPTIONAL_TARGETS?=
|
||||||
ESPEAK_DATA?=
|
|
||||||
|
|
||||||
OS := $(shell uname -s)
|
OS := $(shell uname -s)
|
||||||
ARCH := $(shell uname -m)
|
ARCH := $(shell uname -m)
|
||||||
@@ -61,6 +61,9 @@ WHITE := $(shell tput -Txterm setaf 7)
|
|||||||
CYAN := $(shell tput -Txterm setaf 6)
|
CYAN := $(shell tput -Txterm setaf 6)
|
||||||
RESET := $(shell tput -Txterm sgr0)
|
RESET := $(shell tput -Txterm sgr0)
|
||||||
|
|
||||||
|
# Default Docker bridge IP
|
||||||
|
E2E_BRIDGE_IP?=172.17.0.1
|
||||||
|
|
||||||
ifndef UNAME_S
|
ifndef UNAME_S
|
||||||
UNAME_S := $(shell uname -s)
|
UNAME_S := $(shell uname -s)
|
||||||
endif
|
endif
|
||||||
@@ -82,6 +85,18 @@ ifeq ($(BUILD_TYPE),cublas)
|
|||||||
export LLAMA_CUBLAS=1
|
export LLAMA_CUBLAS=1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_TYPE),hipblas)
|
||||||
|
ROCM_HOME ?= /opt/rocm
|
||||||
|
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||||
|
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||||
|
# Llama-stable has no hipblas support, so override it here.
|
||||||
|
export STABLE_BUILD_TYPE=
|
||||||
|
GPU_TARGETS ?= gfx900,gfx90a,gfx1030,gfx1031,gfx1100
|
||||||
|
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
||||||
|
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
||||||
|
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),metal)
|
ifeq ($(BUILD_TYPE),metal)
|
||||||
CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
|
CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
|
||||||
export LLAMA_METAL=1
|
export LLAMA_METAL=1
|
||||||
@@ -104,10 +119,18 @@ endif
|
|||||||
ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
||||||
# OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
|
# OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
|
||||||
# OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
|
# OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
|
||||||
|
PIPER_CGO_CXXFLAGS+=-I$(shell pwd)/go-piper/piper/src/cpp -I$(shell pwd)/go-piper/piper/build/fi/include -I$(shell pwd)/go-piper/piper/build/pi/include -I$(shell pwd)/go-piper/piper/build/si/include
|
||||||
|
PIPER_CGO_LDFLAGS+=-L$(shell pwd)/go-piper/piper/build/fi/lib -L$(shell pwd)/go-piper/piper/build/pi/lib -L$(shell pwd)/go-piper/piper/build/si/lib -lfmt -lspdlog
|
||||||
OPTIONAL_GRPC+=backend-assets/grpc/piper
|
OPTIONAL_GRPC+=backend-assets/grpc/piper
|
||||||
endif
|
endif
|
||||||
|
|
||||||
GRPC_BACKENDS?=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/falcon backend-assets/grpc/bloomz backend-assets/grpc/llama backend-assets/grpc/llama-stable backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
|
ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-stable backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
|
||||||
|
GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
|
||||||
|
|
||||||
|
# If GRPC_BACKENDS is empty, build all backends
|
||||||
|
ifeq ($(GRPC_BACKENDS),)
|
||||||
|
GRPC_BACKENDS=$(ALL_GRPC_BACKENDS)
|
||||||
|
endif
|
||||||
|
|
||||||
.PHONY: all test build vendor
|
.PHONY: all test build vendor
|
||||||
|
|
||||||
@@ -118,14 +141,6 @@ gpt4all:
|
|||||||
git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all
|
git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all
|
||||||
cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
|
cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
|
||||||
|
|
||||||
## go-ggllm
|
|
||||||
go-ggllm:
|
|
||||||
git clone --recurse-submodules https://github.com/mudler/go-ggllm.cpp go-ggllm
|
|
||||||
cd go-ggllm && git checkout -b build $(GOGGLLM_VERSION) && git submodule update --init --recursive --depth 1
|
|
||||||
|
|
||||||
go-ggllm/libggllm.a: go-ggllm
|
|
||||||
$(MAKE) -C go-ggllm BUILD_TYPE=$(BUILD_TYPE) libggllm.a
|
|
||||||
|
|
||||||
## go-piper
|
## go-piper
|
||||||
go-piper:
|
go-piper:
|
||||||
git clone --recurse-submodules https://github.com/mudler/go-piper go-piper
|
git clone --recurse-submodules https://github.com/mudler/go-piper go-piper
|
||||||
@@ -152,14 +167,6 @@ go-rwkv:
|
|||||||
go-rwkv/librwkv.a: go-rwkv
|
go-rwkv/librwkv.a: go-rwkv
|
||||||
cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
|
cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
|
||||||
|
|
||||||
## bloomz
|
|
||||||
bloomz:
|
|
||||||
git clone --recurse-submodules https://github.com/go-skynet/bloomz.cpp bloomz
|
|
||||||
cd bloomz && git checkout -b build $(BLOOMZ_VERSION) && git submodule update --init --recursive --depth 1
|
|
||||||
|
|
||||||
bloomz/libbloomz.a: bloomz
|
|
||||||
cd bloomz && make libbloomz.a
|
|
||||||
|
|
||||||
go-bert/libgobert.a: go-bert
|
go-bert/libgobert.a: go-bert
|
||||||
$(MAKE) -C go-bert libgobert.a
|
$(MAKE) -C go-bert libgobert.a
|
||||||
|
|
||||||
@@ -169,14 +176,10 @@ backend-assets/gpt4all: gpt4all/gpt4all-bindings/golang/libgpt4all.a
|
|||||||
@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
|
@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
|
||||||
@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
|
@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
|
||||||
|
|
||||||
backend-assets/espeak-ng-data:
|
backend-assets/espeak-ng-data: go-piper
|
||||||
mkdir -p backend-assets/espeak-ng-data
|
mkdir -p backend-assets/espeak-ng-data
|
||||||
ifdef ESPEAK_DATA
|
$(MAKE) -C go-piper piper.o
|
||||||
@cp -rf $(ESPEAK_DATA)/. backend-assets/espeak-ng-data
|
@cp -rf go-piper/piper/build/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
|
||||||
else
|
|
||||||
@echo "ESPEAK_DATA not set, skipping tts. Note that this will break the tts functionality."
|
|
||||||
@touch backend-assets/espeak-ng-data/keep
|
|
||||||
endif
|
|
||||||
|
|
||||||
gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
|
gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
|
||||||
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ libgpt4all.a
|
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ libgpt4all.a
|
||||||
@@ -208,12 +211,12 @@ go-llama/libbinding.a: go-llama
|
|||||||
$(MAKE) -C go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
|
$(MAKE) -C go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
|
||||||
|
|
||||||
go-llama-stable/libbinding.a: go-llama-stable
|
go-llama-stable/libbinding.a: go-llama-stable
|
||||||
$(MAKE) -C go-llama-stable BUILD_TYPE=$(BUILD_TYPE) libbinding.a
|
$(MAKE) -C go-llama-stable BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
|
||||||
|
|
||||||
go-piper/libpiper_binding.a:
|
go-piper/libpiper_binding.a: go-piper
|
||||||
$(MAKE) -C go-piper libpiper_binding.a example/main
|
$(MAKE) -C go-piper libpiper_binding.a example/main
|
||||||
|
|
||||||
get-sources: go-llama go-llama-stable go-ggllm go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion
|
get-sources: go-llama go-llama-stable go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert go-stable-diffusion
|
||||||
touch $@
|
touch $@
|
||||||
|
|
||||||
replace:
|
replace:
|
||||||
@@ -222,10 +225,8 @@ replace:
|
|||||||
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
|
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
|
||||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
|
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
|
||||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
|
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
|
||||||
$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz
|
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion
|
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(shell pwd)/go-piper
|
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(shell pwd)/go-piper
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-ggllm.cpp=$(shell pwd)/go-ggllm
|
|
||||||
|
|
||||||
prepare-sources: get-sources replace
|
prepare-sources: get-sources replace
|
||||||
$(GOCMD) mod download
|
$(GOCMD) mod download
|
||||||
@@ -241,9 +242,7 @@ rebuild: ## Rebuilds the project
|
|||||||
$(MAKE) -C whisper.cpp clean
|
$(MAKE) -C whisper.cpp clean
|
||||||
$(MAKE) -C go-stable-diffusion clean
|
$(MAKE) -C go-stable-diffusion clean
|
||||||
$(MAKE) -C go-bert clean
|
$(MAKE) -C go-bert clean
|
||||||
$(MAKE) -C bloomz clean
|
|
||||||
$(MAKE) -C go-piper clean
|
$(MAKE) -C go-piper clean
|
||||||
$(MAKE) -C go-ggllm clean
|
|
||||||
$(MAKE) build
|
$(MAKE) build
|
||||||
|
|
||||||
prepare: prepare-sources $(OPTIONAL_TARGETS)
|
prepare: prepare-sources $(OPTIONAL_TARGETS)
|
||||||
@@ -261,12 +260,14 @@ clean: ## Remove build related file
|
|||||||
rm -rf ./backend-assets
|
rm -rf ./backend-assets
|
||||||
rm -rf ./go-rwkv
|
rm -rf ./go-rwkv
|
||||||
rm -rf ./go-bert
|
rm -rf ./go-bert
|
||||||
rm -rf ./bloomz
|
|
||||||
rm -rf ./whisper.cpp
|
rm -rf ./whisper.cpp
|
||||||
rm -rf ./go-piper
|
rm -rf ./go-piper
|
||||||
rm -rf ./go-ggllm
|
|
||||||
rm -rf $(BINARY_NAME)
|
rm -rf $(BINARY_NAME)
|
||||||
rm -rf release/
|
rm -rf release/
|
||||||
|
rm -rf ./backend/cpp/grpc/grpc_repo
|
||||||
|
rm -rf ./backend/cpp/grpc/build
|
||||||
|
rm -rf ./backend/cpp/grpc/installed_packages
|
||||||
|
$(MAKE) -C backend/cpp/llama clean
|
||||||
|
|
||||||
## Build:
|
## Build:
|
||||||
|
|
||||||
@@ -289,12 +290,12 @@ run: prepare ## run local-ai
|
|||||||
test-models/testmodel:
|
test-models/testmodel:
|
||||||
mkdir test-models
|
mkdir test-models
|
||||||
mkdir test-dir
|
mkdir test-dir
|
||||||
wget https://huggingface.co/nnakasato/ggml-model-test/resolve/main/ggml-model-q4.bin -O test-models/testmodel
|
wget -q https://huggingface.co/nnakasato/ggml-model-test/resolve/main/ggml-model-q4.bin -O test-models/testmodel
|
||||||
wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
||||||
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O test-models/bert
|
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
|
||||||
wget https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
||||||
wget https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv
|
wget -q https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv
|
||||||
wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
|
wget -q https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
|
||||||
cp tests/models_fixtures/* test-models
|
cp tests/models_fixtures/* test-models
|
||||||
|
|
||||||
prepare-test: grpcs
|
prepare-test: grpcs
|
||||||
@@ -305,14 +306,34 @@ test: prepare test-models/testmodel grpcs
|
|||||||
@echo 'Running tests'
|
@echo 'Running tests'
|
||||||
export GO_TAGS="tts stablediffusion"
|
export GO_TAGS="tts stablediffusion"
|
||||||
$(MAKE) prepare-test
|
$(MAKE) prepare-test
|
||||||
HUGGINGFACE_GRPC=$(abspath ./)/extra/grpc/huggingface/huggingface.py TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
HUGGINGFACE_GRPC=$(abspath ./)/extra/grpc/huggingface/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts 5 -v -r ./api ./pkg
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts 5 --fail-fast -v -r ./api ./pkg
|
||||||
$(MAKE) test-gpt4all
|
$(MAKE) test-gpt4all
|
||||||
$(MAKE) test-llama
|
$(MAKE) test-llama
|
||||||
$(MAKE) test-llama-gguf
|
$(MAKE) test-llama-gguf
|
||||||
$(MAKE) test-tts
|
$(MAKE) test-tts
|
||||||
$(MAKE) test-stablediffusion
|
$(MAKE) test-stablediffusion
|
||||||
|
|
||||||
|
prepare-e2e:
|
||||||
|
mkdir -p $(TEST_DIR)
|
||||||
|
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
|
||||||
|
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
|
||||||
|
docker build --build-arg BUILD_GRPC=true --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
|
||||||
|
|
||||||
|
run-e2e-image:
|
||||||
|
ls -liah $(abspath ./tests/e2e-fixtures)
|
||||||
|
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
|
||||||
|
|
||||||
|
test-e2e:
|
||||||
|
@echo 'Running e2e tests'
|
||||||
|
BUILD_TYPE=$(BUILD_TYPE) \
|
||||||
|
LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
|
||||||
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e
|
||||||
|
|
||||||
|
teardown-e2e:
|
||||||
|
rm -rf $(TEST_DIR) || true
|
||||||
|
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
||||||
|
|
||||||
test-gpt4all: prepare-test
|
test-gpt4all: prepare-test
|
||||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r ./api ./pkg
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r ./api ./pkg
|
||||||
@@ -365,14 +386,20 @@ protogen-python:
|
|||||||
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/vllm/ --grpc_python_out=extra/grpc/vllm/ pkg/grpc/proto/backend.proto
|
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/vllm/ --grpc_python_out=extra/grpc/vllm/ pkg/grpc/proto/backend.proto
|
||||||
|
|
||||||
## GRPC
|
## GRPC
|
||||||
|
# Note: this target is duplicated in the Dockerfile
|
||||||
|
prepare-extra-conda-environments:
|
||||||
|
$(MAKE) -C extra/grpc/autogptq
|
||||||
|
$(MAKE) -C extra/grpc/bark
|
||||||
|
$(MAKE) -C extra/grpc/diffusers
|
||||||
|
$(MAKE) -C extra/grpc/vllm
|
||||||
|
$(MAKE) -C extra/grpc/huggingface
|
||||||
|
$(MAKE) -C extra/grpc/vall-e-x
|
||||||
|
$(MAKE) -C extra/grpc/exllama
|
||||||
|
|
||||||
|
|
||||||
backend-assets/grpc:
|
backend-assets/grpc:
|
||||||
mkdir -p backend-assets/grpc
|
mkdir -p backend-assets/grpc
|
||||||
|
|
||||||
backend-assets/grpc/falcon: backend-assets/grpc go-ggllm/libggllm.a
|
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggllm LIBRARY_PATH=$(shell pwd)/go-ggllm \
|
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon ./cmd/grpc/falcon/
|
|
||||||
|
|
||||||
backend-assets/grpc/llama: backend-assets/grpc go-llama/libbinding.a
|
backend-assets/grpc/llama: backend-assets/grpc go-llama/libbinding.a
|
||||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
|
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama LIBRARY_PATH=$(shell pwd)/go-llama \
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama LIBRARY_PATH=$(shell pwd)/go-llama \
|
||||||
@@ -382,6 +409,37 @@ ifeq ($(BUILD_TYPE),metal)
|
|||||||
cp go-llama/build/bin/ggml-metal.metal backend-assets/grpc/
|
cp go-llama/build/bin/ggml-metal.metal backend-assets/grpc/
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
## BACKEND CPP LLAMA START
|
||||||
|
# Set the variables used when gRPC has to be built locally.
|
||||||
|
INSTALLED_PACKAGES=$(CURDIR)/backend/cpp/grpc/installed_packages
|
||||||
|
INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
|
||||||
|
ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
|
||||||
|
-DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
|
||||||
|
-Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
|
||||||
|
-DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
|
||||||
|
-DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
|
||||||
|
|
||||||
|
backend/cpp/llama/grpc-server:
|
||||||
|
ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
|
||||||
|
backend/cpp/grpc/script/build_grpc.sh ${INSTALLED_PACKAGES}
|
||||||
|
export _PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto && \
|
||||||
|
export _GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin && \
|
||||||
|
export PATH=${PATH}:${INSTALLED_PACKAGES}/bin && \
|
||||||
|
CMAKE_ARGS="${ADDED_CMAKE_ARGS}" LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
|
||||||
|
else
|
||||||
|
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
|
||||||
|
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
|
||||||
|
endif
|
||||||
|
## BACKEND CPP LLAMA END
|
||||||
|
|
||||||
|
##
|
||||||
|
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server
|
||||||
|
cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp
|
||||||
|
# TODO: every binary should have its own folder instead, so each can have its own metal implementation
|
||||||
|
ifeq ($(BUILD_TYPE),metal)
|
||||||
|
cp backend/cpp/llama/llama.cpp/build/bin/ggml-metal.metal backend-assets/grpc/
|
||||||
|
endif
|
||||||
|
|
||||||
backend-assets/grpc/llama-stable: backend-assets/grpc go-llama-stable/libbinding.a
|
backend-assets/grpc/llama-stable: backend-assets/grpc go-llama-stable/libbinding.a
|
||||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama-stable
|
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama-stable
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama-stable LIBRARY_PATH=$(shell pwd)/go-llama \
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama-stable LIBRARY_PATH=$(shell pwd)/go-llama \
|
||||||
@@ -427,10 +485,6 @@ backend-assets/grpc/rwkv: backend-assets/grpc go-rwkv/librwkv.a
|
|||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-rwkv LIBRARY_PATH=$(shell pwd)/go-rwkv \
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-rwkv LIBRARY_PATH=$(shell pwd)/go-rwkv \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./cmd/grpc/rwkv/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./cmd/grpc/rwkv/
|
||||||
|
|
||||||
backend-assets/grpc/bloomz: backend-assets/grpc bloomz/libbloomz.a
|
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/bloomz LIBRARY_PATH=$(shell pwd)/bloomz \
|
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bloomz ./cmd/grpc/bloomz/
|
|
||||||
|
|
||||||
backend-assets/grpc/bert-embeddings: backend-assets/grpc go-bert/libgobert.a
|
backend-assets/grpc/bert-embeddings: backend-assets/grpc go-bert/libgobert.a
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-bert LIBRARY_PATH=$(shell pwd)/go-bert \
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-bert LIBRARY_PATH=$(shell pwd)/go-bert \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./cmd/grpc/bert-embeddings/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./cmd/grpc/bert-embeddings/
|
||||||
@@ -438,12 +492,15 @@ backend-assets/grpc/bert-embeddings: backend-assets/grpc go-bert/libgobert.a
|
|||||||
backend-assets/grpc/langchain-huggingface: backend-assets/grpc
|
backend-assets/grpc/langchain-huggingface: backend-assets/grpc
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./cmd/grpc/langchain-huggingface/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./cmd/grpc/langchain-huggingface/
|
||||||
|
|
||||||
backend-assets/grpc/stablediffusion: backend-assets/grpc go-stable-diffusion/libstablediffusion.a
|
backend-assets/grpc/stablediffusion: backend-assets/grpc
|
||||||
|
if [ ! -f backend-assets/grpc/stablediffusion ]; then \
|
||||||
|
$(MAKE) go-stable-diffusion/libstablediffusion.a; \
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-stable-diffusion/ LIBRARY_PATH=$(shell pwd)/go-stable-diffusion/ \
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-stable-diffusion/ LIBRARY_PATH=$(shell pwd)/go-stable-diffusion/ \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./cmd/grpc/stablediffusion/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./cmd/grpc/stablediffusion/; \
|
||||||
|
fi
|
||||||
|
|
||||||
backend-assets/grpc/piper: backend-assets/grpc backend-assets/espeak-ng-data go-piper/libpiper_binding.a
|
backend-assets/grpc/piper: backend-assets/grpc backend-assets/espeak-ng-data go-piper/libpiper_binding.a
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(shell pwd)/go-piper \
|
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(shell pwd)/go-piper \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./cmd/grpc/piper/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./cmd/grpc/piper/
|
||||||
|
|
||||||
backend-assets/grpc/whisper: backend-assets/grpc whisper.cpp/libwhisper.a
|
backend-assets/grpc/whisper: backend-assets/grpc whisper.cpp/libwhisper.a
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import (
|
|||||||
"github.com/go-skynet/LocalAI/api/options"
|
"github.com/go-skynet/LocalAI/api/options"
|
||||||
"github.com/go-skynet/LocalAI/api/schema"
|
"github.com/go-skynet/LocalAI/api/schema"
|
||||||
"github.com/go-skynet/LocalAI/internal"
|
"github.com/go-skynet/LocalAI/internal"
|
||||||
|
"github.com/go-skynet/LocalAI/metrics"
|
||||||
"github.com/go-skynet/LocalAI/pkg/assets"
|
"github.com/go-skynet/LocalAI/pkg/assets"
|
||||||
|
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
@@ -120,6 +121,9 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
|||||||
|
|
||||||
// Default middleware config
|
// Default middleware config
|
||||||
app.Use(recover.New())
|
app.Use(recover.New())
|
||||||
|
if options.Metrics != nil {
|
||||||
|
app.Use(metrics.APIMiddleware(options.Metrics))
|
||||||
|
}
|
||||||
|
|
||||||
// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
|
// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
|
||||||
auth := func(c *fiber.Ctx) error {
|
auth := func(c *fiber.Ctx) error {
|
||||||
@@ -229,5 +233,7 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
|||||||
app.Get("/v1/models", auth, openai.ListModelsEndpoint(options.Loader, cl))
|
app.Get("/v1/models", auth, openai.ListModelsEndpoint(options.Loader, cl))
|
||||||
app.Get("/models", auth, openai.ListModelsEndpoint(options.Loader, cl))
|
app.Get("/models", auth, openai.ListModelsEndpoint(options.Loader, cl))
|
||||||
|
|
||||||
|
app.Get("/metrics", metrics.MetricsHandler())
|
||||||
|
|
||||||
return app, nil
|
return app, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ import (
|
|||||||
|
|
||||||
. "github.com/go-skynet/LocalAI/api"
|
. "github.com/go-skynet/LocalAI/api"
|
||||||
"github.com/go-skynet/LocalAI/api/options"
|
"github.com/go-skynet/LocalAI/api/options"
|
||||||
|
"github.com/go-skynet/LocalAI/metrics"
|
||||||
"github.com/go-skynet/LocalAI/pkg/gallery"
|
"github.com/go-skynet/LocalAI/pkg/gallery"
|
||||||
"github.com/go-skynet/LocalAI/pkg/model"
|
"github.com/go-skynet/LocalAI/pkg/model"
|
||||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||||
@@ -162,8 +163,12 @@ var _ = Describe("API test", func() {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
metricsService, err := metrics.SetupMetrics()
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
app, err = App(
|
app, err = App(
|
||||||
append(commonOpts,
|
append(commonOpts,
|
||||||
|
options.WithMetrics(metricsService),
|
||||||
options.WithContext(c),
|
options.WithContext(c),
|
||||||
options.WithGalleries(galleries),
|
options.WithGalleries(galleries),
|
||||||
options.WithModelLoader(modelLoader), options.WithBackendAssets(backendAssets), options.WithBackendAssetsOutput(tmpdir))...)
|
options.WithModelLoader(modelLoader), options.WithBackendAssets(backendAssets), options.WithBackendAssetsOutput(tmpdir))...)
|
||||||
@@ -452,7 +457,7 @@ var _ = Describe("API test", func() {
|
|||||||
Eventually(func() bool {
|
Eventually(func() bool {
|
||||||
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
|
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
|
||||||
return response["processed"].(bool)
|
return response["processed"].(bool)
|
||||||
}, "360s", "10s").Should(Equal(true))
|
}, "960s", "10s").Should(Equal(true))
|
||||||
|
|
||||||
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-j", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "How are you?"}}})
|
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-j", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "How are you?"}}})
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
@@ -479,9 +484,13 @@ var _ = Describe("API test", func() {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
metricsService, err := metrics.SetupMetrics()
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
app, err = App(
|
app, err = App(
|
||||||
append(commonOpts,
|
append(commonOpts,
|
||||||
options.WithContext(c),
|
options.WithContext(c),
|
||||||
|
options.WithMetrics(metricsService),
|
||||||
options.WithAudioDir(tmpdir),
|
options.WithAudioDir(tmpdir),
|
||||||
options.WithImageDir(tmpdir),
|
options.WithImageDir(tmpdir),
|
||||||
options.WithGalleries(galleries),
|
options.WithGalleries(galleries),
|
||||||
@@ -583,12 +592,15 @@ var _ = Describe("API test", func() {
|
|||||||
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
|
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
|
||||||
c, cancel = context.WithCancel(context.Background())
|
c, cancel = context.WithCancel(context.Background())
|
||||||
|
|
||||||
var err error
|
metricsService, err := metrics.SetupMetrics()
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
app, err = App(
|
app, err = App(
|
||||||
append(commonOpts,
|
append(commonOpts,
|
||||||
options.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
|
options.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
|
||||||
options.WithContext(c),
|
options.WithContext(c),
|
||||||
options.WithModelLoader(modelLoader),
|
options.WithModelLoader(modelLoader),
|
||||||
|
options.WithMetrics(metricsService),
|
||||||
)...)
|
)...)
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
go app.Listen("127.0.0.1:9090")
|
go app.Listen("127.0.0.1:9090")
|
||||||
@@ -675,7 +687,7 @@ var _ = Describe("API test", func() {
|
|||||||
Input: []string{"sun", "cat"},
|
Input: []string{"sun", "cat"},
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred(), err)
|
||||||
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
|
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
|
||||||
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
|
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
|
||||||
|
|
||||||
@@ -792,10 +804,13 @@ var _ = Describe("API test", func() {
|
|||||||
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
|
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
|
||||||
c, cancel = context.WithCancel(context.Background())
|
c, cancel = context.WithCancel(context.Background())
|
||||||
|
|
||||||
var err error
|
metricsService, err := metrics.SetupMetrics()
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
app, err = App(
|
app, err = App(
|
||||||
append(commonOpts,
|
append(commonOpts,
|
||||||
options.WithContext(c),
|
options.WithContext(c),
|
||||||
|
options.WithMetrics(metricsService),
|
||||||
options.WithModelLoader(modelLoader),
|
options.WithModelLoader(modelLoader),
|
||||||
options.WithConfigFile(os.Getenv("CONFIG_FILE")))...,
|
options.WithConfigFile(os.Getenv("CONFIG_FILE")))...,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
|
|||||||
PipelineType: c.Diffusers.PipelineType,
|
PipelineType: c.Diffusers.PipelineType,
|
||||||
CFGScale: c.Diffusers.CFGScale,
|
CFGScale: c.Diffusers.CFGScale,
|
||||||
LoraAdapter: c.LoraAdapter,
|
LoraAdapter: c.LoraAdapter,
|
||||||
|
LoraScale: c.LoraScale,
|
||||||
LoraBase: c.LoraBase,
|
LoraBase: c.LoraBase,
|
||||||
IMG2IMG: c.Diffusers.IMG2IMG,
|
IMG2IMG: c.Diffusers.IMG2IMG,
|
||||||
CLIPModel: c.Diffusers.ClipModel,
|
CLIPModel: c.Diffusers.ClipModel,
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ type TokenUsage struct {
|
|||||||
Completion int
|
Completion int
|
||||||
}
|
}
|
||||||
|
|
||||||
func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||||
modelFile := c.Model
|
modelFile := c.Model
|
||||||
|
|
||||||
grpcOpts := gRPCModelOpts(c)
|
grpcOpts := gRPCModelOpts(c)
|
||||||
@@ -72,6 +72,7 @@ func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c
|
|||||||
fn := func() (LLMResponse, error) {
|
fn := func() (LLMResponse, error) {
|
||||||
opts := gRPCPredictOpts(c, loader.ModelPath)
|
opts := gRPCPredictOpts(c, loader.ModelPath)
|
||||||
opts.Prompt = s
|
opts.Prompt = s
|
||||||
|
opts.Images = images
|
||||||
|
|
||||||
tokenUsage := TokenUsage{}
|
tokenUsage := TokenUsage{}
|
||||||
|
|
||||||
|
|||||||
@@ -45,8 +45,14 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
|
|||||||
DraftModel: c.DraftModel,
|
DraftModel: c.DraftModel,
|
||||||
AudioPath: c.VallE.AudioPath,
|
AudioPath: c.VallE.AudioPath,
|
||||||
Quantization: c.Quantization,
|
Quantization: c.Quantization,
|
||||||
|
MMProj: c.MMProj,
|
||||||
|
YarnExtFactor: c.YarnExtFactor,
|
||||||
|
YarnAttnFactor: c.YarnAttnFactor,
|
||||||
|
YarnBetaFast: c.YarnBetaFast,
|
||||||
|
YarnBetaSlow: c.YarnBetaSlow,
|
||||||
LoraAdapter: c.LoraAdapter,
|
LoraAdapter: c.LoraAdapter,
|
||||||
LoraBase: c.LoraBase,
|
LoraBase: c.LoraBase,
|
||||||
|
LoraScale: c.LoraScale,
|
||||||
NGQA: c.NGQA,
|
NGQA: c.NGQA,
|
||||||
RMSNormEps: c.RMSNormEps,
|
RMSNormEps: c.RMSNormEps,
|
||||||
F16Memory: c.F16,
|
F16Memory: c.F16,
|
||||||
|
|||||||
@@ -100,10 +100,18 @@ type LLMConfig struct {
|
|||||||
NUMA bool `yaml:"numa"`
|
NUMA bool `yaml:"numa"`
|
||||||
LoraAdapter string `yaml:"lora_adapter"`
|
LoraAdapter string `yaml:"lora_adapter"`
|
||||||
LoraBase string `yaml:"lora_base"`
|
LoraBase string `yaml:"lora_base"`
|
||||||
|
LoraScale float32 `yaml:"lora_scale"`
|
||||||
NoMulMatQ bool `yaml:"no_mulmatq"`
|
NoMulMatQ bool `yaml:"no_mulmatq"`
|
||||||
DraftModel string `yaml:"draft_model"`
|
DraftModel string `yaml:"draft_model"`
|
||||||
NDraft int32 `yaml:"n_draft"`
|
NDraft int32 `yaml:"n_draft"`
|
||||||
Quantization string `yaml:"quantization"`
|
Quantization string `yaml:"quantization"`
|
||||||
|
MMProj string `yaml:"mmproj"`
|
||||||
|
|
||||||
|
RopeScaling string `yaml:"rope_scaling"`
|
||||||
|
YarnExtFactor float32 `yaml:"yarn_ext_factor"`
|
||||||
|
YarnAttnFactor float32 `yaml:"yarn_attn_factor"`
|
||||||
|
YarnBetaFast float32 `yaml:"yarn_beta_fast"`
|
||||||
|
YarnBetaSlow float32 `yaml:"yarn_beta_slow"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type AutoGPTQ struct {
|
type AutoGPTQ struct {
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/go-skynet/LocalAI/api/backend"
|
"github.com/go-skynet/LocalAI/api/backend"
|
||||||
config "github.com/go-skynet/LocalAI/api/config"
|
config "github.com/go-skynet/LocalAI/api/config"
|
||||||
@@ -15,15 +16,20 @@ import (
|
|||||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
|
"github.com/google/uuid"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
"github.com/valyala/fasthttp"
|
"github.com/valyala/fasthttp"
|
||||||
)
|
)
|
||||||
|
|
||||||
func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
|
func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
|
||||||
emptyMessage := ""
|
emptyMessage := ""
|
||||||
|
id := uuid.New().String()
|
||||||
|
created := int(time.Now().Unix())
|
||||||
|
|
||||||
process := func(s string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
|
process := func(s string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
|
||||||
initialMessage := schema.OpenAIResponse{
|
initialMessage := schema.OpenAIResponse{
|
||||||
|
ID: id,
|
||||||
|
Created: created,
|
||||||
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||||
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}},
|
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}},
|
||||||
Object: "chat.completion.chunk",
|
Object: "chat.completion.chunk",
|
||||||
@@ -32,6 +38,8 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
|
|
||||||
ComputeChoices(req, s, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
|
ComputeChoices(req, s, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
|
||||||
resp := schema.OpenAIResponse{
|
resp := schema.OpenAIResponse{
|
||||||
|
ID: id,
|
||||||
|
Created: created,
|
||||||
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||||
Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}},
|
Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}},
|
||||||
Object: "chat.completion.chunk",
|
Object: "chat.completion.chunk",
|
||||||
@@ -73,6 +81,10 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
noActionDescription = config.FunctionsConfig.NoActionDescriptionName
|
noActionDescription = config.FunctionsConfig.NoActionDescriptionName
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if input.ResponseFormat == "json_object" {
|
||||||
|
input.Grammar = grammar.JSONBNF
|
||||||
|
}
|
||||||
|
|
||||||
// process functions if we have any defined or if we have a function call string
|
// process functions if we have any defined or if we have a function call string
|
||||||
if len(input.Functions) > 0 && config.ShouldUseFunctions() {
|
if len(input.Functions) > 0 && config.ShouldUseFunctions() {
|
||||||
log.Debug().Msgf("Response needs to process functions")
|
log.Debug().Msgf("Response needs to process functions")
|
||||||
@@ -132,14 +144,14 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
r := config.Roles[role]
|
r := config.Roles[role]
|
||||||
contentExists := i.Content != nil && *i.Content != ""
|
contentExists := i.Content != nil && i.StringContent != ""
|
||||||
// First attempt to populate content via a chat message specific template
|
// First attempt to populate content via a chat message specific template
|
||||||
if config.TemplateConfig.ChatMessage != "" {
|
if config.TemplateConfig.ChatMessage != "" {
|
||||||
chatMessageData := model.ChatMessageTemplateData{
|
chatMessageData := model.ChatMessageTemplateData{
|
||||||
SystemPrompt: config.SystemPrompt,
|
SystemPrompt: config.SystemPrompt,
|
||||||
Role: r,
|
Role: r,
|
||||||
RoleName: role,
|
RoleName: role,
|
||||||
Content: *i.Content,
|
Content: i.StringContent,
|
||||||
MessageIndex: messageIndex,
|
MessageIndex: messageIndex,
|
||||||
}
|
}
|
||||||
templatedChatMessage, err := o.Loader.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
|
templatedChatMessage, err := o.Loader.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
|
||||||
@@ -158,7 +170,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
if content == "" {
|
if content == "" {
|
||||||
if r != "" {
|
if r != "" {
|
||||||
if contentExists {
|
if contentExists {
|
||||||
content = fmt.Sprint(r, " ", *i.Content)
|
content = fmt.Sprint(r, i.StringContent)
|
||||||
}
|
}
|
||||||
if i.FunctionCall != nil {
|
if i.FunctionCall != nil {
|
||||||
j, err := json.Marshal(i.FunctionCall)
|
j, err := json.Marshal(i.FunctionCall)
|
||||||
@@ -172,7 +184,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if contentExists {
|
if contentExists {
|
||||||
content = fmt.Sprint(*i.Content)
|
content = fmt.Sprint(i.StringContent)
|
||||||
}
|
}
|
||||||
if i.FunctionCall != nil {
|
if i.FunctionCall != nil {
|
||||||
j, err := json.Marshal(i.FunctionCall)
|
j, err := json.Marshal(i.FunctionCall)
|
||||||
@@ -261,6 +273,8 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
}
|
}
|
||||||
|
|
||||||
resp := &schema.OpenAIResponse{
|
resp := &schema.OpenAIResponse{
|
||||||
|
ID: id,
|
||||||
|
Created: created,
|
||||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||||
Choices: []schema.Choice{
|
Choices: []schema.Choice{
|
||||||
{
|
{
|
||||||
@@ -324,7 +338,11 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
// Otherwise ask the LLM to understand the JSON output and the context, and return a message
|
// Otherwise ask the LLM to understand the JSON output and the context, and return a message
|
||||||
// Note: This costs (in term of CPU) another computation
|
// Note: This costs (in term of CPU) another computation
|
||||||
config.Grammar = ""
|
config.Grammar = ""
|
||||||
predFunc, err := backend.ModelInference(input.Context, predInput, o.Loader, *config, o, nil)
|
images := []string{}
|
||||||
|
for _, m := range input.Messages {
|
||||||
|
images = append(images, m.StringImages...)
|
||||||
|
}
|
||||||
|
predFunc, err := backend.ModelInference(input.Context, predInput, images, o.Loader, *config, o, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error().Msgf("inference error: %s", err.Error())
|
log.Error().Msgf("inference error: %s", err.Error())
|
||||||
return
|
return
|
||||||
@@ -355,6 +373,8 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
}
|
}
|
||||||
|
|
||||||
resp := &schema.OpenAIResponse{
|
resp := &schema.OpenAIResponse{
|
||||||
|
ID: id,
|
||||||
|
Created: created,
|
||||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||||
Choices: result,
|
Choices: result,
|
||||||
Object: "chat.completion",
|
Object: "chat.completion",
|
||||||
|
|||||||
@@ -6,22 +6,30 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/go-skynet/LocalAI/api/backend"
|
"github.com/go-skynet/LocalAI/api/backend"
|
||||||
config "github.com/go-skynet/LocalAI/api/config"
|
config "github.com/go-skynet/LocalAI/api/config"
|
||||||
"github.com/go-skynet/LocalAI/api/options"
|
"github.com/go-skynet/LocalAI/api/options"
|
||||||
"github.com/go-skynet/LocalAI/api/schema"
|
"github.com/go-skynet/LocalAI/api/schema"
|
||||||
|
"github.com/go-skynet/LocalAI/pkg/grammar"
|
||||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
|
"github.com/google/uuid"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
"github.com/valyala/fasthttp"
|
"github.com/valyala/fasthttp"
|
||||||
)
|
)
|
||||||
|
|
||||||
// https://platform.openai.com/docs/api-reference/completions
|
// https://platform.openai.com/docs/api-reference/completions
|
||||||
func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
|
func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
|
||||||
|
id := uuid.New().String()
|
||||||
|
created := int(time.Now().Unix())
|
||||||
|
|
||||||
process := func(s string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
|
process := func(s string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
|
||||||
ComputeChoices(req, s, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
|
ComputeChoices(req, s, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
|
||||||
resp := schema.OpenAIResponse{
|
resp := schema.OpenAIResponse{
|
||||||
|
ID: id,
|
||||||
|
Created: created,
|
||||||
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||||
Choices: []schema.Choice{
|
Choices: []schema.Choice{
|
||||||
{
|
{
|
||||||
@@ -57,6 +65,10 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
|||||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if input.ResponseFormat == "json_object" {
|
||||||
|
input.Grammar = grammar.JSONBNF
|
||||||
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||||
|
|
||||||
if input.Stream {
|
if input.Stream {
|
||||||
@@ -108,6 +120,8 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
|||||||
}
|
}
|
||||||
|
|
||||||
resp := &schema.OpenAIResponse{
|
resp := &schema.OpenAIResponse{
|
||||||
|
ID: id,
|
||||||
|
Created: created,
|
||||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||||
Choices: []schema.Choice{
|
Choices: []schema.Choice{
|
||||||
{
|
{
|
||||||
@@ -156,6 +170,8 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
|||||||
}
|
}
|
||||||
|
|
||||||
resp := &schema.OpenAIResponse{
|
resp := &schema.OpenAIResponse{
|
||||||
|
ID: id,
|
||||||
|
Created: created,
|
||||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||||
Choices: result,
|
Choices: result,
|
||||||
Object: "text_completion",
|
Object: "text_completion",
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package openai
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/go-skynet/LocalAI/api/backend"
|
"github.com/go-skynet/LocalAI/api/backend"
|
||||||
config "github.com/go-skynet/LocalAI/api/config"
|
config "github.com/go-skynet/LocalAI/api/config"
|
||||||
@@ -10,6 +11,7 @@ import (
|
|||||||
"github.com/go-skynet/LocalAI/api/schema"
|
"github.com/go-skynet/LocalAI/api/schema"
|
||||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
|
"github.com/google/uuid"
|
||||||
|
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
@@ -62,7 +64,11 @@ func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
result = append(result, r...)
|
result = append(result, r...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
id := uuid.New().String()
|
||||||
|
created := int(time.Now().Unix())
|
||||||
resp := &schema.OpenAIResponse{
|
resp := &schema.OpenAIResponse{
|
||||||
|
ID: id,
|
||||||
|
Created: created,
|
||||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||||
Choices: result,
|
Choices: result,
|
||||||
Object: "edit",
|
Object: "edit",
|
||||||
|
|||||||
@@ -3,10 +3,12 @@ package openai
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/go-skynet/LocalAI/api/backend"
|
"github.com/go-skynet/LocalAI/api/backend"
|
||||||
config "github.com/go-skynet/LocalAI/api/config"
|
config "github.com/go-skynet/LocalAI/api/config"
|
||||||
"github.com/go-skynet/LocalAI/api/schema"
|
"github.com/go-skynet/LocalAI/api/schema"
|
||||||
|
"github.com/google/uuid"
|
||||||
|
|
||||||
"github.com/go-skynet/LocalAI/api/options"
|
"github.com/go-skynet/LocalAI/api/options"
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
@@ -57,7 +59,11 @@ func EmbeddingsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
|||||||
items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
|
items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
id := uuid.New().String()
|
||||||
|
created := int(time.Now().Unix())
|
||||||
resp := &schema.OpenAIResponse{
|
resp := &schema.OpenAIResponse{
|
||||||
|
ID: id,
|
||||||
|
Created: created,
|
||||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||||
Data: items,
|
Data: items,
|
||||||
Object: "list",
|
Object: "list",
|
||||||
|
|||||||
@@ -5,11 +5,14 @@ import (
|
|||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/go-skynet/LocalAI/api/schema"
|
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/go-skynet/LocalAI/api/schema"
|
||||||
|
"github.com/google/uuid"
|
||||||
|
|
||||||
"github.com/go-skynet/LocalAI/api/backend"
|
"github.com/go-skynet/LocalAI/api/backend"
|
||||||
config "github.com/go-skynet/LocalAI/api/config"
|
config "github.com/go-skynet/LocalAI/api/config"
|
||||||
@@ -174,7 +177,11 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
id := uuid.New().String()
|
||||||
|
created := int(time.Now().Unix())
|
||||||
resp := &schema.OpenAIResponse{
|
resp := &schema.OpenAIResponse{
|
||||||
|
ID: id,
|
||||||
|
Created: created,
|
||||||
Data: result,
|
Data: result,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -23,8 +23,13 @@ func ComputeChoices(
|
|||||||
n = 1
|
n = 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
images := []string{}
|
||||||
|
for _, m := range req.Messages {
|
||||||
|
images = append(images, m.StringImages...)
|
||||||
|
}
|
||||||
|
|
||||||
// get the model function to call for the result
|
// get the model function to call for the result
|
||||||
predFunc, err := backend.ModelInference(req.Context, predInput, loader, *config, o, tokenCallback)
|
predFunc, err := backend.ModelInference(req.Context, predInput, images, loader, *config, o, tokenCallback)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return result, backend.TokenUsage{}, err
|
return result, backend.TokenUsage{}, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,8 +2,11 @@ package openai
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/base64"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -24,7 +27,7 @@ func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *sche
|
|||||||
input.Cancel = cancel
|
input.Cancel = cancel
|
||||||
// Get input data from the request body
|
// Get input data from the request body
|
||||||
if err := c.BodyParser(input); err != nil {
|
if err := c.BodyParser(input); err != nil {
|
||||||
return "", nil, err
|
return "", nil, fmt.Errorf("failed parsing request body: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
modelFile := input.Model
|
modelFile := input.Model
|
||||||
@@ -61,6 +64,37 @@ func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *sche
|
|||||||
return modelFile, input, nil
|
return modelFile, input, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// this function check if the string is an URL, if it's an URL downloads the image in memory
|
||||||
|
// encodes it in base64 and returns the base64 string
|
||||||
|
func getBase64Image(s string) (string, error) {
|
||||||
|
if strings.HasPrefix(s, "http") {
|
||||||
|
// download the image
|
||||||
|
resp, err := http.Get(s)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
// read the image data into memory
|
||||||
|
data, err := ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
// encode the image data in base64
|
||||||
|
encoded := base64.StdEncoding.EncodeToString(data)
|
||||||
|
|
||||||
|
// return the base64 string
|
||||||
|
return encoded, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// if the string instead is prefixed with "data:image/jpeg;base64,", drop it
|
||||||
|
if strings.HasPrefix(s, "data:image/jpeg;base64,") {
|
||||||
|
return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil
|
||||||
|
}
|
||||||
|
return "", fmt.Errorf("not valid string")
|
||||||
|
}
|
||||||
|
|
||||||
func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
|
func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
|
||||||
if input.Echo {
|
if input.Echo {
|
||||||
config.Echo = input.Echo
|
config.Echo = input.Echo
|
||||||
@@ -129,6 +163,35 @@ func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Decode each request's message content
|
||||||
|
index := 0
|
||||||
|
for i, m := range input.Messages {
|
||||||
|
switch content := m.Content.(type) {
|
||||||
|
case string:
|
||||||
|
input.Messages[i].StringContent = content
|
||||||
|
case []interface{}:
|
||||||
|
dat, _ := json.Marshal(content)
|
||||||
|
c := []schema.Content{}
|
||||||
|
json.Unmarshal(dat, &c)
|
||||||
|
for _, pp := range c {
|
||||||
|
if pp.Type == "text" {
|
||||||
|
input.Messages[i].StringContent = pp.Text
|
||||||
|
} else if pp.Type == "image_url" {
|
||||||
|
// Detect if pp.ImageURL is an URL, if it is download the image and encode it in base64:
|
||||||
|
base64, err := getBase64Image(pp.ImageURL.URL)
|
||||||
|
if err == nil {
|
||||||
|
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
|
||||||
|
// set a placeholder for each image
|
||||||
|
input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + input.Messages[i].StringContent
|
||||||
|
index++
|
||||||
|
} else {
|
||||||
|
fmt.Print("Failed encoding image", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if input.RepeatPenalty != 0 {
|
if input.RepeatPenalty != 0 {
|
||||||
config.RepeatPenalty = input.RepeatPenalty
|
config.RepeatPenalty = input.RepeatPenalty
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
|
|
||||||
"github.com/go-skynet/LocalAI/pkg/gallery"
|
"github.com/go-skynet/LocalAI/pkg/gallery"
|
||||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||||
|
"github.com/go-skynet/LocalAI/metrics"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -24,6 +25,7 @@ type Option struct {
|
|||||||
PreloadModelsFromPath string
|
PreloadModelsFromPath string
|
||||||
CORSAllowOrigins string
|
CORSAllowOrigins string
|
||||||
ApiKeys []string
|
ApiKeys []string
|
||||||
|
Metrics *metrics.Metrics
|
||||||
|
|
||||||
Galleries []gallery.Gallery
|
Galleries []gallery.Gallery
|
||||||
|
|
||||||
@@ -198,3 +200,9 @@ func WithApiKeys(apiKeys []string) AppOption {
|
|||||||
o.ApiKeys = apiKeys
|
o.ApiKeys = apiKeys
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func WithMetrics(meter *metrics.Metrics) AppOption {
|
||||||
|
return func(o *Option) {
|
||||||
|
o.Metrics = meter
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -55,11 +55,25 @@ type Choice struct {
|
|||||||
Text string `json:"text,omitempty"`
|
Text string `json:"text,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type Content struct {
|
||||||
|
Type string `json:"type" yaml:"type"`
|
||||||
|
Text string `json:"text" yaml:"text"`
|
||||||
|
ImageURL ContentURL `json:"image_url" yaml:"image_url"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type ContentURL struct {
|
||||||
|
URL string `json:"url" yaml:"url"`
|
||||||
|
}
|
||||||
|
|
||||||
type Message struct {
|
type Message struct {
|
||||||
// The message role
|
// The message role
|
||||||
Role string `json:"role,omitempty" yaml:"role"`
|
Role string `json:"role,omitempty" yaml:"role"`
|
||||||
// The message content
|
// The message content
|
||||||
Content *string `json:"content" yaml:"content"`
|
Content interface{} `json:"content" yaml:"content"`
|
||||||
|
|
||||||
|
StringContent string `json:"string_content,omitempty" yaml:"string_content,omitempty"`
|
||||||
|
StringImages []string `json:"string_images,omitempty" yaml:"string_images,omitempty"`
|
||||||
|
|
||||||
// A result of a function call
|
// A result of a function call
|
||||||
FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
|
FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
|
||||||
}
|
}
|
||||||
|
|||||||
3
backend/cpp/grpc/.gitignore
vendored
Normal file
3
backend/cpp/grpc/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
installed_packages/
|
||||||
|
grpc_build/
|
||||||
|
grpc_repo/
|
||||||
81
backend/cpp/grpc/script/build_grpc.sh
Executable file
81
backend/cpp/grpc/script/build_grpc.sh
Executable file
@@ -0,0 +1,81 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Builds locally from sources the packages needed by the llama cpp backend.
|
||||||
|
|
||||||
|
# Makes sure a few base packages exist.
|
||||||
|
# sudo apt-get --no-upgrade -y install g++ gcc binutils cmake git build-essential autoconf libtool pkg-config
|
||||||
|
|
||||||
|
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
||||||
|
echo "Script directory: $SCRIPT_DIR"
|
||||||
|
|
||||||
|
CPP_INSTALLED_PACKAGES_DIR=$1
|
||||||
|
if [ -z ${CPP_INSTALLED_PACKAGES_DIR} ]; then
|
||||||
|
echo "CPP_INSTALLED_PACKAGES_DIR env variable not set. Don't know where to install: failed.";
|
||||||
|
echo
|
||||||
|
exit -1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -d "${CPP_INSTALLED_PACKAGES_DIR}" ]; then
|
||||||
|
echo "gRPC installation directory already exists. Nothing to do."
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# The depth when cloning a git repo. 1 speeds up the clone when the repo history is not needed.
|
||||||
|
GIT_CLONE_DEPTH=1
|
||||||
|
|
||||||
|
NUM_BUILD_THREADS=$(nproc --ignore=1)
|
||||||
|
|
||||||
|
# Google gRPC --------------------------------------------------------------------------------------
|
||||||
|
TAG_LIB_GRPC="v1.59.0"
|
||||||
|
GIT_REPO_LIB_GRPC="https://github.com/grpc/grpc.git"
|
||||||
|
GRPC_REPO_DIR="${SCRIPT_DIR}/../grpc_repo"
|
||||||
|
GRPC_BUILD_DIR="${SCRIPT_DIR}/../grpc_build"
|
||||||
|
SRC_DIR_LIB_GRPC="${GRPC_REPO_DIR}/grpc"
|
||||||
|
|
||||||
|
echo "SRC_DIR_LIB_GRPC: ${SRC_DIR_LIB_GRPC}"
|
||||||
|
echo "GRPC_REPO_DIR: ${GRPC_REPO_DIR}"
|
||||||
|
echo "GRPC_BUILD_DIR: ${GRPC_BUILD_DIR}"
|
||||||
|
|
||||||
|
mkdir -pv ${GRPC_REPO_DIR}
|
||||||
|
|
||||||
|
rm -rf ${GRPC_BUILD_DIR}
|
||||||
|
mkdir -pv ${GRPC_BUILD_DIR}
|
||||||
|
|
||||||
|
mkdir -pv ${CPP_INSTALLED_PACKAGES_DIR}
|
||||||
|
|
||||||
|
if [ -d "${SRC_DIR_LIB_GRPC}" ]; then
|
||||||
|
echo "gRPC source already exists locally. Not cloned again."
|
||||||
|
else
|
||||||
|
( cd ${GRPC_REPO_DIR} && \
|
||||||
|
git clone --depth ${GIT_CLONE_DEPTH} -b ${TAG_LIB_GRPC} ${GIT_REPO_LIB_GRPC} && \
|
||||||
|
cd ${SRC_DIR_LIB_GRPC} && \
|
||||||
|
git submodule update --init --recursive --depth ${GIT_CLONE_DEPTH}
|
||||||
|
)
|
||||||
|
fi
|
||||||
|
|
||||||
|
( cd ${GRPC_BUILD_DIR} && \
|
||||||
|
cmake -G "Unix Makefiles" \
|
||||||
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DgRPC_INSTALL=ON \
|
||||||
|
-DEXECUTABLE_OUTPUT_PATH=${CPP_INSTALLED_PACKAGES_DIR}/grpc/bin \
|
||||||
|
-DLIBRARY_OUTPUT_PATH=${CPP_INSTALLED_PACKAGES_DIR}/grpc/lib \
|
||||||
|
-DgRPC_BUILD_TESTS=OFF \
|
||||||
|
-DgRPC_BUILD_CSHARP_EXT=OFF \
|
||||||
|
-DgRPC_BUILD_GRPC_CPP_PLUGIN=ON \
|
||||||
|
-DgRPC_BUILD_GRPC_CSHARP_PLUGIN=OFF \
|
||||||
|
-DgRPC_BUILD_GRPC_NODE_PLUGIN=OFF \
|
||||||
|
-DgRPC_BUILD_GRPC_OBJECTIVE_C_PLUGIN=OFF \
|
||||||
|
-DgRPC_BUILD_GRPC_PHP_PLUGIN=OFF \
|
||||||
|
-DgRPC_BUILD_GRPC_PYTHON_PLUGIN=ON \
|
||||||
|
-DgRPC_BUILD_GRPC_RUBY_PLUGIN=OFF \
|
||||||
|
-Dprotobuf_WITH_ZLIB=ON \
|
||||||
|
-DRE2_BUILD_TESTING=OFF \
|
||||||
|
-DCMAKE_INSTALL_PREFIX=${CPP_INSTALLED_PACKAGES_DIR}/ \
|
||||||
|
${SRC_DIR_LIB_GRPC} && \
|
||||||
|
cmake --build . -- -j ${NUM_BUILD_THREADS} && \
|
||||||
|
cmake --build . --target install -- -j ${NUM_BUILD_THREADS}
|
||||||
|
)
|
||||||
|
|
||||||
|
rm -rf ${GRPC_BUILD_DIR}
|
||||||
|
rm -rf ${GRPC_REPO_DIR}
|
||||||
|
|
||||||
74
backend/cpp/llama/CMakeLists.txt
Normal file
74
backend/cpp/llama/CMakeLists.txt
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
|
||||||
|
## XXX: In some versions of CMake clip wasn't being built before llama.
|
||||||
|
## This is a hack for now, but it should be fixed in the future.
|
||||||
|
set(TARGET myclip)
|
||||||
|
add_library(${TARGET} clip.cpp clip.h)
|
||||||
|
install(TARGETS ${TARGET} LIBRARY)
|
||||||
|
target_link_libraries(${TARGET} PRIVATE common ggml ${CMAKE_THREAD_LIBS_INIT})
|
||||||
|
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||||
|
if (NOT MSVC)
|
||||||
|
target_compile_options(${TARGET} PRIVATE -Wno-cast-qual) # stb_image.h
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set(TARGET grpc-server)
|
||||||
|
# END CLIP hack
|
||||||
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
|
cmake_minimum_required(VERSION 3.15)
|
||||||
|
set(TARGET grpc-server)
|
||||||
|
set(_PROTOBUF_LIBPROTOBUF libprotobuf)
|
||||||
|
set(_REFLECTION grpc++_reflection)
|
||||||
|
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||||
|
link_directories("/opt/homebrew/lib")
|
||||||
|
include_directories("/opt/homebrew/include")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
find_package(absl CONFIG REQUIRED)
|
||||||
|
find_package(Protobuf CONFIG REQUIRED)
|
||||||
|
find_package(gRPC CONFIG REQUIRED)
|
||||||
|
|
||||||
|
find_program(_PROTOBUF_PROTOC protoc)
|
||||||
|
set(_GRPC_GRPCPP grpc++)
|
||||||
|
find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)
|
||||||
|
|
||||||
|
include_directories(${CMAKE_CURRENT_BINARY_DIR})
|
||||||
|
include_directories(${Protobuf_INCLUDE_DIRS})
|
||||||
|
|
||||||
|
message(STATUS "Using protobuf version ${Protobuf_VERSION} | Protobuf_INCLUDE_DIRS: ${Protobuf_INCLUDE_DIRS} | CMAKE_CURRENT_BINARY_DIR: ${CMAKE_CURRENT_BINARY_DIR}")
|
||||||
|
|
||||||
|
# Proto file
|
||||||
|
get_filename_component(hw_proto "../../../../../../pkg/grpc/proto/backend.proto" ABSOLUTE)
|
||||||
|
get_filename_component(hw_proto_path "${hw_proto}" PATH)
|
||||||
|
|
||||||
|
# Generated sources
|
||||||
|
set(hw_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/backend.pb.cc")
|
||||||
|
set(hw_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/backend.pb.h")
|
||||||
|
set(hw_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/backend.grpc.pb.cc")
|
||||||
|
set(hw_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/backend.grpc.pb.h")
|
||||||
|
|
||||||
|
add_custom_command(
|
||||||
|
OUTPUT "${hw_proto_srcs}" "${hw_proto_hdrs}" "${hw_grpc_srcs}" "${hw_grpc_hdrs}"
|
||||||
|
COMMAND ${_PROTOBUF_PROTOC}
|
||||||
|
ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}"
|
||||||
|
--cpp_out "${CMAKE_CURRENT_BINARY_DIR}"
|
||||||
|
-I "${hw_proto_path}"
|
||||||
|
--plugin=protoc-gen-grpc="${_GRPC_CPP_PLUGIN_EXECUTABLE}"
|
||||||
|
"${hw_proto}"
|
||||||
|
DEPENDS "${hw_proto}")
|
||||||
|
|
||||||
|
# hw_grpc_proto
|
||||||
|
add_library(hw_grpc_proto
|
||||||
|
${hw_grpc_srcs}
|
||||||
|
${hw_grpc_hdrs}
|
||||||
|
${hw_proto_srcs}
|
||||||
|
${hw_proto_hdrs} )
|
||||||
|
|
||||||
|
add_executable(${TARGET} grpc-server.cpp json.hpp )
|
||||||
|
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
|
||||||
|
absl::flags_parse
|
||||||
|
gRPC::${_REFLECTION}
|
||||||
|
gRPC::${_GRPC_GRPCPP}
|
||||||
|
protobuf::${_PROTOBUF_LIBPROTOBUF})
|
||||||
|
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||||
|
if(TARGET BUILD_INFO)
|
||||||
|
add_dependencies(${TARGET} BUILD_INFO)
|
||||||
|
endif()
|
||||||
50
backend/cpp/llama/Makefile
Normal file
50
backend/cpp/llama/Makefile
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
|
||||||
|
LLAMA_VERSION?=d9b33fe95bd257b36c84ee5769cc048230067d6f
|
||||||
|
|
||||||
|
CMAKE_ARGS?=
|
||||||
|
BUILD_TYPE?=
|
||||||
|
|
||||||
|
# If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
|
||||||
|
ifeq ($(BUILD_TYPE),cublas)
|
||||||
|
CMAKE_ARGS+=-DLLAMA_CUBLAS=ON
|
||||||
|
# If build type is openblas then we set -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
|
||||||
|
# to CMAKE_ARGS automatically
|
||||||
|
else ifeq ($(BUILD_TYPE),openblas)
|
||||||
|
CMAKE_ARGS+=-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
|
||||||
|
# If build type is clblast (openCL) we set -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||||
|
else ifeq ($(BUILD_TYPE),clblast)
|
||||||
|
CMAKE_ARGS+=-DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||||
|
# If it's hipblas we also have to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
||||||
|
else ifeq ($(BUILD_TYPE),hipblas)
|
||||||
|
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
|
||||||
|
endif
|
||||||
|
|
||||||
|
llama.cpp:
|
||||||
|
git clone --recurse-submodules https://github.com/ggerganov/llama.cpp llama.cpp
|
||||||
|
cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1
|
||||||
|
|
||||||
|
llama.cpp/examples/grpc-server:
|
||||||
|
mkdir -p llama.cpp/examples/grpc-server
|
||||||
|
cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
|
||||||
|
cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
|
||||||
|
cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
|
||||||
|
echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
|
||||||
|
## XXX: In some versions of CMake clip wasn't being built before llama.
|
||||||
|
## This is a hack for now, but it should be fixed in the future.
|
||||||
|
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
|
||||||
|
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
|
||||||
|
|
||||||
|
rebuild:
|
||||||
|
cp -rfv $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
|
||||||
|
cp -rfv $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
|
||||||
|
cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
|
||||||
|
rm -rf grpc-server
|
||||||
|
$(MAKE) grpc-server
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -rf llama.cpp
|
||||||
|
rm -rf grpc-server
|
||||||
|
|
||||||
|
grpc-server: llama.cpp llama.cpp/examples/grpc-server
|
||||||
|
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release
|
||||||
|
cp llama.cpp/build/bin/grpc-server .
|
||||||
2192
backend/cpp/llama/grpc-server.cpp
Normal file
2192
backend/cpp/llama/grpc-server.cpp
Normal file
File diff suppressed because it is too large
Load Diff
24596
backend/cpp/llama/json.hpp
Normal file
24596
backend/cpp/llama/json.hpp
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,23 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
|
||||||
|
|
||||||
import (
|
|
||||||
"flag"
|
|
||||||
|
|
||||||
bloomz "github.com/go-skynet/LocalAI/pkg/backend/llm/bloomz"
|
|
||||||
|
|
||||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
flag.Parse()
|
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, &bloomz.LLM{}); err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// GRPC Falcon server
|
|
||||||
|
|
||||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
|
||||||
|
|
||||||
import (
|
|
||||||
"flag"
|
|
||||||
|
|
||||||
falcon "github.com/go-skynet/LocalAI/pkg/backend/llm/falcon"
|
|
||||||
|
|
||||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
flag.Parse()
|
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, &falcon.LLM{}); err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
0
custom-ca-certs/.keep
Normal file
0
custom-ca-certs/.keep
Normal file
@@ -12,4 +12,5 @@ services:
|
|||||||
- .env
|
- .env
|
||||||
volumes:
|
volumes:
|
||||||
- ./models:/models:cached
|
- ./models:/models:cached
|
||||||
|
- ./images/:/tmp/generated/images/
|
||||||
command: ["/usr/bin/local-ai" ]
|
command: ["/usr/bin/local-ai" ]
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ cd /build
|
|||||||
|
|
||||||
if [ "$REBUILD" != "false" ]; then
|
if [ "$REBUILD" != "false" ]; then
|
||||||
rm -rf ./local-ai
|
rm -rf ./local-ai
|
||||||
ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data make build -j${BUILD_PARALLELISM:-1}
|
make build -j${BUILD_PARALLELISM:-1}
|
||||||
else
|
else
|
||||||
echo "@@@@@"
|
echo "@@@@@"
|
||||||
echo "Skipping rebuild"
|
echo "Skipping rebuild"
|
||||||
|
|||||||
@@ -1,5 +1,9 @@
|
|||||||
|
# CPU .env docs: https://localai.io/howtos/easy-setup-docker-cpu/
|
||||||
|
# GPU .env docs: https://localai.io/howtos/easy-setup-docker-gpu/
|
||||||
|
|
||||||
OPENAI_API_KEY=sk---anystringhere
|
OPENAI_API_KEY=sk---anystringhere
|
||||||
OPENAI_API_BASE=http://api:8080/v1
|
OPENAI_API_BASE=http://api:8080/v1
|
||||||
# Models to preload at start
|
# Models to preload at start
|
||||||
# Here we configure gpt4all as gpt-3.5-turbo and bert as embeddings
|
# Here we configure gpt4all as gpt-3.5-turbo and bert as embeddings,
|
||||||
|
# see other options in the model gallery at https://github.com/go-skynet/model-gallery
|
||||||
PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/gpt4all-j.yaml", "name": "gpt-3.5-turbo"}, { "url": "github:go-skynet/model-gallery/bert-embeddings.yaml", "name": "text-embedding-ada-002"}]
|
PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/gpt4all-j.yaml", "name": "gpt-3.5-turbo"}, { "url": "github:go-skynet/model-gallery/bert-embeddings.yaml", "name": "text-embedding-ada-002"}]
|
||||||
@@ -10,12 +10,16 @@ git clone https://github.com/go-skynet/LocalAI
|
|||||||
|
|
||||||
cd LocalAI/examples/autoGPT
|
cd LocalAI/examples/autoGPT
|
||||||
|
|
||||||
|
cp -rfv .env.example .env
|
||||||
|
|
||||||
|
# Edit the .env file to set a different model by editing `PRELOAD_MODELS`.
|
||||||
|
vim .env
|
||||||
|
|
||||||
docker-compose run --rm auto-gpt
|
docker-compose run --rm auto-gpt
|
||||||
```
|
```
|
||||||
|
|
||||||
Note: The example automatically downloads the `gpt4all` model as it is under a permissive license. The GPT4All model does not seem to be enough to run AutoGPT. WizardLM-7b-uncensored seems to perform better (with `f16: true`).
|
Note: The example automatically downloads the `gpt4all` model as it is under a permissive license. The GPT4All model does not seem to be enough to run AutoGPT. WizardLM-7b-uncensored seems to perform better (with `f16: true`).
|
||||||
|
|
||||||
See the `.env` configuration file to set a different model with the [model-gallery](https://github.com/go-skynet/model-gallery) by editing `PRELOAD_MODELS`.
|
|
||||||
|
|
||||||
## Without docker
|
## Without docker
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,11 @@
|
|||||||
|
meta {
|
||||||
|
name: backend monitor
|
||||||
|
type: http
|
||||||
|
seq: 4
|
||||||
|
}
|
||||||
|
|
||||||
|
get {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/monitor
|
||||||
|
body: none
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
meta {
|
||||||
|
name: backend-shutdown
|
||||||
|
type: http
|
||||||
|
seq: 3
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/shutdown
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}"
|
||||||
|
}
|
||||||
|
}
|
||||||
5
examples/bruno/LocalAI Test Requests/bruno.json
Normal file
5
examples/bruno/LocalAI Test Requests/bruno.json
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"version": "1",
|
||||||
|
"name": "LocalAI Test Requests",
|
||||||
|
"type": "collection"
|
||||||
|
}
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
vars {
|
||||||
|
HOST: localhost
|
||||||
|
PORT: 8080
|
||||||
|
DEFAULT_MODEL: gpt-3.5-turbo
|
||||||
|
PROTOCOL: http://
|
||||||
|
}
|
||||||
11
examples/bruno/LocalAI Test Requests/get models list.bru
Normal file
11
examples/bruno/LocalAI Test Requests/get models list.bru
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
meta {
|
||||||
|
name: get models list
|
||||||
|
type: http
|
||||||
|
seq: 2
|
||||||
|
}
|
||||||
|
|
||||||
|
get {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models
|
||||||
|
body: none
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
meta {
|
||||||
|
name: Generate image
|
||||||
|
type: http
|
||||||
|
seq: 1
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/images/generations
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"prompt": "<positive prompt>|<negative prompt>",
|
||||||
|
"model": "model-name",
|
||||||
|
"step": 51,
|
||||||
|
"size": "1024x1024",
|
||||||
|
"image": ""
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
meta {
|
||||||
|
name: -completions
|
||||||
|
type: http
|
||||||
|
seq: 4
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/completions
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}",
|
||||||
|
"prompt": "function downloadFile(string url, string outputPath) {",
|
||||||
|
"max_tokens": 256,
|
||||||
|
"temperature": 0.5
|
||||||
|
}
|
||||||
|
}
|
||||||
23
examples/bruno/LocalAI Test Requests/llm text/-edits.bru
Normal file
23
examples/bruno/LocalAI Test Requests/llm text/-edits.bru
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
meta {
|
||||||
|
name: -edits
|
||||||
|
type: http
|
||||||
|
seq: 5
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/edits
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}",
|
||||||
|
"input": "What day of the wek is it?",
|
||||||
|
"instruction": "Fix the spelling mistakes"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
meta {
|
||||||
|
name: -embeddings
|
||||||
|
type: http
|
||||||
|
seq: 6
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/embeddings
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}",
|
||||||
|
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
meta {
|
||||||
|
name: chat completion -simple- 1 message-
|
||||||
|
type: http
|
||||||
|
seq: 4
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "How could one use friction to cook an egg?"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"max_tokens": 256,
|
||||||
|
"temperature": 0.2,
|
||||||
|
"grammar": ""
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
meta {
|
||||||
|
name: chat-completions -long-
|
||||||
|
type: http
|
||||||
|
seq: 5
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}",
|
||||||
|
"messages": [{"role": "system", "content": "You are a helpful, intelligent pirate assistant. Always answer as helpfully as possible, while being a pirate and using pirate language. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."},
|
||||||
|
{"role": "user", "content": "How could one use electricity to cook an egg?"},
|
||||||
|
{"role": "assistant",
|
||||||
|
"content": "Shiver me timbers! Using electricity to cook an egg? Well, matey, I reckon that be a right curious idea! *adjusts spectacles* Now, I ain't sure if this be possible, but I'll do me best to help ye out.\n\nFirst things first, ye gotta understand that electricity be a powerful force, and it can't just cook an egg on its own. Ye see, electricity be like a mighty wind that can make things happen, but it needs somethin' to work with. So, if ye want to use electricity to cook an egg, ye gotta find a way to harness that power and make it do the cookin'.\n\nNow, I know what ye might be thinkin': \"How do I harness the power of electricity to cook an egg?\" Well, matey, there be a few ways to do it. One way be to use a special device called an \"electric frying pan.\" This be a pan that has a built-in heating element that gets hot when ye plug it into a wall socket. When the element gets hot, ye can crack an egg into the pan and watch as it cook"
|
||||||
|
},
|
||||||
|
{"role": "user", "content": "I don't have one of those, just a raw wire and plenty of power! How do we get it done?"}],
|
||||||
|
"max_tokens": 1024,
|
||||||
|
"temperature": 0.5
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
meta {
|
||||||
|
name: chat-completions -stream-
|
||||||
|
type: http
|
||||||
|
seq: 6
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}",
|
||||||
|
"messages": [{"role": "user", "content": "Explain how I can set sail on the ocean using only power generated by seagulls?"}],
|
||||||
|
"max_tokens": 256,
|
||||||
|
"temperature": 0.9,
|
||||||
|
"stream": true
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
meta {
|
||||||
|
name: add model gallery
|
||||||
|
type: http
|
||||||
|
seq: 10
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"url": "file:///home/dave/projects/model-gallery/huggingface/TheBloke__CodeLlama-7B-Instruct-GGML.yaml",
|
||||||
|
"name": "test"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
meta {
|
||||||
|
name: delete model gallery
|
||||||
|
type: http
|
||||||
|
seq: 11
|
||||||
|
}
|
||||||
|
|
||||||
|
delete {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"name": "test"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
meta {
|
||||||
|
name: list MODELS in galleries
|
||||||
|
type: http
|
||||||
|
seq: 7
|
||||||
|
}
|
||||||
|
|
||||||
|
get {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/available
|
||||||
|
body: none
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
meta {
|
||||||
|
name: list model GALLERIES
|
||||||
|
type: http
|
||||||
|
seq: 8
|
||||||
|
}
|
||||||
|
|
||||||
|
get {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||||
|
body: none
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
meta {
|
||||||
|
name: model gallery apply -gist-
|
||||||
|
type: http
|
||||||
|
seq: 12
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"id": "TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q2_K.bin"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
meta {
|
||||||
|
name: model gallery apply
|
||||||
|
type: http
|
||||||
|
seq: 9
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"id": "dave@TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q3_K_S.bin",
|
||||||
|
"name": "codellama7b"
|
||||||
|
}
|
||||||
|
}
|
||||||
22
examples/bruno/LocalAI Test Requests/tts/-tts.bru
Normal file
22
examples/bruno/LocalAI Test Requests/tts/-tts.bru
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
meta {
|
||||||
|
name: -tts
|
||||||
|
type: http
|
||||||
|
seq: 2
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/tts
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}",
|
||||||
|
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
|
||||||
|
}
|
||||||
|
}
|
||||||
16
examples/chainlit/Dockerfile
Normal file
16
examples/chainlit/Dockerfile
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
# Use an official Python runtime as a parent image
|
||||||
|
FROM harbor.home.sfxworks.net/docker/library/python:3.9-slim
|
||||||
|
|
||||||
|
# Set the working directory in the container
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Copy only requirements.txt into the container at /app
|
||||||
|
COPY requirements.txt /app
|
||||||
|
|
||||||
|
# Install any needed packages specified in requirements.txt
|
||||||
|
RUN pip install -r requirements.txt
|
||||||
|
|
||||||
|
COPY . /app
|
||||||
|
|
||||||
|
# Run main.py with chainlit when the container launches
|
||||||
|
CMD ["chainlit", "run", "-h", "--host", "0.0.0.0", "main.py" ]
|
||||||
25
examples/chainlit/README.md
Normal file
25
examples/chainlit/README.md
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# LocalAI Demonstration with Embeddings and Chainlit
|
||||||
|
|
||||||
|
This demonstration shows you how to use embeddings with existing data in `LocalAI`, and how to integrate it with Chainlit for an interactive querying experience. We are using the `llama_index` library to facilitate the embedding and querying processes, and `chainlit` to provide an interactive interface. The `Weaviate` client is used as the embedding source.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
Before proceeding, make sure you have the following installed:
|
||||||
|
- Weaviate client
|
||||||
|
- LocalAI and its dependencies
|
||||||
|
- Chainlit and its dependencies
|
||||||
|
|
||||||
|
## Getting Started
|
||||||
|
|
||||||
|
1. Clone this repository:
|
||||||
|
2. Navigate to the project directory:
|
||||||
|
3. Run the example: `chainlit run main.py`
|
||||||
|
|
||||||
|
# Highlight on `llama_index` and `chainlit`
|
||||||
|
|
||||||
|
`llama_index` is the key library that facilitates the process of embedding and querying data in LocalAI. It provides a seamless interface to integrate various components, such as `WeaviateVectorStore`, `LocalAI`, `ServiceContext`, and more, for a smooth querying experience.
|
||||||
|
|
||||||
|
`chainlit` is used to provide an interactive interface for users to query the data and see the results in real-time. It integrates with llama_index to handle the querying process and display the results to the user.
|
||||||
|
|
||||||
|
In this example, `llama_index` is used to set up the `VectorStoreIndex` and `QueryEngine`, and `chainlit` is used to handle the user interactions with `LocalAI` and display the results.
|
||||||
|
|
||||||
16
examples/chainlit/config.yaml
Normal file
16
examples/chainlit/config.yaml
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
localAI:
|
||||||
|
temperature: 0
|
||||||
|
modelName: gpt-3.5-turbo
|
||||||
|
apiBase: http://local-ai.default
|
||||||
|
apiKey: stub
|
||||||
|
streaming: True
|
||||||
|
weviate:
|
||||||
|
url: http://weviate.local
|
||||||
|
index: AIChroma
|
||||||
|
query:
|
||||||
|
mode: hybrid
|
||||||
|
topK: 1
|
||||||
|
alpha: 0.0
|
||||||
|
chunkSize: 1024
|
||||||
|
embedding:
|
||||||
|
model: BAAI/bge-small-en-v1.5
|
||||||
82
examples/chainlit/main.py
Normal file
82
examples/chainlit/main.py
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
import weaviate
|
||||||
|
from llama_index.storage.storage_context import StorageContext
|
||||||
|
from llama_index.vector_stores import WeaviateVectorStore
|
||||||
|
|
||||||
|
from llama_index.query_engine.retriever_query_engine import RetrieverQueryEngine
|
||||||
|
from llama_index.callbacks.base import CallbackManager
|
||||||
|
from llama_index import (
|
||||||
|
LLMPredictor,
|
||||||
|
ServiceContext,
|
||||||
|
StorageContext,
|
||||||
|
VectorStoreIndex,
|
||||||
|
)
|
||||||
|
import chainlit as cl
|
||||||
|
|
||||||
|
from llama_index.llms import LocalAI
|
||||||
|
from llama_index.embeddings import HuggingFaceEmbedding
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
# Load the configuration file.
with open("config.yaml", "r") as ymlfile:
    # safe_load returns None for an empty file; fall back to an empty mapping
    # so the defaults below still apply.
    cfg = yaml.safe_load(ymlfile) or {}

# A section that is missing (or present but empty) is treated as an empty
# mapping, so every setting falls back to its default instead of raising.
local_ai_cfg = cfg.get('localAI') or {}
weaviate_cfg = cfg.get('weviate') or {}  # key spelling matches config.yaml
query_cfg = cfg.get('query') or {}
embedding_cfg = cfg.get('embedding') or {}

# Get the values from the configuration file or set the default values.
temperature = local_ai_cfg.get('temperature', 0)
model_name = local_ai_cfg.get('modelName', "gpt-3.5-turbo")
api_base = local_ai_cfg.get('apiBase', "http://local-ai.default")
api_key = local_ai_cfg.get('apiKey', "stub")
streaming = local_ai_cfg.get('streaming', True)
weaviate_url = weaviate_cfg.get('url', "http://weviate.default")
index_name = weaviate_cfg.get('index', "AIChroma")
query_mode = query_cfg.get('mode', "hybrid")
topK = query_cfg.get('topK', 1)
alpha = query_cfg.get('alpha', 0.0)
embed_model_name = embedding_cfg.get('model', "BAAI/bge-small-en-v1.5")
chunk_size = query_cfg.get('chunkSize', 1024)

# Embedding model used by the service context.
embed_model = HuggingFaceEmbedding(model_name=embed_model_name)

# LLM client pointed at the configured LocalAI endpoint.
llm = LocalAI(
    temperature=temperature,
    model_name=model_name,
    api_base=api_base,
    api_key=api_key,
    streaming=streaming,
)
llm.globally_use_chat_completions = True

# Vector store and storage context backed by the configured Weaviate index.
client = weaviate.Client(weaviate_url)
vector_store = WeaviateVectorStore(weaviate_client=client, index_name=index_name)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
|
||||||
|
|
||||||
|
@cl.on_chat_start
async def factory():
    """Build a streaming query engine and store it in the user session.

    Registered with ``cl.on_chat_start``. The engine is built from the
    module-level ``llm``, ``embed_model``, ``vector_store`` and
    ``storage_context`` and saved under the session key ``"query_engine"``.
    """
    llm_predictor = LLMPredictor(llm=llm)

    # The callback manager carries Chainlit's LlamaIndex callback handler.
    service_context = ServiceContext.from_defaults(
        embed_model=embed_model,
        callback_manager=CallbackManager([cl.LlamaIndexCallbackHandler()]),
        llm_predictor=llm_predictor,
        chunk_size=chunk_size,
    )

    index = VectorStoreIndex.from_vector_store(
        vector_store,
        storage_context=storage_context,
        service_context=service_context,
    )

    # streaming=True is required here: the message handler iterates over
    # response.response_gen.
    engine = index.as_query_engine(
        vector_store_query_mode=query_mode,
        similarity_top_k=topK,
        alpha=alpha,
        streaming=True,
    )

    cl.user_session.set("query_engine", engine)
|
||||||
|
|
||||||
|
|
||||||
|
@cl.on_message
async def main(message: cl.Message):
    """Answer an incoming message using the session's query engine.

    Tokens from the response generator are streamed into a new Chainlit
    message, which is then sent.
    """
    engine = cl.user_session.get("query_engine")

    # engine.query is wrapped with cl.make_async so it can be awaited.
    run_query = cl.make_async(engine.query)
    response = await run_query(message.content)

    reply = cl.Message(content="")
    for token in response.response_gen:
        await reply.stream_token(token=token)

    # If the response exposes a non-empty response_txt, use it as the final
    # message content.
    final_text = response.response_txt
    if final_text:
        reply.content = final_text

    await reply.send()
|
||||||
7
examples/chainlit/requirements.txt
Normal file
7
examples/chainlit/requirements.txt
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
llama_hub==0.0.41
|
||||||
|
llama_index==0.8.55
|
||||||
|
Requests==2.31.0
|
||||||
|
weaviate_client==3.25.1
|
||||||
|
transformers
|
||||||
|
torch
|
||||||
|
chainlit
|
||||||
1
examples/chatbot-ui-manual/models
Symbolic link
1
examples/chatbot-ui-manual/models
Symbolic link
@@ -0,0 +1 @@
|
|||||||
|
../models
|
||||||
42
examples/configurations/README.md
Normal file
42
examples/configurations/README.md
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
## Advanced configuration
|
||||||
|
|
||||||
|
This section contains examples on how to install models manually with config files.
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
|
||||||
|
First clone LocalAI:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git clone https://github.com/go-skynet/LocalAI
|
||||||
|
|
||||||
|
cd LocalAI
|
||||||
|
```
|
||||||
|
|
||||||
|
Set up the model you prefer from the examples below and then start LocalAI:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up -d --pull always
|
||||||
|
```
|
||||||
|
|
||||||
|
If LocalAI is already started, you can restart it with
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose restart
|
||||||
|
```
|
||||||
|
|
||||||
|
See also the getting started guide: https://localai.io/basics/getting_started/
|
||||||
|
|
||||||
|
### Mistral
|
||||||
|
|
||||||
|
To set up Mistral, copy the files inside `mistral` into the `models` folder:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cp -r examples/configurations/mistral/* models/
|
||||||
|
```
|
||||||
|
|
||||||
|
Now download the model:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
wget https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF/resolve/main/mistral-7b-openorca.Q6_K.gguf -O models/mistral-7b-openorca.Q6_K.gguf
|
||||||
|
```
|
||||||
|
|
||||||
18
examples/configurations/llava/README.md
Normal file
18
examples/configurations/llava/README.md
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|

|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
```
|
||||||
|
mkdir models
|
||||||
|
wget https://huggingface.co/mys/ggml_bakllava-1/resolve/main/ggml-model-q4_k.gguf -O models/ggml-model-q4_k.gguf
|
||||||
|
wget https://huggingface.co/mys/ggml_bakllava-1/resolve/main/mmproj-model-f16.gguf -O models/mmproj-model-f16.gguf
|
||||||
|
docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:master --models-path /models --threads 4
|
||||||
|
```
|
||||||
|
|
||||||
|
## Try it out
|
||||||
|
|
||||||
|
```
|
||||||
|
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||||
|
"model": "llava",
|
||||||
|
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
|
||||||
|
```
|
||||||
3
examples/configurations/llava/chat-simple.tmpl
Normal file
3
examples/configurations/llava/chat-simple.tmpl
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
||||||
|
{{.Input}}
|
||||||
|
ASSISTANT:
|
||||||
20
examples/configurations/llava/llava.yaml
Normal file
20
examples/configurations/llava/llava.yaml
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
|
||||||
|
context_size: 4096
f16: true
threads: 11
gpu_layers: 90
# Model name used in API requests ("model": "llava").
name: llava
mmap: true
backend: llama-cpp
# Prefix strings for each chat role.
roles:
  user: "USER:"
  assistant: "ASSISTANT:"
  system: "SYSTEM:"
parameters:
  # Model file name; the README downloads it into the models directory.
  model: ggml-model-q4_k.gguf
  temperature: 0.2
  top_k: 40
  top_p: 0.95
template:
  # Refers to chat-simple.tmpl.
  chat: chat-simple
# Projector file name; the README downloads it into the models directory.
mmproj: mmproj-model-f16.gguf
|
||||||
3
examples/configurations/mistral/chatml-block.tmpl
Normal file
3
examples/configurations/mistral/chatml-block.tmpl
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
{{.Input}}
|
||||||
|
<|im_start|>assistant
|
||||||
|
|
||||||
3
examples/configurations/mistral/chatml.tmpl
Normal file
3
examples/configurations/mistral/chatml.tmpl
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
|
||||||
|
{{if .Content}}{{.Content}}{{end}}
|
||||||
|
<|im_end|>
|
||||||
16
examples/configurations/mistral/mistral.yaml
Normal file
16
examples/configurations/mistral/mistral.yaml
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
name: mistral
mmap: true
parameters:
  # Model file name; the README downloads it into the models directory.
  model: mistral-7b-openorca.Q6_K.gguf
  temperature: 0.2
  top_k: 40
  top_p: 0.95
template:
  # chatml and chatml-block refer to the .tmpl files shipped next to this config.
  chat_message: chatml
  chat: chatml-block
  completion: completion
context_size: 4096
f16: true
stopwords:
  - '<|im_end|>'
threads: 4
|
||||||
@@ -1,3 +1,6 @@
|
|||||||
|
# CPU .env docs: https://localai.io/howtos/easy-setup-docker-cpu/
|
||||||
|
# GPU .env docs: https://localai.io/howtos/easy-setup-docker-gpu/
|
||||||
|
|
||||||
OPENAI_API_KEY=x
|
OPENAI_API_KEY=x
|
||||||
DISCORD_BOT_TOKEN=x
|
DISCORD_BOT_TOKEN=x
|
||||||
DISCORD_CLIENT_ID=x
|
DISCORD_CLIENT_ID=x
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
../chatbot-ui/models/
|
../models
|
||||||
@@ -1,7 +1,11 @@
|
|||||||
|
# CPU .env docs: https://localai.io/howtos/easy-setup-docker-cpu/
|
||||||
|
# GPU .env docs: https://localai.io/howtos/easy-setup-docker-gpu/
|
||||||
|
|
||||||
OPENAI_API_KEY=sk---anystringhere
|
OPENAI_API_KEY=sk---anystringhere
|
||||||
OPENAI_API_BASE=http://api:8080/v1
|
OPENAI_API_BASE=http://api:8080/v1
|
||||||
# Models to preload at start
|
# Models to preload at start
|
||||||
# Here we configure gpt4all as gpt-3.5-turbo and bert as embeddings
|
# Here we configure gpt4all as gpt-3.5-turbo and bert as embeddings,
|
||||||
|
# see other options in the model gallery at https://github.com/go-skynet/model-gallery
|
||||||
PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/openllama-7b-open-instruct.yaml", "name": "gpt-3.5-turbo"}]
|
PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/openllama-7b-open-instruct.yaml", "name": "gpt-3.5-turbo"}]
|
||||||
|
|
||||||
## Change the default number of threads
|
## Change the default number of threads
|
||||||
@@ -10,9 +10,12 @@ git clone https://github.com/go-skynet/LocalAI
|
|||||||
|
|
||||||
cd LocalAI/examples/functions
|
cd LocalAI/examples/functions
|
||||||
|
|
||||||
|
cp -rfv .env.example .env
|
||||||
|
|
||||||
|
# Edit the .env file to set a different model by editing `PRELOAD_MODELS`.
|
||||||
|
vim .env
|
||||||
|
|
||||||
docker-compose run --rm functions
|
docker-compose run --rm functions
|
||||||
```
|
```
|
||||||
|
|
||||||
Note: The example automatically downloads the `openllama` model as it is under a permissive license.
|
Note: The example automatically downloads the `openllama` model as it is under a permissive license.
|
||||||
|
|
||||||
See the `.env` configuration file to set a different model with the [model-gallery](https://github.com/go-skynet/model-gallery) by editing `PRELOAD_MODELS`.
|
|
||||||
|
|||||||
@@ -1,3 +1,6 @@
|
|||||||
|
# CPU .env docs: https://localai.io/howtos/easy-setup-docker-cpu/
|
||||||
|
# GPU .env docs: https://localai.io/howtos/easy-setup-docker-gpu/
|
||||||
|
|
||||||
THREADS=4
|
THREADS=4
|
||||||
CONTEXT_SIZE=512
|
CONTEXT_SIZE=512
|
||||||
MODELS_PATH=/models
|
MODELS_PATH=/models
|
||||||
|
|||||||
1
examples/langchain-chroma/models
Symbolic link
1
examples/langchain-chroma/models
Symbolic link
@@ -0,0 +1 @@
|
|||||||
|
../models
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
name: gpt-3.5-turbo
|
|
||||||
parameters:
|
|
||||||
model: ggml-gpt4all-j
|
|
||||||
top_k: 80
|
|
||||||
temperature: 0.2
|
|
||||||
top_p: 0.7
|
|
||||||
context_size: 1024
|
|
||||||
stopwords:
|
|
||||||
- "HUMAN:"
|
|
||||||
- "GPT:"
|
|
||||||
roles:
|
|
||||||
user: " "
|
|
||||||
system: " "
|
|
||||||
template:
|
|
||||||
completion: completion
|
|
||||||
chat: gpt4all
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
|
|
||||||
### Prompt:
|
|
||||||
{{.Input}}
|
|
||||||
### Response:
|
|
||||||
1
examples/langchain-huggingface/models
Symbolic link
1
examples/langchain-huggingface/models
Symbolic link
@@ -0,0 +1 @@
|
|||||||
|
../models
|
||||||
@@ -1 +0,0 @@
|
|||||||
{{.Input}}
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
name: gpt-3.5-turbo
|
|
||||||
parameters:
|
|
||||||
model: gpt2
|
|
||||||
top_k: 80
|
|
||||||
temperature: 0.2
|
|
||||||
top_p: 0.7
|
|
||||||
context_size: 1024
|
|
||||||
backend: "langchain-huggingface"
|
|
||||||
stopwords:
|
|
||||||
- "HUMAN:"
|
|
||||||
- "GPT:"
|
|
||||||
roles:
|
|
||||||
user: " "
|
|
||||||
system: " "
|
|
||||||
template:
|
|
||||||
completion: completion
|
|
||||||
chat: gpt4all
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
|
|
||||||
### Prompt:
|
|
||||||
{{.Input}}
|
|
||||||
### Response:
|
|
||||||
1
examples/langchain/models
Symbolic link
1
examples/langchain/models
Symbolic link
@@ -0,0 +1 @@
|
|||||||
|
../models
|
||||||
@@ -1 +0,0 @@
|
|||||||
{{.Input}}
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
name: gpt-3.5-turbo
|
|
||||||
parameters:
|
|
||||||
model: ggml-gpt4all-j # ggml-koala-13B-4bit-128g
|
|
||||||
top_k: 80
|
|
||||||
temperature: 0.2
|
|
||||||
top_p: 0.7
|
|
||||||
context_size: 1024
|
|
||||||
stopwords:
|
|
||||||
- "HUMAN:"
|
|
||||||
- "GPT:"
|
|
||||||
roles:
|
|
||||||
user: " "
|
|
||||||
system: " "
|
|
||||||
backend: "gptj"
|
|
||||||
template:
|
|
||||||
completion: completion
|
|
||||||
chat: gpt4all
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
|
|
||||||
### Prompt:
|
|
||||||
{{.Input}}
|
|
||||||
### Response:
|
|
||||||
30
examples/llamaindex/README.md
Normal file
30
examples/llamaindex/README.md
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
# LocalAI Demonstration with Embeddings
|
||||||
|
|
||||||
|
This demonstration shows you how to use embeddings with existing data in LocalAI. We are using the `llama_index` library to facilitate the embedding and querying processes. The `Weaviate` client is used to access the stored embeddings.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
Before proceeding, make sure you have the following installed:
|
||||||
|
- Weaviate client
|
||||||
|
- LocalAI and its dependencies
|
||||||
|
- llama_index and its dependencies
|
||||||
|
|
||||||
|
## Getting Started
|
||||||
|
|
||||||
|
1. Clone this repository:
|
||||||
|
|
||||||
|
2. Navigate to the project directory:
|
||||||
|
|
||||||
|
3. Run the example:
|
||||||
|
|
||||||
|
`python main.py`
|
||||||
|
|
||||||
|
```
|
||||||
|
Downloading (…)lve/main/config.json: 100%|███████████████████████████| 684/684 [00:00<00:00, 6.01MB/s]
|
||||||
|
Downloading model.safetensors: 100%|███████████████████████████████| 133M/133M [00:03<00:00, 39.5MB/s]
|
||||||
|
Downloading (…)okenizer_config.json: 100%|███████████████████████████| 366/366 [00:00<00:00, 2.79MB/s]
|
||||||
|
Downloading (…)solve/main/vocab.txt: 100%|█████████████████████████| 232k/232k [00:00<00:00, 6.00MB/s]
|
||||||
|
Downloading (…)/main/tokenizer.json: 100%|█████████████████████████| 711k/711k [00:00<00:00, 18.8MB/s]
|
||||||
|
Downloading (…)cial_tokens_map.json: 100%|███████████████████████████| 125/125 [00:00<00:00, 1.18MB/s]
|
||||||
|
LocalAI is a community-driven project that aims to make AI accessible to everyone. It was created by Ettore Di Giacinto and is focused on providing various AI-related features such as text generation with GPTs, text to audio, audio to text, image generation, and more. The project is constantly growing and evolving, with a roadmap for future improvements. Anyone is welcome to contribute, provide feedback, and submit pull requests to help make LocalAI better.
|
||||||
|
```
|
||||||
38
examples/llamaindex/main.py
Normal file
38
examples/llamaindex/main.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
import weaviate
|
||||||
|
|
||||||
|
from llama_index import ServiceContext, VectorStoreIndex, StorageContext
|
||||||
|
from llama_index.llms import LocalAI
|
||||||
|
from llama_index.vector_stores import WeaviateVectorStore
|
||||||
|
from llama_index.storage.storage_context import StorageContext
|
||||||
|
|
||||||
|
# Endpoints and names used by this example.
WEAVIATE_URL = "http://weviate.default"
INDEX_NAME = "AIChroma"
LOCALAI_API_BASE = "http://local-ai.default"
MODEL_NAME = "gpt-3.5-turbo"

# Connect to Weaviate and wrap the existing index as a vector store.
client = weaviate.Client(WEAVIATE_URL)
vector_store = WeaviateVectorStore(
    weaviate_client=client,
    index_name=INDEX_NAME,
)

# Storage context backed by that vector store.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# LLM client pointed at the LocalAI endpoint.
llm = LocalAI(
    temperature=0,
    model_name=MODEL_NAME,
    api_base=LOCALAI_API_BASE,
    api_key="stub",
)
llm.globally_use_chat_completions = True

# Service context combining the LocalAI LLM with the "local" embedding model.
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local")

# Build the index from the vectors already stored in Weaviate.
index = VectorStoreIndex.from_vector_store(
    vector_store,
    storage_context=storage_context,
    service_context=service_context,
)

# Hybrid query mode, returning the single most similar result.
query_engine = index.as_query_engine(
    similarity_top_k=1,
    vector_store_query_mode="hybrid",
)

# Run an example query and print the answer.
response = query_engine.query("What is LocalAI?")
print(response)
|
||||||
@@ -8,8 +8,6 @@ services:
|
|||||||
dockerfile: Dockerfile
|
dockerfile: Dockerfile
|
||||||
ports:
|
ports:
|
||||||
- 8080:8080
|
- 8080:8080
|
||||||
env_file:
|
|
||||||
- .env
|
|
||||||
volumes:
|
volumes:
|
||||||
- ./models:/models:cached
|
- ./models:/models:cached
|
||||||
command: ["/usr/bin/local-ai"]
|
command: ["/usr/bin/local-ai"]
|
||||||
|
|||||||
7
examples/models/.gitignore
vendored
Normal file
7
examples/models/.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
# Ignore everything but predefined models
|
||||||
|
*
|
||||||
|
!.gitignore
|
||||||
|
!completion.tmpl
|
||||||
|
!embeddings.yaml
|
||||||
|
!gpt4all.tmpl
|
||||||
|
!gpt-3.5-turbo.yaml
|
||||||
1
examples/query_data/models
Symbolic link
1
examples/query_data/models
Symbolic link
@@ -0,0 +1 @@
|
|||||||
|
../models
|
||||||
@@ -1 +0,0 @@
|
|||||||
{{.Input}}
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
name: text-embedding-ada-002
|
|
||||||
parameters:
|
|
||||||
model: bert
|
|
||||||
threads: 14
|
|
||||||
backend: bert-embeddings
|
|
||||||
embeddings: true
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
name: gpt-3.5-turbo
|
|
||||||
parameters:
|
|
||||||
model: ggml-gpt4all-j
|
|
||||||
top_k: 80
|
|
||||||
temperature: 0.2
|
|
||||||
top_p: 0.7
|
|
||||||
context_size: 1024
|
|
||||||
stopwords:
|
|
||||||
- "HUMAN:"
|
|
||||||
- "GPT:"
|
|
||||||
roles:
|
|
||||||
user: " "
|
|
||||||
system: " "
|
|
||||||
template:
|
|
||||||
completion: completion
|
|
||||||
chat: gpt4all
|
|
||||||
@@ -1,3 +1,6 @@
|
|||||||
|
# CPU .env docs: https://localai.io/howtos/easy-setup-docker-cpu/
|
||||||
|
# GPU .env docs: https://localai.io/howtos/easy-setup-docker-gpu/
|
||||||
|
|
||||||
SLACK_APP_TOKEN=xapp-1-...
|
SLACK_APP_TOKEN=xapp-1-...
|
||||||
SLACK_BOT_TOKEN=xoxb-...
|
SLACK_BOT_TOKEN=xoxb-...
|
||||||
OPENAI_API_KEY=sk-...
|
OPENAI_API_KEY=sk-...
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ git clone https://github.com/seratch/ChatGPT-in-Slack
|
|||||||
# Download gpt4all-j to models/
|
# Download gpt4all-j to models/
|
||||||
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
|
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
|
||||||
|
|
||||||
# Set the discord bot options (see: https://github.com/seratch/ChatGPT-in-Slack)
|
# Set the Slack bot options (see: https://github.com/seratch/ChatGPT-in-Slack)
|
||||||
cp -rfv .env.example .env
|
cp -rfv .env.example .env
|
||||||
vim .env
|
vim .env
|
||||||
|
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user