Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-08 21:53:19 -05:00)

Compare commits: v.1.24.0 ... enable_gpu (185 commits)
| SHA1 |
|---|
| a8e91345e2 |
| ea4ade6b60 |
| 803a0ac02a |
| bde87d00b9 |
| 0eae727366 |
| 3b4c5d54d8 |
| 4e16bc2f13 |
| 562ac62f59 |
| 796d0c99aa |
| e7fa2e06f8 |
| 8123f009d0 |
| 622aaa9f7d |
| 7b1ee203ce |
| f347e51927 |
| 9b17af18b3 |
| 23c7fbfe6b |
| 035fea676a |
| 6e1a234d15 |
| 5b596ea605 |
| 6bd56460de |
| 6ef7ea2635 |
| f8c00fbaf1 |
| d9a42cc4c5 |
| fc0bc32814 |
| c62504ac92 |
| f227e918f9 |
| c132dbadce |
| b839eb80a1 |
| 23b03a7f03 |
| 9196583651 |
| fd28252e55 |
| 94f20e2eb7 |
| 5ced99a8e7 |
| c377e61ff0 |
| a6fe0a020a |
| bf2ed3d752 |
| d17a92eef3 |
| 1a7be035d3 |
| 004baaa30f |
| ef19268418 |
| e82470341f |
| 88fa42de75 |
| 432513c3ba |
| 45370c212b |
| e91f660eb1 |
| 3f3162e57c |
| 208d1fce58 |
| 128694213f |
| 8034ed3473 |
| d22069c59e |
| 5a04d32b39 |
| ab65f3a17d |
| 4e23cbebcf |
| 63418c1afc |
| 8ca671761a |
| 81a5ed9f31 |
| 528b9d9206 |
| 1a4c57fac2 |
| 44a7045732 |
| 8ac7186185 |
| 975387f7ae |
| d793b5af5e |
| 5188776224 |
| 07249c0446 |
| 188301f403 |
| e660721a0c |
| e029cc66bc |
| e34b5f0119 |
| c223364816 |
| 74fd5844ca |
| 4ebc86df84 |
| 8cd03eff58 |
| 46660a16a0 |
| 27b097309e |
| d0fa1f8e94 |
| 55e38fea0e |
| 274ace2898 |
| a8cc3709c6 |
| a28ab18987 |
| 048b81373d |
| aea1d62ae6 |
| 601e54000d |
| 7bdf707dd3 |
| 4a7e7e9fdb |
| bdf3f95346 |
| 453e9c5da9 |
| 3a69bd3ef5 |
| a69c0f765e |
| 97d1367764 |
| 880e21288e |
| 2ba9762255 |
| 30f120ee6a |
| 28a36e20aa |
| a8fb4d23f8 |
| f37a4ec9c8 |
| 31ed13094b |
| 8ccf5b2044 |
| 247d85b523 |
| 54688db994 |
| 8590f5a599 |
| 289d51c049 |
| 813eaa867c |
| abffb16292 |
| 50e439f633 |
| 25eb1415df |
| 0b28220f2b |
| 5661740990 |
| 255c31bddf |
| 7888fefeea |
| 0937835802 |
| ea806b37ac |
| d6614f3149 |
| 9a50a39848 |
| 2793e8f327 |
| c0bb5c4bf6 |
| cc74fc93b4 |
| 44b39195d6 |
| 2454110d81 |
| ee59e7d45f |
| 605c319157 |
| dc307a1cc0 |
| e7981152b2 |
| b3eb5c860b |
| 1c2f7409e3 |
| 57d41a3f94 |
| f9d2bd24eb |
| 0e7e8eec53 |
| 9a30a246d8 |
| c332499252 |
| 005f289632 |
| 3d7553317f |
| 8e4f6b2ee5 |
| d5cad7d3ae |
| 355e9d4fb5 |
| 629185e10a |
| deeef5fc24 |
| b905c07650 |
| 1ff30034e8 |
| c64b59c80c |
| 9a869bbaf6 |
| fe1b54b713 |
| cc84dfd50f |
| 158c7867e7 |
| 997c39ccd5 |
| 3bab307904 |
| 02704e38d3 |
| 9e5fb29965 |
| 7dba131d5f |
| ce0b771217 |
| 44bc7aa3d0 |
| 7f0c88ed3e |
| d15508f52c |
| b111423b9c |
| 215a51c4c1 |
| 1120847f72 |
| 704323b805 |
| 10b0e13882 |
| 901f0709c5 |
| 0d6165e481 |
| 6583eed6b2 |
| a9ca70ad4a |
| ab5b75eb01 |
| cc060a283d |
| 28db83e17b |
| dbb1f86455 |
| 02f7c555af |
| d982b38f76 |
| bc2e4b952e |
| afdc0ebfd7 |
| 1079b18ff7 |
| 8cb1061c11 |
| 2bacd0180d |
| ddf9bc2335 |
| a1afd940e3 |
| 8bb76201c0 |
| ede71d398c |
| 0c73a637f1 |
| 37700f2d98 |
| 0ec695f9e4 |
| 7ffd21dbc8 |
| 48b3920656 |
| 63d91af555 |
| a96c3bc885 |
| 77e1ae3d70 |
| 9cc8d90865 |
.env (26 lines changed)

@@ -23,6 +23,12 @@ MODELS_PATH=/models
 ## Enable debug mode
 # DEBUG=true
 
+## Disables COMPEL (Diffusers)
+# COMPEL=0
+
+## Enable/Disable single backend (useful if only one GPU is available)
+# SINGLE_ACTIVE_BACKEND=true
+
 ## Specify a build type. Available: cublas, openblas, clblas.
 ## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
 ## OpenBLAS: This is an open-source implementation of the BLAS library that aims to provide highly optimized code for various platforms. It includes support for multi-threading and can be compiled to use hardware-specific features for additional performance. OpenBLAS can run on many kinds of hardware, including CPUs from Intel, AMD, and ARM.
@@ -44,3 +50,23 @@ MODELS_PATH=/models
 
 ## Specify a default upload limit in MB (whisper)
 # UPLOAD_LIMIT
+
+## List of external GRPC backends (note on the container image this variable is already set to use extra backends available in extra/)
+# EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py
+
+### Advanced settings ###
+### Those are not really used by LocalAI, but from components in the stack ###
+##
+### Preload libraries
+# LD_PRELOAD=
+
+### Huggingface cache for models
+# HUGGINGFACE_HUB_CACHE=/usr/local/huggingface
+
+### Python backends GRPC max workers
+### Default number of workers for GRPC Python backends.
+### This actually controls wether a backend can process multiple requests or not.
+# PYTHON_GRPC_MAX_WORKERS=1
+
+### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
+# LLAMACPP_PARALLEL=1
.github/PULL_REQUEST_TEMPLATE.md (vendored, 16 lines changed)

@@ -8,16 +8,24 @@ This PR fixes #
 **[Signed commits](../CONTRIBUTING.md#signing-off-on-commits-developer-certificate-of-origin)**
 - [ ] Yes, I signed my commits.
 
 
 <!--
 Thank you for contributing to LocalAI!
 
-Contributing Conventions:
+Contributing Conventions
+-------------------------
 
-1. Include descriptive PR titles with [<component-name>] prepended.
-2. Build and test your changes before submitting a PR.
+The draft above helps to give a quick overview of your PR.
+
+Remember to remove this comment and to at least:
+
+1. Include descriptive PR titles with [<component-name>] prepended. We use [conventional commits](https://www.conventionalcommits.org/en/v1.0.0/).
+2. Build and test your changes before submitting a PR (`make build`).
 3. Sign your commits
+4. **Tag maintainer:** for a quicker response, tag the relevant maintainer (see below).
+5. **X/Twitter handle:** we announce bigger features on X/Twitter. If your PR gets announced, and you'd like a mention, we'll gladly shout you out!
 
 By following the community's contribution conventions upfront, the review process will
 be accelerated and your PR merged more quickly.
+
+If no one reviews your PR within a few days, please @-mention @mudler.
 -->
.github/workflows/bump_deps.yaml (vendored, 5 lines changed)

@@ -12,6 +12,9 @@ jobs:
         - repository: "go-skynet/go-llama.cpp"
           variable: "GOLLAMA_VERSION"
           branch: "master"
+        - repository: "ggerganov/llama.cpp"
+          variable: "CPPLLAMA_VERSION"
+          branch: "master"
         - repository: "go-skynet/go-ggml-transformers.cpp"
           variable: "GOGGMLTRANSFORMERS_VERSION"
           branch: "master"
@@ -41,7 +44,7 @@ jobs:
           branch: "master"
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Bump dependencies 🔧
        run: |
          bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
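For reference, the bump step above is just a plain script call; with the newly added llama.cpp matrix entry it expands roughly as follows (a sketch, assuming the script takes repository, branch and variable in that order, as the `run:` line shows):

    # Equivalent of the workflow step for the new matrix entry.
    bash .github/bump_deps.sh "ggerganov/llama.cpp" "master" "CPPLLAMA_VERSION"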
.github/workflows/image.yml (vendored, 100 lines changed)

@@ -14,15 +14,21 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  docker:
+  image-build:
     strategy:
       matrix:
         include:
           - build-type: ''
-            platforms: 'linux/amd64,linux/arm64'
+            #platforms: 'linux/amd64,linux/arm64'
+            platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: ''
             ffmpeg: ''
+          - build-type: ''
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-ffmpeg'
+            ffmpeg: 'true'
           - build-type: 'cublas'
             cuda-major-version: 11
             cuda-minor-version: 7
@@ -37,11 +43,6 @@ jobs:
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12'
             ffmpeg: ''
-          - build-type: ''
-            platforms: 'linux/amd64,linux/arm64'
-            tag-latest: 'false'
-            tag-suffix: '-ffmpeg'
-            ffmpeg: 'true'
           - build-type: 'cublas'
             cuda-major-version: 11
             cuda-minor-version: 7
@@ -57,46 +58,57 @@ jobs:
             tag-suffix: '-cublas-cuda12-ffmpeg'
             ffmpeg: 'true'
 
-    runs-on: ubuntu-latest
+    runs-on: arc-runner-set
     steps:
-      - name: Release space from worker
+      - name: Force Install GIT latest
         run: |
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          df -h
-          echo
-          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
-          sudo apt-get remove --auto-remove android-sdk-platform-tools || true
-          sudo apt-get purge --auto-remove android-sdk-platform-tools || true
-          sudo rm -rf /usr/local/lib/android
-          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
-          sudo rm -rf /usr/share/dotnet
-          sudo apt-get remove -y '^mono-.*' || true
-          sudo apt-get remove -y '^ghc-.*' || true
-          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
-          sudo apt-get remove -y 'php.*' || true
-          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
-          sudo apt-get remove -y '^google-.*' || true
-          sudo apt-get remove -y azure-cli || true
-          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
-          sudo apt-get remove -y '^gfortran-.*' || true
-          sudo apt-get autoremove -y
-          sudo apt-get clean
-          echo
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          sudo rm -rfv build || true
-          df -h
+          sudo apt-get update \
+          && sudo apt-get install -y software-properties-common \
+          && sudo apt-get update \
+          && sudo add-apt-repository -y ppa:git-core/ppa \
+          && sudo apt-get update \
+          && sudo apt-get install -y git
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
+      # - name: Release space from worker
+      #   run: |
+      #     echo "Listing top largest packages"
+      #     pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
+      #     head -n 30 <<< "${pkgs}"
+      #     echo
+      #     df -h
+      #     echo
+      #     sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
+      #     sudo apt-get remove --auto-remove android-sdk-platform-tools || true
+      #     sudo apt-get purge --auto-remove android-sdk-platform-tools || true
+      #     sudo rm -rf /usr/local/lib/android
+      #     sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
+      #     sudo rm -rf /usr/share/dotnet
+      #     sudo apt-get remove -y '^mono-.*' || true
+      #     sudo apt-get remove -y '^ghc-.*' || true
+      #     sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
+      #     sudo apt-get remove -y 'php.*' || true
+      #     sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
+      #     sudo apt-get remove -y '^google-.*' || true
+      #     sudo apt-get remove -y azure-cli || true
+      #     sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
+      #     sudo apt-get remove -y '^gfortran-.*' || true
+      #     sudo apt-get remove -y microsoft-edge-stable || true
+      #     sudo apt-get remove -y firefox || true
+      #     sudo apt-get remove -y powershell || true
+      #     sudo apt-get remove -y r-base-core || true
+      #     sudo apt-get autoremove -y
+      #     sudo apt-get clean
+      #     echo
+      #     echo "Listing top largest packages"
+      #     pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
+      #     head -n 30 <<< "${pkgs}"
+      #     echo
+      #     sudo rm -rfv build || true
+      #     df -h
       - name: Docker meta
         id: meta
-        uses: docker/metadata-action@v4
+        uses: docker/metadata-action@v5
         with:
           images: quay.io/go-skynet/local-ai
           tags: |
@@ -118,14 +130,14 @@ jobs:
 
       - name: Login to DockerHub
         if: github.event_name != 'pull_request'
-        uses: docker/login-action@v2
+        uses: docker/login-action@v3
         with:
           registry: quay.io
           username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
           password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
 
       - name: Build and push
-        uses: docker/build-push-action@v4
+        uses: docker/build-push-action@v5
        with:
          builder: ${{ steps.buildx.outputs.name }}
          build-args: |
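The matrix above maps build types and tag suffixes to images published on quay.io/go-skynet/local-ai. As a hedged illustration of how one of those variants would be consumed, note that the exact tag name below is an assumption stitched together from the tag-suffix values; the real tags depend on the metadata-action configuration:

    # Assumed tag name; check the registry for the tags actually published.
    docker pull quay.io/go-skynet/local-ai:master-cublas-cuda12-ffmpeg
    docker run --gpus all -p 8080:8080 -v "$PWD/models:/models" \
      quay.io/go-skynet/local-ai:master-cublas-cuda12-ffmpeg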
.github/workflows/release.yaml (vendored, 24 lines changed)

@@ -19,13 +19,22 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Clone
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           submodules: true
+      - uses: actions/setup-go@v4
+        with:
+          go-version: '>=1.21.0'
       - name: Dependencies
         run: |
           sudo apt-get update
           sudo apt-get install build-essential ffmpeg
+
+          git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+            cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
+              -DgRPC_BUILD_TESTS=OFF \
+              ../.. && sudo make -j12 install
+
       - name: Build
         id: build
         env:
@@ -57,15 +66,26 @@ jobs:
     runs-on: macOS-latest
     steps:
       - name: Clone
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           submodules: true
+      - uses: actions/setup-go@v4
+        with:
+          go-version: '>=1.21.0'
+      - name: Dependencies
+        run: |
+          git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+            cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
+              -DgRPC_BUILD_TESTS=OFF \
+              ../.. && make -j12 install && rm -rf grpc
       - name: Build
         id: build
         env:
           CMAKE_ARGS: "${{ matrix.defines }}"
           BUILD_ID: "${{ matrix.build }}"
         run: |
+          export C_INCLUDE_PATH=/usr/local/include
+          export CPLUS_INCLUDE_PATH=/usr/local/include
           make dist
       - uses: actions/upload-artifact@v3
        with:
.github/workflows/test-gpu.yml (vendored, new file, 63 lines)

@@ -0,0 +1,63 @@
+---
+name: 'GPU tests'
+
+on:
+  pull_request:
+  push:
+    branches:
+      - master
+    tags:
+      - '*'
+
+concurrency:
+  group: ci-gpu-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
+  cancel-in-progress: true
+
+jobs:
+  ubuntu-latest:
+    runs-on: gpu
+    strategy:
+      matrix:
+        go-version: ['1.21.x']
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+      - name: Setup Go ${{ matrix.go-version }}
+        uses: actions/setup-go@v4
+        with:
+          go-version: ${{ matrix.go-version }}
+      # You can test your matrix by printing the current Go version
+      - name: Display Go version
+        run: go version
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo DEBIAN_FRONTEND=noninteractive apt-get install -y make wget
+      - name: Build
+        run: |
+          if [ ! -e /run/systemd/system ]; then
+            sudo mkdir /run/systemd/system
+          fi
+          sudo mkdir -p /host/tests/${{ github.head_ref || github.ref }}
+          sudo chmod -R 777 /host/tests/${{ github.head_ref || github.ref }}
+          make \
+            TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
+            BUILD_TYPE=cublas \
+            prepare-e2e run-e2e-image test-e2e
+      - name: Release space from worker ♻
+        if: always()
+        run: |
+          sudo rm -rf build || true
+          sudo rm -rf bin || true
+          sudo rm -rf dist || true
+          sudo docker logs $(sudo docker ps -q --filter ancestor=localai-tests) > logs.txt
+          sudo cat logs.txt || true
+          sudo rm -rf logs.txt
+          make clean || true
+          make \
+            TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
+            teardown-e2e || true
+          sudo rm -rf /host/tests/${{ github.head_ref || github.ref }} || true
+          docker system prune -f -a --volumes || true
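The GPU job drives the e2e suite entirely through make targets, so the same flow can be reproduced on a CUDA host. A minimal sketch, with the scratch directory chosen arbitrarily:

    # Reproduce the GPU e2e flow from the workflow above.
    export TEST_DIR=/tmp/localai-e2e   # any writable scratch directory
    mkdir -p "$TEST_DIR"
    make TEST_DIR="$TEST_DIR" BUILD_TYPE=cublas prepare-e2e run-e2e-image test-e2e
    # Clean up afterwards, mirroring the "Release space from worker" step.
    make TEST_DIR="$TEST_DIR" teardown-e2e || true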
.github/workflows/test.yml (vendored, 112 lines changed)

@@ -14,51 +14,113 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  ubuntu-latest:
+  tests-linux:
     runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        go-version: ['1.21.x']
     steps:
+      - name: Release space from worker
+        run: |
+          echo "Listing top largest packages"
+          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
+          head -n 30 <<< "${pkgs}"
+          echo
+          df -h
+          echo
+          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
+          sudo apt-get remove --auto-remove android-sdk-platform-tools || true
+          sudo apt-get purge --auto-remove android-sdk-platform-tools || true
+          sudo rm -rf /usr/local/lib/android
+          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
+          sudo rm -rf /usr/share/dotnet
+          sudo apt-get remove -y '^mono-.*' || true
+          sudo apt-get remove -y '^ghc-.*' || true
+          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
+          sudo apt-get remove -y 'php.*' || true
+          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
+          sudo apt-get remove -y '^google-.*' || true
+          sudo apt-get remove -y azure-cli || true
+          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
+          sudo apt-get remove -y '^gfortran-.*' || true
+          sudo apt-get autoremove -y
+          sudo apt-get clean
+          echo
+          echo "Listing top largest packages"
+          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
+          head -n 30 <<< "${pkgs}"
+          echo
+          sudo rm -rfv build || true
+          df -h
       - name: Clone
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           submodules: true
+      - name: Setup Go ${{ matrix.go-version }}
+        uses: actions/setup-go@v4
+        with:
+          go-version: ${{ matrix.go-version }}
+      # You can test your matrix by printing the current Go version
+      - name: Display Go version
+        run: go version
       - name: Dependencies
         run: |
           sudo apt-get update
           sudo apt-get install build-essential ffmpeg
+          curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
+          sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
+          gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
+          sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
+          sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
+          sudo apt-get update && \
+          sudo apt-get install -y conda
           sudo apt-get install -y ca-certificates cmake curl patch
           sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
-          sudo pip install -r extra/requirements.txt
 
-          sudo mkdir /build && sudo chmod -R 777 /build && cd /build && \
-          curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v1.11.0.tar.gz" | \
-          tar -xzvf - && \
-          mkdir -p "spdlog-1.11.0/build" && \
-          cd "spdlog-1.11.0/build" && \
-          cmake .. && \
-          make -j8 && \
-          sudo cmake --install . --prefix /usr && mkdir -p "lib/Linux-$(uname -m)" && \
-          cd /build && \
-          mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \
-          curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v1.0.0/libpiper_phonemize-amd64.tar.gz" | \
-          tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
-          sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
-          sudo ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \
-          sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
+          sudo rm -rfv /usr/bin/conda || true
+          PATH=$PATH:/opt/conda/bin make -C extra/grpc/huggingface
+
+          # Pre-build piper before we start tests in order to have shared libraries in place
+          make go-piper && \
+          GO_TAGS="tts" make -C go-piper piper.o && \
+          sudo cp -rfv go-piper/piper/build/pi/lib/. /usr/lib/ && \
+
+          # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
+          GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
+
+          git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+          cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
+            -DgRPC_BUILD_TESTS=OFF \
+            ../.. && sudo make -j12 install
       - name: Test
         run: |
-          ESPEAK_DATA="/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data" GO_TAGS="tts stablediffusion" make test
+          GO_TAGS="stablediffusion tts" make test
 
-  macOS-latest:
+  tests-apple:
     runs-on: macOS-latest
+    strategy:
+      matrix:
+        go-version: ['1.21.x']
     steps:
       - name: Clone
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           submodules: true
+      - name: Setup Go ${{ matrix.go-version }}
+        uses: actions/setup-go@v4
+        with:
+          go-version: ${{ matrix.go-version }}
+      # You can test your matrix by printing the current Go version
+      - name: Display Go version
+        run: go version
+      - name: Dependencies
+        run: |
+          git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+          cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
+            -DgRPC_BUILD_TESTS=OFF \
+            ../.. && make -j12 install && rm -rf grpc
       - name: Test
         run: |
+          export C_INCLUDE_PATH=/usr/local/include
+          export CPLUS_INCLUDE_PATH=/usr/local/include
           CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test
.gitignore (vendored, 5 lines changed)

@@ -1,5 +1,6 @@
 # go-llama build artifacts
 go-llama
+go-llama-stable
 /gpt4all
 go-stable-diffusion
 go-piper
@@ -9,6 +10,8 @@ go-ggllm
 __pycache__/
 *.a
 get-sources
+/backend/cpp/llama/grpc-server
+/backend/cpp/llama/llama.cpp
 
 go-ggml-transformers
 go-gpt2
@@ -22,6 +25,8 @@ LocalAI
 local-ai
 # prevent above rules from omitting the helm chart
 !charts/*
+# prevent above rules from omitting the api/localai folder
+!api/localai
 
 # Ignore models
 models/*
CONTRIBUTING.md (new file, 72 lines)

@@ -0,0 +1,72 @@
+# Contributing to localAI
+
+Thank you for your interest in contributing to LocalAI! We appreciate your time and effort in helping to improve our project. Before you get started, please take a moment to review these guidelines.
+
+## Table of Contents
+
+- [Getting Started](#getting-started)
+  - [Prerequisites](#prerequisites)
+  - [Setting up the Development Environment](#setting-up-the-development-environment)
+- [Contributing](#contributing)
+  - [Submitting an Issue](#submitting-an-issue)
+  - [Creating a Pull Request (PR)](#creating-a-pull-request-pr)
+- [Coding Guidelines](#coding-guidelines)
+- [Testing](#testing)
+- [Documentation](#documentation)
+- [Community and Communication](#community-and-communication)
+
+## Getting Started
+
+### Prerequisites
+
+- Golang [1.21]
+- Git
+- macOS/Linux
+
+### Setting up the Development Environment and running localAI in the local environment
+
+1. Clone the repository: `git clone https://github.com/go-skynet/LocalAI.git`
+2. Navigate to the project directory: `cd LocalAI`
+3. Install the required dependencies: `make prepare`
+4. Run LocalAI: `make run`
+
+## Contributing
+
+We welcome contributions from everyone! To get started, follow these steps:
+
+### Submitting an Issue
+
+If you find a bug, have a feature request, or encounter any issues, please check the [issue tracker](https://github.com/go-skynet/LocalAI/issues) to see if a similar issue has already been reported. If not, feel free to [create a new issue](https://github.com/go-skynet/LocalAI/issues/new) and provide as much detail as possible.
+
+### Creating a Pull Request (PR)
+
+1. Fork the repository.
+2. Create a new branch with a descriptive name: `git checkout -b [branch name]`
+3. Make your changes and commit them.
+4. Push the changes to your fork: `git push origin [branch name]`
+5. Create a new pull request from your branch to the main project's `main` or `master` branch.
+6. Provide a clear description of your changes in the pull request.
+7. Make any requested changes during the review process.
+8. Once your PR is approved, it will be merged into the main project.
+
+## Coding Guidelines
+
+- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like []`golangci-lint`](https://golangci-lint.run) can help you here.
+
+## Testing
+
+`make test` cannot handle all the model now. Please be sure to add a test case for the new features or the part was changed.
+
+## Documentation
+
+- We are welcome the contribution of the documents, please open new PR in the official document repo [localai-website](https://github.com/go-skynet/localai-website)
+
+## Community and Communication
+
+- You can reach out via the Github issue tracker.
+- Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
+- Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
+
+---
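Condensed into a single shell session, the setup steps documented above look like this (a sketch of exactly the listed commands, nothing beyond them):

    # Clone, prepare dependencies and run LocalAI locally, per CONTRIBUTING.md.
    git clone https://github.com/go-skynet/LocalAI.git
    cd LocalAI
    make prepare
    make run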
Dockerfile (162 lines changed)

@@ -1,22 +1,27 @@
-ARG GO_VERSION=1.20-bullseye
+ARG GO_VERSION=1.21-bullseye
+ARG IMAGE_TYPE=extras
+# extras or core
 
-FROM golang:$GO_VERSION as requirements
+FROM golang:$GO_VERSION as requirements-core
 
 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=11
 ARG CUDA_MINOR_VERSION=7
-ARG SPDLOG_VERSION="1.11.0"
-ARG PIPER_PHONEMIZE_VERSION='1.0.0'
 ARG TARGETARCH
 ARG TARGETVARIANT
 
 ENV BUILD_TYPE=${BUILD_TYPE}
-ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/extra/grpc/huggingface/huggingface.py,autogptq:/build/extra/grpc/autogptq/autogptq.py,bark:/build/extra/grpc/bark/ttsbark.py,diffusers:/build/extra/grpc/diffusers/backend_diffusers.py,exllama:/build/extra/grpc/exllama/exllama.py"
+ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/extra/grpc/huggingface/run.sh,autogptq:/build/extra/grpc/autogptq/run.sh,bark:/build/extra/grpc/bark/run.sh,diffusers:/build/extra/grpc/diffusers/run.sh,exllama:/build/extra/grpc/exllama/run.sh,vall-e-x:/build/extra/grpc/vall-e-x/run.sh,vllm:/build/extra/grpc/vllm/run.sh"
 ENV GALLERIES='[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]'
 ARG GO_TAGS="stablediffusion tts"
 
 RUN apt-get update && \
-    apt-get install -y ca-certificates cmake curl patch pip
+    apt-get install -y ca-certificates curl patch pip cmake && apt-get clean
+
+COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
+RUN update-ca-certificates
 
 # Use the variables in subsequent instructions
 RUN echo "Target Architecture: $TARGETARCH"
@@ -30,63 +35,62 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
     dpkg -i cuda-keyring_1.0-1_all.deb && \
     rm -f cuda-keyring_1.0-1_all.deb && \
     apt-get update && \
-    apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
+    apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
     ; fi
 ENV PATH /usr/local/cuda/bin:${PATH}
+
+# OpenBLAS requirements and stable diffusion
+RUN apt-get install -y \
+    libopenblas-dev \
+    libopencv-dev \
+    && apt-get clean
+
+# Set up OpenCV
+RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+
+WORKDIR /build
+
+RUN test -n "$TARGETARCH" \
+    || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
 
 # Extras requirements
+FROM requirements-core as requirements-extras
+
+RUN curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
+    install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
+    gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
+    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \
+    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list && \
+    apt-get update && \
+    apt-get install -y conda
+
 COPY extra/requirements.txt /build/extra/requirements.txt
 ENV PATH="/root/.cargo/bin:${PATH}"
 RUN pip install --upgrade pip
 RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
-RUN if [ "${TARGETARCH}" = "amd64" ]; then \
-    pip install git+https://github.com/suno-ai/bark.git diffusers invisible_watermark transformers accelerate safetensors;\
-    fi
-RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "amd64" ]; then \
-    pip install torch && pip install auto-gptq https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}-cp39-cp39-linux_x86_64.whl;\
-    fi
-RUN pip install -r /build/extra/requirements.txt && rm -rf /build/extra/requirements.txt
+#RUN if [ "${TARGETARCH}" = "amd64" ]; then \
+# pip install git+https://github.com/suno-ai/bark.git diffusers invisible_watermark transformers accelerate safetensors;\
+# fi
+#RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "amd64" ]; then \
+# pip install torch vllm && pip install auto-gptq https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}-cp39-cp39-linux_x86_64.whl;\
+# fi
+#RUN pip install -r /build/extra/requirements.txt && rm -rf /build/extra/requirements.txt
 
-WORKDIR /build
-
-# OpenBLAS requirements
-RUN apt-get install -y libopenblas-dev
-
-# Stable Diffusion requirements
-RUN apt-get install -y libopencv-dev && \
-    ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
-
-# piper requirements
-# Use pre-compiled Piper phonemization library (includes onnxruntime)
-#RUN if echo "${GO_TAGS}" | grep -q "tts"; then \
-RUN test -n "$TARGETARCH" \
-    || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
-
-RUN curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v${SPDLOG_VERSION}.tar.gz" | \
-    tar -xzvf - && \
-    mkdir -p "spdlog-${SPDLOG_VERSION}/build" && \
-    cd "spdlog-${SPDLOG_VERSION}/build" && \
-    cmake .. && \
-    make -j8 && \
-    cmake --install . --prefix /usr && mkdir -p "lib/Linux-$(uname -m)" && \
-    cd /build && \
-    mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \
-    curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v${PIPER_PHONEMIZE_VERSION}/libpiper_phonemize-${TARGETARCH:-$(go env GOARCH)}${TARGETVARIANT}.tar.gz" | \
-    tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
-    cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
-    ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \
-    cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
+# Vall-e-X
+RUN git clone https://github.com/Plachtaa/VALL-E-X.git /usr/lib/vall-e-x && cd /usr/lib/vall-e-x && pip install -r requirements.txt
 # \
 # ; fi
 
 ###################################
 ###################################
 
-FROM requirements as builder
+FROM requirements-${IMAGE_TYPE} as builder
 
 ARG GO_TAGS="stablediffusion tts"
+ARG GRPC_BACKENDS
+ARG BUILD_GRPC=true
+ENV GRPC_BACKENDS=${GRPC_BACKENDS}
 ENV GO_TAGS=${GO_TAGS}
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
@@ -101,21 +105,43 @@ RUN make prepare
 COPY . .
 COPY .git .
 
-RUN ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data make build
+# stablediffusion does not tolerate a newer version of abseil, build it first
+RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
+
+RUN if [ "${BUILD_GRPC}" = "true" ]; then \
+    git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+    cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
+      -DgRPC_BUILD_TESTS=OFF \
+      ../.. && make -j12 install && rm -rf grpc \
+    ; fi
+
+# Rebuild with defaults backends
+RUN make build
+
+RUN if [ ! -d "/build/go-piper/piper/build/pi/lib/" ]; then \
+    mkdir -p /build/go-piper/piper/build/pi/lib/ \
+    touch /build/go-piper/piper/build/pi/lib/keep \
+    ; fi
 
 ###################################
 ###################################
 
-FROM requirements
+FROM requirements-${IMAGE_TYPE}
 
 ARG FFMPEG
 ARG BUILD_TYPE
 ARG TARGETARCH
+ARG IMAGE_TYPE=extras
 
 ENV BUILD_TYPE=${BUILD_TYPE}
 ENV REBUILD=false
 ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
+
+ARG CUDA_MAJOR_VERSION=11
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
+ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
+ENV NVIDIA_VISIBLE_DEVICES=all
 
 # Add FFmpeg
 RUN if [ "${FFMPEG}" = "true" ]; then \
     apt-get install -y ffmpeg \
@@ -129,11 +155,49 @@ WORKDIR /build
 # https://github.com/go-skynet/LocalAI/pull/434
 COPY . .
 RUN make prepare-sources
+
+# Copy the binary
 COPY --from=builder /build/local-ai ./
-# To resolve exllama import error
-RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH:-$(go env GOARCH)}" = "amd64" ]; then \
+
+# Copy shared libraries for piper
+COPY --from=builder /build/go-piper/piper/build/pi/lib/* /usr/lib/
+
+# do not let stablediffusion rebuild (requires an older version of absl)
+COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
+
+## Duplicated from Makefile to avoid having a big layer that's hard to push
+RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
+    PATH=$PATH:/opt/conda/bin make -C extra/grpc/autogptq \
+    ; fi
+RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
+    PATH=$PATH:/opt/conda/bin make -C extra/grpc/bark \
+    ; fi
+RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
+    PATH=$PATH:/opt/conda/bin make -C extra/grpc/diffusers \
+    ; fi
+RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
+    PATH=$PATH:/opt/conda/bin make -C extra/grpc/vllm \
+    ; fi
+RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
+    PATH=$PATH:/opt/conda/bin make -C extra/grpc/huggingface \
+    ; fi
+RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
+    PATH=$PATH:/opt/conda/bin make -C extra/grpc/vall-e-x \
+    ; fi
+RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
+    PATH=$PATH:/opt/conda/bin make -C extra/grpc/exllama \
+    ; fi
+
+# Copy VALLE-X as it's not a real "lib"
+RUN if [ -d /usr/lib/vall-e-x ]; then \
+    cp -rfv /usr/lib/vall-e-x/* ./ ; \
+    fi
+
+# we also copy exllama libs over to resolve exllama import error
+RUN if [ -d /usr/local/lib/python3.9/dist-packages/exllama ]; then \
     cp -rfv /usr/local/lib/python3.9/dist-packages/exllama extra/grpc/exllama/;\
     fi
 
 # Define the health check command
 HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
   CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
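The Dockerfile now splits into requirements-core and requirements-extras stages selected through IMAGE_TYPE, with BUILD_GRPC controlling whether gRPC is built inside the image. A hedged example of building both flavours locally; the argument values and local tag names are illustrative, not mandated by the diff:

    # Core image without the conda/Python extras stage.
    docker build --build-arg IMAGE_TYPE=core -t local-ai:core .

    # Extras image with CUDA 11.7, matching the cublas entries in the CI matrix.
    docker build \
      --build-arg IMAGE_TYPE=extras \
      --build-arg BUILD_TYPE=cublas \
      --build-arg CUDA_MAJOR_VERSION=11 \
      --build-arg CUDA_MINOR_VERSION=7 \
      --build-arg GO_TAGS="stablediffusion tts" \
      -t local-ai:cublas-extras .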
216
Makefile
216
Makefile
@@ -4,11 +4,15 @@ GOVET=$(GOCMD) vet
|
|||||||
BINARY_NAME=local-ai
|
BINARY_NAME=local-ai
|
||||||
|
|
||||||
# llama.cpp versions
|
# llama.cpp versions
|
||||||
GOLLAMA_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
|
GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0
|
||||||
|
|
||||||
|
GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
|
||||||
|
|
||||||
|
CPPLLAMA_VERSION?=a75fa576abba9d37f463580c379e4bbf1e1ad03c
|
||||||
|
|
||||||
# gpt4all version
|
# gpt4all version
|
||||||
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
||||||
GPT4ALL_VERSION?=0f2bb506a8ee752afc06cbb832773bf85b97eef3
|
GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8
|
||||||
|
|
||||||
# go-ggml-transformers version
|
# go-ggml-transformers version
|
||||||
GOGGMLTRANSFORMERS_VERSION?=ffb09d7dd71e2cbc6c5d7d05357d230eea6f369a
|
GOGGMLTRANSFORMERS_VERSION?=ffb09d7dd71e2cbc6c5d7d05357d230eea6f369a
|
||||||
@@ -24,23 +28,23 @@ WHISPER_CPP_VERSION?=85ed71aaec8e0612a84c0b67804bde75aa75a273
|
|||||||
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
|
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
|
||||||
|
|
||||||
# go-piper version
|
# go-piper version
|
||||||
PIPER_VERSION?=56b8a81b4760a6fbee1a82e62f007ae7e8f010a7
|
PIPER_VERSION?=736f6fb639ab8e3397356e48eeb6bdcb9da88a78
|
||||||
|
|
||||||
# go-bloomz version
|
|
||||||
BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f
|
|
||||||
|
|
||||||
# stablediffusion version
|
# stablediffusion version
|
||||||
STABLEDIFFUSION_VERSION?=d89260f598afb809279bc72aa0107b4292587632
|
STABLEDIFFUSION_VERSION?=d89260f598afb809279bc72aa0107b4292587632
|
||||||
|
|
||||||
# Go-ggllm
|
|
||||||
GOGGLLM_VERSION?=862477d16eefb0805261c19c9b0d053e3b2b684b
|
|
||||||
|
|
||||||
export BUILD_TYPE?=
|
export BUILD_TYPE?=
|
||||||
|
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
|
||||||
|
export CMAKE_ARGS?=
|
||||||
CGO_LDFLAGS?=
|
CGO_LDFLAGS?=
|
||||||
CUDA_LIBPATH?=/usr/local/cuda/lib64/
|
CUDA_LIBPATH?=/usr/local/cuda/lib64/
|
||||||
GO_TAGS?=
|
GO_TAGS?=
|
||||||
BUILD_ID?=git
|
BUILD_ID?=git
|
||||||
|
|
||||||
|
TEST_DIR=/tmp/test
|
||||||
|
|
||||||
|
RANDOM := $(shell bash -c 'echo $$RANDOM')
|
||||||
|
|
||||||
VERSION?=$(shell git describe --always --tags || echo "dev" )
|
VERSION?=$(shell git describe --always --tags || echo "dev" )
|
||||||
# go tool nm ./local-ai | grep Commit
|
# go tool nm ./local-ai | grep Commit
|
||||||
LD_FLAGS?=
|
LD_FLAGS?=
|
||||||
@@ -48,7 +52,6 @@ override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Version=$(VERSION
|
|||||||
override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
|
override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
|
||||||
|
|
||||||
OPTIONAL_TARGETS?=
|
OPTIONAL_TARGETS?=
|
||||||
ESPEAK_DATA?=
|
|
||||||
|
|
||||||
OS := $(shell uname -s)
|
OS := $(shell uname -s)
|
||||||
ARCH := $(shell uname -m)
|
ARCH := $(shell uname -m)
|
||||||
@@ -58,13 +61,19 @@ WHITE := $(shell tput -Txterm setaf 7)
|
|||||||
CYAN := $(shell tput -Txterm setaf 6)
|
CYAN := $(shell tput -Txterm setaf 6)
|
||||||
RESET := $(shell tput -Txterm sgr0)
|
RESET := $(shell tput -Txterm sgr0)
|
||||||
|
|
||||||
|
# Default Docker bridge IP
|
||||||
|
E2E_BRIDGE_IP?=172.17.0.1
|
||||||
|
|
||||||
ifndef UNAME_S
|
ifndef UNAME_S
|
||||||
UNAME_S := $(shell uname -s)
|
UNAME_S := $(shell uname -s)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# workaround for rwkv.cpp
|
|
||||||
ifeq ($(UNAME_S),Darwin)
|
ifeq ($(UNAME_S),Darwin)
|
||||||
CGO_LDFLAGS += -lcblas -framework Accelerate
|
CGO_LDFLAGS += -lcblas -framework Accelerate
|
||||||
|
ifneq ($(BUILD_TYPE),metal)
|
||||||
|
# explicit disable metal if on Darwin and metal is disabled
|
||||||
|
CMAKE_ARGS+=-DLLAMA_METAL=OFF
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),openblas)
|
ifeq ($(BUILD_TYPE),openblas)
|
||||||
@@ -76,6 +85,18 @@ ifeq ($(BUILD_TYPE),cublas)
|
|||||||
export LLAMA_CUBLAS=1
|
export LLAMA_CUBLAS=1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_TYPE),hipblas)
|
||||||
|
ROCM_HOME ?= /opt/rocm
|
||||||
|
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||||
|
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||||
|
# Llama-stable has no hipblas support, so override it here.
|
||||||
|
export STABLE_BUILD_TYPE=
|
||||||
|
GPU_TARGETS ?= gfx900,gfx90a,gfx1030,gfx1031,gfx1100
|
||||||
|
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
||||||
|
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
||||||
|
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),metal)
|
ifeq ($(BUILD_TYPE),metal)
|
||||||
CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
|
CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
|
||||||
export LLAMA_METAL=1
|
export LLAMA_METAL=1
|
||||||
@@ -98,9 +119,19 @@ endif
|
|||||||
ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
||||||
# OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
|
# OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
|
||||||
# OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
|
# OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
|
||||||
|
PIPER_CGO_CXXFLAGS+=-I$(shell pwd)/go-piper/piper/src/cpp -I$(shell pwd)/go-piper/piper/build/fi/include -I$(shell pwd)/go-piper/piper/build/pi/include -I$(shell pwd)/go-piper/piper/build/si/include
|
||||||
|
PIPER_CGO_LDFLAGS+=-L$(shell pwd)/go-piper/piper/build/fi/lib -L$(shell pwd)/go-piper/piper/build/pi/lib -L$(shell pwd)/go-piper/piper/build/si/lib -lfmt -lspdlog
|
||||||
OPTIONAL_GRPC+=backend-assets/grpc/piper
|
OPTIONAL_GRPC+=backend-assets/grpc/piper
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
+ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-stable backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
+GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
+
+# If empty, then we build all
+ifeq ($(GRPC_BACKENDS),)
+GRPC_BACKENDS=$(ALL_GRPC_BACKENDS)
+endif

 .PHONY: all test build vendor

 all: help

@@ -110,14 +141,6 @@ gpt4all:
 	git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all
 	cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1

-## go-ggllm
-go-ggllm:
-	git clone --recurse-submodules https://github.com/mudler/go-ggllm.cpp go-ggllm
-	cd go-ggllm && git checkout -b build $(GOGGLLM_VERSION) && git submodule update --init --recursive --depth 1
-
-go-ggllm/libggllm.a: go-ggllm
-	$(MAKE) -C go-ggllm BUILD_TYPE=$(BUILD_TYPE) libggllm.a
-
 ## go-piper
 go-piper:
 	git clone --recurse-submodules https://github.com/mudler/go-piper go-piper

@@ -144,14 +167,6 @@ go-rwkv:
 go-rwkv/librwkv.a: go-rwkv
 	cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..

-## bloomz
-bloomz:
-	git clone --recurse-submodules https://github.com/go-skynet/bloomz.cpp bloomz
-	cd bloomz && git checkout -b build $(BLOOMZ_VERSION) && git submodule update --init --recursive --depth 1
-
-bloomz/libbloomz.a: bloomz
-	cd bloomz && make libbloomz.a
-
 go-bert/libgobert.a: go-bert
 	$(MAKE) -C go-bert libgobert.a

@@ -161,14 +176,10 @@ backend-assets/gpt4all: gpt4all/gpt4all-bindings/golang/libgpt4all.a
 	@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
 	@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true

-backend-assets/espeak-ng-data:
+backend-assets/espeak-ng-data: go-piper
 	mkdir -p backend-assets/espeak-ng-data
-ifdef ESPEAK_DATA
-	@cp -rf $(ESPEAK_DATA)/. backend-assets/espeak-ng-data
-else
-	@echo "ESPEAK_DATA not set, skipping tts. Note that this will break the tts functionality."
-	@touch backend-assets/espeak-ng-data/keep
-endif
+	$(MAKE) -C go-piper piper.o
+	@cp -rf go-piper/piper/build/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data

 gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
 	$(MAKE) -C gpt4all/gpt4all-bindings/golang/ libgpt4all.a
@@ -192,26 +203,30 @@ go-llama:
 	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
 	cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1

+go-llama-stable:
+	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama-stable
+	cd go-llama-stable && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
+
 go-llama/libbinding.a: go-llama
 	$(MAKE) -C go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a

-go-piper/libpiper_binding.a:
+go-llama-stable/libbinding.a: go-llama-stable
+	$(MAKE) -C go-llama-stable BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
+
+go-piper/libpiper_binding.a: go-piper
 	$(MAKE) -C go-piper libpiper_binding.a example/main

-get-sources: go-llama go-ggllm go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion
+get-sources: go-llama go-llama-stable go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert go-stable-diffusion
 	touch $@

 replace:
-	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
 	$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-ggml-transformers.cpp=$(shell pwd)/go-ggml-transformers
 	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
-	$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz
 	$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion
 	$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(shell pwd)/go-piper
-	$(GOCMD) mod edit -replace github.com/mudler/go-ggllm.cpp=$(shell pwd)/go-ggllm

 prepare-sources: get-sources replace
 	$(GOCMD) mod download

@@ -220,15 +235,14 @@ prepare-sources: get-sources replace
 rebuild: ## Rebuilds the project
 	$(GOCMD) clean -cache
 	$(MAKE) -C go-llama clean
+	$(MAKE) -C go-llama-stable clean
 	$(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean
 	$(MAKE) -C go-ggml-transformers clean
 	$(MAKE) -C go-rwkv clean
 	$(MAKE) -C whisper.cpp clean
 	$(MAKE) -C go-stable-diffusion clean
 	$(MAKE) -C go-bert clean
-	$(MAKE) -C bloomz clean
 	$(MAKE) -C go-piper clean
-	$(MAKE) -C go-ggllm clean
 	$(MAKE) build

 prepare: prepare-sources $(OPTIONAL_TARGETS)

@@ -239,18 +253,21 @@ clean: ## Remove build related file
 	rm -f prepare
 	rm -rf ./go-llama
 	rm -rf ./gpt4all
+	rm -rf ./go-llama-stable
 	rm -rf ./go-gpt2
 	rm -rf ./go-stable-diffusion
 	rm -rf ./go-ggml-transformers
 	rm -rf ./backend-assets
 	rm -rf ./go-rwkv
 	rm -rf ./go-bert
-	rm -rf ./bloomz
 	rm -rf ./whisper.cpp
 	rm -rf ./go-piper
-	rm -rf ./go-ggllm
 	rm -rf $(BINARY_NAME)
 	rm -rf release/
+	rm -rf ./backend/cpp/grpc/grpc_repo
+	rm -rf ./backend/cpp/grpc/build
+	rm -rf ./backend/cpp/grpc/installed_packages
+	$(MAKE) -C backend/cpp/llama clean

 ## Build:

@@ -273,12 +290,12 @@ run: prepare ## run local-ai
 test-models/testmodel:
 	mkdir test-models
 	mkdir test-dir
-	wget https://huggingface.co/nnakasato/ggml-model-test/resolve/main/ggml-model-q4.bin -O test-models/testmodel
-	wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
-	wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O test-models/bert
-	wget https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
-	wget https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv
-	wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
+	wget -q https://huggingface.co/nnakasato/ggml-model-test/resolve/main/ggml-model-q4.bin -O test-models/testmodel
+	wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
+	wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
+	wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
+	wget -q https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv
+	wget -q https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
 	cp tests/models_fixtures/* test-models

 prepare-test: grpcs
@@ -289,13 +306,34 @@ test: prepare test-models/testmodel grpcs
 	@echo 'Running tests'
 	export GO_TAGS="tts stablediffusion"
 	$(MAKE) prepare-test
-	HUGGINGFACE_GRPC=$(abspath ./)/extra/grpc/huggingface/huggingface.py TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama" --flake-attempts 5 -v -r ./api ./pkg
+	HUGGINGFACE_GRPC=$(abspath ./)/extra/grpc/huggingface/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
+	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts 5 --fail-fast -v -r ./api ./pkg
 	$(MAKE) test-gpt4all
 	$(MAKE) test-llama
+	$(MAKE) test-llama-gguf
 	$(MAKE) test-tts
 	$(MAKE) test-stablediffusion

+prepare-e2e:
+	mkdir -p $(TEST_DIR)
+	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
+	test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
+	docker build --build-arg BUILD_GRPC=true --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
+
+run-e2e-image:
+	ls -liah $(abspath ./tests/e2e-fixtures)
+	docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
+
+test-e2e:
+	@echo 'Running e2e tests'
+	BUILD_TYPE=$(BUILD_TYPE) \
+	LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
+	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e
+
+teardown-e2e:
+	rm -rf $(TEST_DIR) || true
+	docker stop $$(docker ps -q --filter ancestor=localai-tests)
+
 test-gpt4all: prepare-test
 	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r ./api ./pkg

@@ -304,6 +342,10 @@ test-llama: prepare-test
 	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r ./api ./pkg

+test-llama-gguf: prepare-test
+	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
+	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r ./api ./pkg
+
 test-tts: prepare-test
 	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts 1 -v -r ./api ./pkg

@@ -340,17 +382,26 @@ protogen-python:
 	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/exllama/ --grpc_python_out=extra/grpc/exllama/ pkg/grpc/proto/backend.proto
 	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/bark/ --grpc_python_out=extra/grpc/bark/ pkg/grpc/proto/backend.proto
 	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/diffusers/ --grpc_python_out=extra/grpc/diffusers/ pkg/grpc/proto/backend.proto
+	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/vall-e-x/ --grpc_python_out=extra/grpc/vall-e-x/ pkg/grpc/proto/backend.proto
+	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/vllm/ --grpc_python_out=extra/grpc/vllm/ pkg/grpc/proto/backend.proto

 ## GRPC
+# Note: it is duplicated in the Dockerfile
+prepare-extra-conda-environments:
+	$(MAKE) -C extra/grpc/autogptq
+	$(MAKE) -C extra/grpc/bark
+	$(MAKE) -C extra/grpc/diffusers
+	$(MAKE) -C extra/grpc/vllm
+	$(MAKE) -C extra/grpc/huggingface
+	$(MAKE) -C extra/grpc/vall-e-x
+	$(MAKE) -C extra/grpc/exllama
+
 backend-assets/grpc:
 	mkdir -p backend-assets/grpc

-backend-assets/grpc/falcon: backend-assets/grpc go-ggllm/libggllm.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggllm LIBRARY_PATH=$(shell pwd)/go-ggllm \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon ./cmd/grpc/falcon/
-
 backend-assets/grpc/llama: backend-assets/grpc go-llama/libbinding.a
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama LIBRARY_PATH=$(shell pwd)/go-llama \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama ./cmd/grpc/llama/
 # TODO: every binary should have its own folder instead, so can have different metal implementations

@@ -358,6 +409,42 @@ ifeq ($(BUILD_TYPE),metal)
 	cp go-llama/build/bin/ggml-metal.metal backend-assets/grpc/
 endif

+## BACKEND CPP LLAMA START
+# Sets the variables in case it has to build the gRPC locally.
+INSTALLED_PACKAGES=$(CURDIR)/backend/cpp/grpc/installed_packages
+INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
+ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
+	-DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
+	-Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
+	-DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
+	-DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
+
+backend/cpp/llama/grpc-server:
+ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
+	backend/cpp/grpc/script/build_grpc.sh ${INSTALLED_PACKAGES}
+	export _PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto && \
+	export _GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin && \
+	export PATH=${PATH}:${INSTALLED_PACKAGES}/bin && \
+	CMAKE_ARGS="${ADDED_CMAKE_ARGS}" LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
+else
+	echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
+	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
+endif
+## BACKEND CPP LLAMA END
+
+##
+backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server
+	cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp
+# TODO: every binary should have its own folder instead, so can have different metal implementations
+ifeq ($(BUILD_TYPE),metal)
+	cp backend/cpp/llama/llama.cpp/build/bin/ggml-metal.metal backend-assets/grpc/
+endif
+
+backend-assets/grpc/llama-stable: backend-assets/grpc go-llama-stable/libbinding.a
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama-stable
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama-stable LIBRARY_PATH=$(shell pwd)/go-llama \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-stable ./cmd/grpc/llama-stable/
+
 backend-assets/grpc/gpt4all: backend-assets/grpc backend-assets/gpt4all gpt4all/gpt4all-bindings/golang/libgpt4all.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(shell pwd)/gpt4all/gpt4all-bindings/golang/ \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./cmd/grpc/gpt4all/

@@ -398,10 +485,6 @@ backend-assets/grpc/rwkv: backend-assets/grpc go-rwkv/librwkv.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-rwkv LIBRARY_PATH=$(shell pwd)/go-rwkv \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./cmd/grpc/rwkv/

-backend-assets/grpc/bloomz: backend-assets/grpc bloomz/libbloomz.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/bloomz LIBRARY_PATH=$(shell pwd)/bloomz \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bloomz ./cmd/grpc/bloomz/
-
 backend-assets/grpc/bert-embeddings: backend-assets/grpc go-bert/libgobert.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-bert LIBRARY_PATH=$(shell pwd)/go-bert \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./cmd/grpc/bert-embeddings/

@@ -409,16 +492,19 @@ backend-assets/grpc/bert-embeddings: backend-assets/grpc go-bert/libgobert.a
 backend-assets/grpc/langchain-huggingface: backend-assets/grpc
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./cmd/grpc/langchain-huggingface/

-backend-assets/grpc/stablediffusion: backend-assets/grpc go-stable-diffusion/libstablediffusion.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-stable-diffusion/ LIBRARY_PATH=$(shell pwd)/go-stable-diffusion/ \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./cmd/grpc/stablediffusion/
+backend-assets/grpc/stablediffusion: backend-assets/grpc
+	if [ ! -f backend-assets/grpc/stablediffusion ]; then \
+	$(MAKE) go-stable-diffusion/libstablediffusion.a; \
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-stable-diffusion/ LIBRARY_PATH=$(shell pwd)/go-stable-diffusion/ \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./cmd/grpc/stablediffusion/; \
+	fi

 backend-assets/grpc/piper: backend-assets/grpc backend-assets/espeak-ng-data go-piper/libpiper_binding.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(shell pwd)/go-piper \
+	CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(shell pwd)/go-piper \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./cmd/grpc/piper/

 backend-assets/grpc/whisper: backend-assets/grpc whisper.cpp/libwhisper.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/whisper.cpp LIBRARY_PATH=$(shell pwd)/whisper.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./cmd/grpc/whisper/

-grpcs: prepare backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/falcon backend-assets/grpc/bloomz backend-assets/grpc/llama backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
+grpcs: prepare $(GRPC_BACKENDS)
README.md (26)
@@ -27,7 +27,7 @@

 [](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)

-**LocalAI** is a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families that are compatible with the ggml format. Does not require GPU.
+**LocalAI** is a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families that are compatible with the ggml format, pytorch and more. Does not require GPU.

 <p align="center"><b>Follow LocalAI </b></p>

@@ -104,15 +104,30 @@ Note that this started just as a [fun weekend project](https://localai.io/#backs

 Check out the [Getting started](https://localai.io/basics/getting_started/index.html) section in our documentation.

-### 💡 Example: Use GPT4ALL-J model
+### 💡 Example: Use Luna-AI Llama model

-See the [documentation](https://localai.io/basics/getting_started/#example-use-gpt4all-j-model-with-docker-compose)
+See the [documentation](https://localai.io/basics/getting_started)

 ### 🔗 Resources

 - [How to build locally](https://localai.io/basics/build/index.html)
 - [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes)
 - [Projects integrating LocalAI](https://localai.io/integrations/)
+- [How tos section](https://localai.io/howtos/) (curated by our community)
+
+## Citation
+
+If you utilize this repository, data in a downstream project, please consider citing it with:
+
+```
+@misc{localai,
+  author = {Ettore Di Giacinto},
+  title = {LocalAI: The free, Open source OpenAI alternative},
+  year = {2023},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  howpublished = {\url{https://github.com/go-skynet/LocalAI}},
+```

 ## ❤️ Sponsors

@@ -127,6 +142,11 @@ A huge thank you to our generous sponsors who support this project:
 | [Spectro Cloud](https://www.spectrocloud.com/) |
 | Spectro Cloud kindly supports LocalAI by providing GPU and computing resources to run tests on lamdalabs! |

+And a huge shout-out to individuals sponsoring the project by donating hardware or backing the project.
+
+- [Sponsor list](https://github.com/sponsors/mudler)
+- JDAM00 (donating HW for the CI)
+
 ## 🌟 Star history

 [](https://star-history.com/#go-skynet/LocalAI&Date)
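Editor's note: the README above describes LocalAI as a drop-in, OpenAI-compatible REST API. Below is a minimal, hedged sketch of what a client request against the `/v1/chat/completions` route (registered in `api/api.go` further down) might look like. The host/port and the model name `luna-ai-llama2` are placeholders for illustration and are not taken from this changeset.

```go
// Hypothetical client sketch: POSTs an OpenAI-style chat request to a locally
// running LocalAI instance. Assumes the server listens on localhost:8080 and
// a model named "luna-ai-llama2" has been installed (both are assumptions).
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	body := []byte(`{
		"model": "luna-ai-llama2",
		"messages": [{"role": "user", "content": "How are you?"}],
		"temperature": 0.9
	}`)

	// LocalAI exposes the same route shape as the OpenAI API, so an OpenAI
	// client pointed at this base URL should work the same way.
	resp, err := http.Post("http://localhost:8080/v1/chat/completions",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out))
}
```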
170
api/api.go
170
api/api.go
@@ -2,13 +2,16 @@ package api
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
config "github.com/go-skynet/LocalAI/api/config"
|
config "github.com/go-skynet/LocalAI/api/config"
|
||||||
"github.com/go-skynet/LocalAI/api/localai"
|
"github.com/go-skynet/LocalAI/api/localai"
|
||||||
"github.com/go-skynet/LocalAI/api/openai"
|
"github.com/go-skynet/LocalAI/api/openai"
|
||||||
"github.com/go-skynet/LocalAI/api/options"
|
"github.com/go-skynet/LocalAI/api/options"
|
||||||
|
"github.com/go-skynet/LocalAI/api/schema"
|
||||||
"github.com/go-skynet/LocalAI/internal"
|
"github.com/go-skynet/LocalAI/internal"
|
||||||
|
"github.com/go-skynet/LocalAI/metrics"
|
||||||
"github.com/go-skynet/LocalAI/pkg/assets"
|
"github.com/go-skynet/LocalAI/pkg/assets"
|
||||||
|
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
@@ -19,7 +22,7 @@ import (
|
|||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
func App(opts ...options.AppOption) (*fiber.App, error) {
|
func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader, error) {
|
||||||
options := options.NewOptions(opts...)
|
options := options.NewOptions(opts...)
|
||||||
|
|
||||||
zerolog.SetGlobalLevel(zerolog.InfoLevel)
|
zerolog.SetGlobalLevel(zerolog.InfoLevel)
|
||||||
@@ -27,6 +30,65 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
|||||||
zerolog.SetGlobalLevel(zerolog.DebugLevel)
|
zerolog.SetGlobalLevel(zerolog.DebugLevel)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.Loader.ModelPath)
|
||||||
|
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
|
||||||
|
|
||||||
|
cl := config.NewConfigLoader()
|
||||||
|
if err := cl.LoadConfigs(options.Loader.ModelPath); err != nil {
|
||||||
|
log.Error().Msgf("error loading config files: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
if options.ConfigFile != "" {
|
||||||
|
if err := cl.LoadConfigFile(options.ConfigFile); err != nil {
|
||||||
|
log.Error().Msgf("error loading config file: %s", err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if options.Debug {
|
||||||
|
for _, v := range cl.ListConfigs() {
|
||||||
|
cfg, _ := cl.GetConfig(v)
|
||||||
|
log.Debug().Msgf("Model: %s (config: %+v)", v, cfg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if options.AssetsDestination != "" {
|
||||||
|
// Extract files from the embedded FS
|
||||||
|
err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination)
|
||||||
|
log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if options.PreloadJSONModels != "" {
|
||||||
|
if err := localai.ApplyGalleryFromString(options.Loader.ModelPath, options.PreloadJSONModels, cl, options.Galleries); err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if options.PreloadModelsFromPath != "" {
|
||||||
|
if err := localai.ApplyGalleryFromFile(options.Loader.ModelPath, options.PreloadModelsFromPath, cl, options.Galleries); err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// turn off any process that was started by GRPC if the context is canceled
|
||||||
|
go func() {
|
||||||
|
<-options.Context.Done()
|
||||||
|
log.Debug().Msgf("Context canceled, shutting down")
|
||||||
|
options.Loader.StopAllGRPC()
|
||||||
|
}()
|
||||||
|
|
||||||
|
return options, cl, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func App(opts ...options.AppOption) (*fiber.App, error) {
|
||||||
|
|
||||||
|
options, cl, err := Startup(opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed basic startup tasks with error %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
// Return errors as JSON responses
|
// Return errors as JSON responses
|
||||||
app := fiber.New(fiber.Config{
|
app := fiber.New(fiber.Config{
|
||||||
BodyLimit: options.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
|
BodyLimit: options.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
|
||||||
@@ -44,8 +106,8 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
|||||||
|
|
||||||
// Send custom error page
|
// Send custom error page
|
||||||
return ctx.Status(code).JSON(
|
return ctx.Status(code).JSON(
|
||||||
openai.ErrorResponse{
|
schema.ErrorResponse{
|
||||||
Error: &openai.APIError{Message: err.Error(), Code: code},
|
Error: &schema.APIError{Message: err.Error(), Code: code},
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
},
|
},
|
||||||
@@ -57,38 +119,11 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
|||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.Loader.ModelPath)
|
|
||||||
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
|
|
||||||
|
|
||||||
cm := config.NewConfigLoader()
|
|
||||||
if err := cm.LoadConfigs(options.Loader.ModelPath); err != nil {
|
|
||||||
log.Error().Msgf("error loading config files: %s", err.Error())
|
|
||||||
}
|
|
||||||
|
|
||||||
if options.ConfigFile != "" {
|
|
||||||
if err := cm.LoadConfigFile(options.ConfigFile); err != nil {
|
|
||||||
log.Error().Msgf("error loading config file: %s", err.Error())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if options.Debug {
|
|
||||||
for _, v := range cm.ListConfigs() {
|
|
||||||
cfg, _ := cm.GetConfig(v)
|
|
||||||
log.Debug().Msgf("Model: %s (config: %+v)", v, cfg)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if options.AssetsDestination != "" {
|
|
||||||
// Extract files from the embedded FS
|
|
||||||
err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination)
|
|
||||||
log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination)
|
|
||||||
if err != nil {
|
|
||||||
log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Default middleware config
|
// Default middleware config
|
||||||
app.Use(recover.New())
|
app.Use(recover.New())
|
||||||
|
if options.Metrics != nil {
|
||||||
|
app.Use(metrics.APIMiddleware(options.Metrics))
|
||||||
|
}
|
||||||
|
|
||||||
// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
|
// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
|
||||||
auth := func(c *fiber.Ctx) error {
|
auth := func(c *fiber.Ctx) error {
|
||||||
@@ -116,18 +151,6 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
|||||||
return c.Next()
|
return c.Next()
|
||||||
}
|
}
|
||||||
|
|
||||||
if options.PreloadJSONModels != "" {
|
|
||||||
if err := localai.ApplyGalleryFromString(options.Loader.ModelPath, options.PreloadJSONModels, cm, options.Galleries); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if options.PreloadModelsFromPath != "" {
|
|
||||||
if err := localai.ApplyGalleryFromFile(options.Loader.ModelPath, options.PreloadModelsFromPath, cm, options.Galleries); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if options.CORS {
|
if options.CORS {
|
||||||
var c func(ctx *fiber.Ctx) error
|
var c func(ctx *fiber.Ctx) error
|
||||||
if options.CORSAllowOrigins == "" {
|
if options.CORSAllowOrigins == "" {
|
||||||
@@ -141,7 +164,7 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
|||||||
|
|
||||||
// LocalAI API endpoints
|
// LocalAI API endpoints
|
||||||
galleryService := localai.NewGalleryService(options.Loader.ModelPath)
|
galleryService := localai.NewGalleryService(options.Loader.ModelPath)
|
||||||
galleryService.Start(options.Context, cm)
|
galleryService.Start(options.Context, cl)
|
||||||
|
|
||||||
app.Get("/version", auth, func(c *fiber.Ctx) error {
|
app.Get("/version", auth, func(c *fiber.Ctx) error {
|
||||||
return c.JSON(struct {
|
return c.JSON(struct {
|
||||||
@@ -149,36 +172,41 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
|||||||
}{Version: internal.PrintableVersion()})
|
}{Version: internal.PrintableVersion()})
|
||||||
})
|
})
|
||||||
|
|
||||||
app.Post("/models/apply", auth, localai.ApplyModelGalleryEndpoint(options.Loader.ModelPath, cm, galleryService.C, options.Galleries))
|
modelGalleryService := localai.CreateModelGalleryService(options.Galleries, options.Loader.ModelPath, galleryService)
|
||||||
app.Get("/models/available", auth, localai.ListModelFromGalleryEndpoint(options.Galleries, options.Loader.ModelPath))
|
app.Post("/models/apply", auth, modelGalleryService.ApplyModelGalleryEndpoint())
|
||||||
app.Get("/models/jobs/:uuid", auth, localai.GetOpStatusEndpoint(galleryService))
|
app.Get("/models/available", auth, modelGalleryService.ListModelFromGalleryEndpoint())
|
||||||
|
app.Get("/models/galleries", auth, modelGalleryService.ListModelGalleriesEndpoint())
|
||||||
|
app.Post("/models/galleries", auth, modelGalleryService.AddModelGalleryEndpoint())
|
||||||
|
app.Delete("/models/galleries", auth, modelGalleryService.RemoveModelGalleryEndpoint())
|
||||||
|
app.Get("/models/jobs/:uuid", auth, modelGalleryService.GetOpStatusEndpoint())
|
||||||
|
app.Get("/models/jobs", auth, modelGalleryService.GetAllStatusEndpoint())
|
||||||
|
|
||||||
// openAI compatible API endpoint
|
// openAI compatible API endpoint
|
||||||
|
|
||||||
// chat
|
// chat
|
||||||
app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cm, options))
|
app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, options))
|
||||||
app.Post("/chat/completions", auth, openai.ChatEndpoint(cm, options))
|
app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, options))
|
||||||
|
|
||||||
// edit
|
// edit
|
||||||
app.Post("/v1/edits", auth, openai.EditEndpoint(cm, options))
|
app.Post("/v1/edits", auth, openai.EditEndpoint(cl, options))
|
||||||
app.Post("/edits", auth, openai.EditEndpoint(cm, options))
|
app.Post("/edits", auth, openai.EditEndpoint(cl, options))
|
||||||
|
|
||||||
// completion
|
// completion
|
||||||
app.Post("/v1/completions", auth, openai.CompletionEndpoint(cm, options))
|
app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, options))
|
||||||
app.Post("/completions", auth, openai.CompletionEndpoint(cm, options))
|
app.Post("/completions", auth, openai.CompletionEndpoint(cl, options))
|
||||||
app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cm, options))
|
app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, options))
|
||||||
|
|
||||||
// embeddings
|
// embeddings
|
||||||
app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cm, options))
|
app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, options))
|
||||||
app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cm, options))
|
app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, options))
|
||||||
app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cm, options))
|
app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, options))
|
||||||
|
|
||||||
// audio
|
// audio
|
||||||
app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cm, options))
|
app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, options))
|
||||||
app.Post("/tts", auth, localai.TTSEndpoint(cm, options))
|
app.Post("/tts", auth, localai.TTSEndpoint(cl, options))
|
||||||
|
|
||||||
// images
|
// images
|
||||||
app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cm, options))
|
app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, options))
|
||||||
|
|
||||||
if options.ImageDir != "" {
|
if options.ImageDir != "" {
|
||||||
app.Static("/generated-images", options.ImageDir)
|
app.Static("/generated-images", options.ImageDir)
|
||||||
@@ -196,16 +224,16 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
|||||||
app.Get("/healthz", ok)
|
app.Get("/healthz", ok)
|
||||||
app.Get("/readyz", ok)
|
app.Get("/readyz", ok)
|
||||||
|
|
||||||
// models
|
// Experimental Backend Statistics Module
|
||||||
app.Get("/v1/models", auth, openai.ListModelsEndpoint(options.Loader, cm))
|
backendMonitor := localai.NewBackendMonitor(cl, options) // Split out for now
|
||||||
app.Get("/models", auth, openai.ListModelsEndpoint(options.Loader, cm))
|
app.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitor))
|
||||||
|
app.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitor))
|
||||||
|
|
||||||
// turn off any process that was started by GRPC if the context is canceled
|
// models
|
||||||
go func() {
|
app.Get("/v1/models", auth, openai.ListModelsEndpoint(options.Loader, cl))
|
||||||
<-options.Context.Done()
|
app.Get("/models", auth, openai.ListModelsEndpoint(options.Loader, cl))
|
||||||
log.Debug().Msgf("Context canceled, shutting down")
|
|
||||||
options.Loader.StopGRPC()
|
app.Get("/metrics", metrics.MetricsHandler())
|
||||||
}()
|
|
||||||
|
|
||||||
return app, nil
|
return app, nil
|
||||||
}
|
}
|
||||||
|
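Editor's note: the `api/api.go` changes above split startup work (`Startup`: config loading, backend-asset extraction, model preloading, gRPC shutdown watcher) from route wiring (`App`), and add `/metrics`, `/backend/monitor` and `/backend/shutdown` routes. The following is a hedged sketch of how a caller might start the server with these options; it is based only on the option helpers used in the tests that follow, and the models path and listen address are assumptions.

```go
// Hypothetical main package driving the refactored Startup/App pair.
package main

import (
	"context"

	api "github.com/go-skynet/LocalAI/api"
	"github.com/go-skynet/LocalAI/api/options"
	"github.com/go-skynet/LocalAI/metrics"
	model "github.com/go-skynet/LocalAI/pkg/model"
)

func main() {
	ctx := context.Background()

	// SetupMetrics wires the exporter served on the new /metrics route.
	metricsService, err := metrics.SetupMetrics()
	if err != nil {
		panic(err)
	}

	// App() now calls Startup() internally: configs are loaded, backend assets
	// extracted and preload galleries applied before any route is registered.
	app, err := api.App(
		options.WithContext(ctx),
		options.WithModelLoader(model.NewModelLoader("/models")), // placeholder path
		options.WithMetrics(metricsService),
	)
	if err != nil {
		panic(err)
	}

	if err := app.Listen("127.0.0.1:8080"); err != nil {
		panic(err)
	}
}
```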
api/api_test.go (101)
@@ -15,6 +15,7 @@ import (

 	. "github.com/go-skynet/LocalAI/api"
 	"github.com/go-skynet/LocalAI/api/options"
+	"github.com/go-skynet/LocalAI/metrics"
 	"github.com/go-skynet/LocalAI/pkg/gallery"
 	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/go-skynet/LocalAI/pkg/utils"

@@ -162,8 +163,12 @@ var _ = Describe("API test", func() {
 			},
 		}

+		metricsService, err := metrics.SetupMetrics()
+		Expect(err).ToNot(HaveOccurred())
+
 		app, err = App(
 			append(commonOpts,
+				options.WithMetrics(metricsService),
 				options.WithContext(c),
 				options.WithGalleries(galleries),
 				options.WithModelLoader(modelLoader), options.WithBackendAssets(backendAssets), options.WithBackendAssetsOutput(tmpdir))...)

@@ -296,7 +301,7 @@ var _ = Describe("API test", func() {
 			response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
 				URL:       "github:go-skynet/model-gallery/openllama_3b.yaml",
 				Name:      "openllama_3b",
-				Overrides: map[string]interface{}{"backend": "llama", "mmap": true, "f16": true, "context_size": 128},
+				Overrides: map[string]interface{}{"backend": "llama-stable", "mmap": true, "f16": true, "context_size": 128},
 			})

 			Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))

@@ -359,6 +364,82 @@ var _ = Describe("API test", func() {
 			Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
 		})

+		It("runs openllama gguf", Label("llama-gguf"), func() {
+			if runtime.GOOS != "linux" {
+				Skip("test supported only on linux")
+			}
+			modelName := "codellama"
+			response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
+				URL:       "github:go-skynet/model-gallery/codellama-7b-instruct.yaml",
+				Name:      modelName,
+				Overrides: map[string]interface{}{"backend": "llama", "mmap": true, "f16": true, "context_size": 128},
+			})
+
+			Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
+
+			uuid := response["uuid"].(string)
+
+			Eventually(func() bool {
+				response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
+				return response["processed"].(bool)
+			}, "360s", "10s").Should(Equal(true))
+
+			By("testing chat")
+			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: modelName, Messages: []openai.ChatCompletionMessage{
+				{
+					Role:    "user",
+					Content: "How much is 2+2?",
+				},
+			}})
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(resp.Choices)).To(Equal(1))
+			Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")))
+
+			By("testing functions")
+			resp2, err := client.CreateChatCompletion(
+				context.TODO(),
+				openai.ChatCompletionRequest{
+					Model: modelName,
+					Messages: []openai.ChatCompletionMessage{
+						{
+							Role:    "user",
+							Content: "What is the weather like in San Francisco (celsius)?",
+						},
+					},
+					Functions: []openai.FunctionDefinition{
+						openai.FunctionDefinition{
+							Name:        "get_current_weather",
+							Description: "Get the current weather",
+							Parameters: jsonschema.Definition{
+								Type: jsonschema.Object,
+								Properties: map[string]jsonschema.Definition{
+									"location": {
+										Type:        jsonschema.String,
+										Description: "The city and state, e.g. San Francisco, CA",
+									},
+									"unit": {
+										Type: jsonschema.String,
+										Enum: []string{"celcius", "fahrenheit"},
+									},
+								},
+								Required: []string{"location"},
+							},
+						},
+					},
+				})
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(resp2.Choices)).To(Equal(1))
+			Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil())
+			Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name)
+
+			var res map[string]string
+			err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res))
+			Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
+			Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
+		})
+
 		It("runs gpt4all", Label("gpt4all"), func() {
 			if runtime.GOOS != "linux" {
 				Skip("test supported only on linux")

@@ -376,7 +457,7 @@ var _ = Describe("API test", func() {
 			Eventually(func() bool {
 				response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
 				return response["processed"].(bool)
-			}, "360s", "10s").Should(Equal(true))
+			}, "960s", "10s").Should(Equal(true))

 			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-j", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "How are you?"}}})
 			Expect(err).ToNot(HaveOccurred())

@@ -403,9 +484,13 @@ var _ = Describe("API test", func() {
 			},
 		}

+		metricsService, err := metrics.SetupMetrics()
+		Expect(err).ToNot(HaveOccurred())
+
 		app, err = App(
 			append(commonOpts,
 				options.WithContext(c),
+				options.WithMetrics(metricsService),
 				options.WithAudioDir(tmpdir),
 				options.WithImageDir(tmpdir),
 				options.WithGalleries(galleries),

@@ -507,12 +592,15 @@ var _ = Describe("API test", func() {
 		modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
 		c, cancel = context.WithCancel(context.Background())

-		var err error
+		metricsService, err := metrics.SetupMetrics()
+		Expect(err).ToNot(HaveOccurred())
+
 		app, err = App(
 			append(commonOpts,
 				options.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
 				options.WithContext(c),
 				options.WithModelLoader(modelLoader),
+				options.WithMetrics(metricsService),
 			)...)
 		Expect(err).ToNot(HaveOccurred())
 		go app.Listen("127.0.0.1:9090")

@@ -599,7 +687,7 @@ var _ = Describe("API test", func() {
 			Input: []string{"sun", "cat"},
 		},
 	)
-	Expect(err).ToNot(HaveOccurred())
+	Expect(err).ToNot(HaveOccurred(), err)
 	Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
 	Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))

@@ -716,10 +804,13 @@ var _ = Describe("API test", func() {
 	modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
 	c, cancel = context.WithCancel(context.Background())

-	var err error
+	metricsService, err := metrics.SetupMetrics()
+	Expect(err).ToNot(HaveOccurred())
+
 	app, err = App(
 		append(commonOpts,
 			options.WithContext(c),
+			options.WithMetrics(metricsService),
 			options.WithModelLoader(modelLoader),
 			options.WithConfigFile(os.Getenv("CONFIG_FILE")))...,
 	)
|||||||
@@ -2,7 +2,6 @@ package backend
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"sync"
|
|
||||||
|
|
||||||
config "github.com/go-skynet/LocalAI/api/config"
|
config "github.com/go-skynet/LocalAI/api/config"
|
||||||
"github.com/go-skynet/LocalAI/api/options"
|
"github.com/go-skynet/LocalAI/api/options"
|
||||||
@@ -22,17 +21,13 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.
|
|||||||
var inferenceModel interface{}
|
var inferenceModel interface{}
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
opts := []model.Option{
|
opts := modelOpts(c, o, []model.Option{
|
||||||
model.WithLoadGRPCLoadModelOpts(grpcOpts),
|
model.WithLoadGRPCLoadModelOpts(grpcOpts),
|
||||||
model.WithThreads(uint32(c.Threads)),
|
model.WithThreads(uint32(c.Threads)),
|
||||||
model.WithAssetDir(o.AssetsDestination),
|
model.WithAssetDir(o.AssetsDestination),
|
||||||
model.WithModel(modelFile),
|
model.WithModel(modelFile),
|
||||||
model.WithContext(o.Context),
|
model.WithContext(o.Context),
|
||||||
}
|
})
|
||||||
|
|
||||||
for k, v := range o.ExternalGRPCBackends {
|
|
||||||
opts = append(opts, model.WithExternalBackend(k, v))
|
|
||||||
}
|
|
||||||
|
|
||||||
if c.Backend == "" {
|
if c.Backend == "" {
|
||||||
inferenceModel, err = loader.GreedyLoader(opts...)
|
inferenceModel, err = loader.GreedyLoader(opts...)
|
||||||
@@ -80,18 +75,6 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.
|
|||||||
}
|
}
|
||||||
|
|
||||||
return func() ([]float32, error) {
|
return func() ([]float32, error) {
|
||||||
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
|
|
||||||
mutexMap.Lock()
|
|
||||||
l, ok := mutexes[modelFile]
|
|
||||||
if !ok {
|
|
||||||
m := &sync.Mutex{}
|
|
||||||
mutexes[modelFile] = m
|
|
||||||
l = m
|
|
||||||
}
|
|
||||||
mutexMap.Unlock()
|
|
||||||
l.Lock()
|
|
||||||
defer l.Unlock()
|
|
||||||
|
|
||||||
embeds, err := fn()
|
embeds, err := fn()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return embeds, err
|
return embeds, err
|
||||||
|
|||||||
@@ -1,17 +1,15 @@
 package backend
 
 import (
-    "sync"
-
     config "github.com/go-skynet/LocalAI/api/config"
     "github.com/go-skynet/LocalAI/api/options"
    "github.com/go-skynet/LocalAI/pkg/grpc/proto"
     model "github.com/go-skynet/LocalAI/pkg/model"
 )
 
-func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst string, loader *model.ModelLoader, c config.Config, o *options.Option) (func() error, error) {
+func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, c config.Config, o *options.Option) (func() error, error) {
 
-    opts := []model.Option{
+    opts := modelOpts(c, o, []model.Option{
         model.WithBackendString(c.Backend),
         model.WithAssetDir(o.AssetsDestination),
         model.WithThreads(uint32(c.Threads)),
@@ -21,12 +19,16 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
             CUDA:          c.Diffusers.CUDA,
             SchedulerType: c.Diffusers.SchedulerType,
             PipelineType:  c.Diffusers.PipelineType,
+            CFGScale:      c.Diffusers.CFGScale,
+            LoraAdapter:   c.LoraAdapter,
+            LoraScale:     c.LoraScale,
+            LoraBase:      c.LoraBase,
+            IMG2IMG:       c.Diffusers.IMG2IMG,
+            CLIPModel:     c.Diffusers.ClipModel,
+            CLIPSubfolder: c.Diffusers.ClipSubFolder,
+            CLIPSkip:      int32(c.Diffusers.ClipSkip),
         }),
-    }
+    })
 
-    for k, v := range o.ExternalGRPCBackends {
-        opts = append(opts, model.WithExternalBackend(k, v))
-    }
-
     inferenceModel, err := loader.BackendLoader(
         opts...,
@@ -39,31 +41,20 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
         _, err := inferenceModel.GenerateImage(
             o.Context,
             &proto.GenerateImageRequest{
                 Height:           int32(height),
                 Width:            int32(width),
                 Mode:             int32(mode),
                 Step:             int32(step),
                 Seed:             int32(seed),
+                CLIPSkip:         int32(c.Diffusers.ClipSkip),
                 PositivePrompt:   positive_prompt,
                 NegativePrompt:   negative_prompt,
                 Dst:              dst,
+                Src:              src,
+                EnableParameters: c.Diffusers.EnableParameters,
             })
         return err
     }
 
-    return func() error {
-        // This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
-        mutexMap.Lock()
-        l, ok := mutexes[c.Backend]
-        if !ok {
-            m := &sync.Mutex{}
-            mutexes[c.Backend] = m
-            l = m
-        }
-        mutexMap.Unlock()
-        l.Lock()
-        defer l.Unlock()
-
-        return fn()
-    }, nil
+    return fn, nil
 }
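The hunk above (and the same edit repeated in the other backends further down) replaces a hand-rolled option slice plus an ExternalGRPCBackends loop with a shared modelOpts helper that appends the cross-cutting loader options. The following standalone sketch of that functional-options composition is illustrative only; the option names and settings struct here are hypothetical, not the real pkg/model API.

package main

import "fmt"

// Option mirrors the functional-option style used by the model loader.
type Option func(*settings)

type settings struct {
    backend       string
    singleBackend bool
    grpcAttempts  int
}

func WithBackend(b string) Option     { return func(s *settings) { s.backend = b } }
func WithSingleActiveBackend() Option { return func(s *settings) { s.singleBackend = true } }
func WithGRPCAttempts(n int) Option   { return func(s *settings) { s.grpcAttempts = n } }

// sharedOpts plays the role of modelOpts: take the per-call options and
// append the options that every backend load should carry.
func sharedOpts(opts []Option) []Option {
    opts = append(opts, WithSingleActiveBackend())
    opts = append(opts, WithGRPCAttempts(3))
    return opts
}

func main() {
    opts := sharedOpts([]Option{WithBackend("diffusers")})

    s := settings{}
    for _, o := range opts {
        o(&s)
    }
    fmt.Printf("%+v\n", s) // {backend:diffusers singleBackend:true grpcAttempts:3}
}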
@@ -6,6 +6,7 @@ import (
     "regexp"
     "strings"
     "sync"
+    "unicode/utf8"
 
     config "github.com/go-skynet/LocalAI/api/config"
     "github.com/go-skynet/LocalAI/api/options"
@@ -15,7 +16,17 @@ import (
     "github.com/go-skynet/LocalAI/pkg/utils"
 )
 
-func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string) bool) (func() (string, error), error) {
+type LLMResponse struct {
+    Response string // should this be []byte?
+    Usage    TokenUsage
+}
+
+type TokenUsage struct {
+    Prompt     int
+    Completion int
+}
+
+func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
     modelFile := c.Model
 
     grpcOpts := gRPCModelOpts(c)
@@ -23,17 +34,13 @@ func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c
     var inferenceModel *grpc.Client
     var err error
 
-    opts := []model.Option{
+    opts := modelOpts(c, o, []model.Option{
         model.WithLoadGRPCLoadModelOpts(grpcOpts),
         model.WithThreads(uint32(c.Threads)), // some models uses this to allocate threads during startup
         model.WithAssetDir(o.AssetsDestination),
         model.WithModel(modelFile),
         model.WithContext(o.Context),
-    }
-
-    for k, v := range o.ExternalGRPCBackends {
-        opts = append(opts, model.WithExternalBackend(k, v))
-    }
+    })
 
     if c.Backend != "" {
         opts = append(opts, model.WithBackendString(c.Backend))
@@ -62,40 +69,72 @@ func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c
     }
 
     // in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
-    fn := func() (string, error) {
+    fn := func() (LLMResponse, error) {
         opts := gRPCPredictOpts(c, loader.ModelPath)
         opts.Prompt = s
+        opts.Images = images
+
+        tokenUsage := TokenUsage{}
+
+        // check the per-model feature flag for usage, since tokenCallback may have a cost.
+        // Defaults to off as for now it is still experimental
+        if c.FeatureFlag.Enabled("usage") {
+            userTokenCallback := tokenCallback
+            if userTokenCallback == nil {
+                userTokenCallback = func(token string, usage TokenUsage) bool {
+                    return true
+                }
+            }
+
+            promptInfo, pErr := inferenceModel.TokenizeString(ctx, opts)
+            if pErr == nil && promptInfo.Length > 0 {
+                tokenUsage.Prompt = int(promptInfo.Length)
+            }
+
+            tokenCallback = func(token string, usage TokenUsage) bool {
+                tokenUsage.Completion++
+                return userTokenCallback(token, tokenUsage)
+            }
+        }
+
         if tokenCallback != nil {
             ss := ""
-            err := inferenceModel.PredictStream(ctx, opts, func(s []byte) {
-                tokenCallback(string(s))
-                ss += string(s)
+
+            var partialRune []byte
+            err := inferenceModel.PredictStream(ctx, opts, func(chars []byte) {
+                partialRune = append(partialRune, chars...)
+
+                for len(partialRune) > 0 {
+                    r, size := utf8.DecodeRune(partialRune)
+                    if r == utf8.RuneError {
+                        // incomplete rune, wait for more bytes
+                        break
+                    }
+
+                    tokenCallback(string(r), tokenUsage)
+                    ss += string(r)
+
+                    partialRune = partialRune[size:]
+                }
             })
-            return ss, err
+            return LLMResponse{
+                Response: ss,
+                Usage:    tokenUsage,
+            }, err
         } else {
+            // TODO: Is the chicken bit the only way to get here? is that acceptable?
             reply, err := inferenceModel.Predict(ctx, opts)
             if err != nil {
-                return "", err
+                return LLMResponse{}, err
             }
-            return string(reply.Message), err
+            return LLMResponse{
+                Response: string(reply.Message),
+                Usage:    tokenUsage,
+            }, err
         }
     }
 
-    return func() (string, error) {
-        // This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
-        mutexMap.Lock()
-        l, ok := mutexes[modelFile]
-        if !ok {
-            m := &sync.Mutex{}
-            mutexes[modelFile] = m
-            l = m
-        }
-        mutexMap.Unlock()
-        l.Lock()
-        defer l.Unlock()
-
-        return fn()
-    }, nil
+    return fn, nil
 }
 
 var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp)
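The new PredictStream callback above buffers incoming bytes and only emits complete runes, because a multi-byte UTF-8 character can arrive split across two gRPC chunks. A minimal standalone sketch of that reassembly technique (the sample chunks are invented, the decoding logic mirrors the diff):

package main

import (
    "fmt"
    "unicode/utf8"
)

func main() {
    // "é" (0xC3 0xA9) arrives split across two streamed chunks.
    chunks := [][]byte{{'h', 'i', ' ', 0xC3}, {0xA9, '!'}}

    var partialRune []byte
    out := ""
    for _, chunk := range chunks {
        partialRune = append(partialRune, chunk...)
        for len(partialRune) > 0 {
            r, size := utf8.DecodeRune(partialRune)
            if r == utf8.RuneError {
                // incomplete rune: keep the bytes and wait for the next chunk
                break
            }
            out += string(r)
            partialRune = partialRune[size:]
        }
    }
    fmt.Println(out) // hi é!
}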
@@ -1,22 +0,0 @@
-package backend
-
-import "sync"
-
-// mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
-var mutexMap sync.Mutex
-var mutexes map[string]*sync.Mutex = make(map[string]*sync.Mutex)
-
-func Lock(s string) *sync.Mutex {
-    // This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
-    mutexMap.Lock()
-    l, ok := mutexes[s]
-    if !ok {
-        m := &sync.Mutex{}
-        mutexes[s] = m
-        l = m
-    }
-    mutexMap.Unlock()
-    l.Lock()
-
-    return l
-}

@@ -5,39 +5,75 @@ import (
     "path/filepath"
 
     pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+    model "github.com/go-skynet/LocalAI/pkg/model"
 
     config "github.com/go-skynet/LocalAI/api/config"
+    "github.com/go-skynet/LocalAI/api/options"
 )
 
+func modelOpts(c config.Config, o *options.Option, opts []model.Option) []model.Option {
+    if o.SingleBackend {
+        opts = append(opts, model.WithSingleActiveBackend())
+    }
+
+    if c.GRPC.Attempts != 0 {
+        opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts))
+    }
+
+    if c.GRPC.AttemptsSleepTime != 0 {
+        opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
+    }
+
+    for k, v := range o.ExternalGRPCBackends {
+        opts = append(opts, model.WithExternalBackend(k, v))
+    }
+
+    return opts
+}
+
 func gRPCModelOpts(c config.Config) *pb.ModelOptions {
     b := 512
     if c.Batch != 0 {
         b = c.Batch
     }
-    return &pb.ModelOptions{
-        ContextSize:   int32(c.ContextSize),
-        Seed:          int32(c.Seed),
-        NBatch:        int32(b),
-        NGQA:          c.NGQA,
-        RMSNormEps:    c.RMSNormEps,
-        F16Memory:     c.F16,
-        MLock:         c.MMlock,
-        RopeFreqBase:  c.RopeFreqBase,
-        RopeFreqScale: c.RopeFreqScale,
-        NUMA:          c.NUMA,
-        Embeddings:    c.Embeddings,
-        LowVRAM:       c.LowVRAM,
-        NGPULayers:    int32(c.NGPULayers),
-        MMap:          c.MMap,
-        MainGPU:       c.MainGPU,
-        Threads:       int32(c.Threads),
-        TensorSplit:   c.TensorSplit,
+    return &pb.ModelOptions{
+        ContextSize:    int32(c.ContextSize),
+        Seed:           int32(c.Seed),
+        NBatch:         int32(b),
+        NoMulMatQ:      c.NoMulMatQ,
+        DraftModel:     c.DraftModel,
+        AudioPath:      c.VallE.AudioPath,
+        Quantization:   c.Quantization,
+        MMProj:         c.MMProj,
+        YarnExtFactor:  c.YarnExtFactor,
+        YarnAttnFactor: c.YarnAttnFactor,
+        YarnBetaFast:   c.YarnBetaFast,
+        YarnBetaSlow:   c.YarnBetaSlow,
+        LoraAdapter:    c.LoraAdapter,
+        LoraBase:       c.LoraBase,
+        LoraScale:      c.LoraScale,
+        NGQA:           c.NGQA,
+        RMSNormEps:     c.RMSNormEps,
+        F16Memory:      c.F16,
+        MLock:          c.MMlock,
+        RopeFreqBase:   c.RopeFreqBase,
+        RopeFreqScale:  c.RopeFreqScale,
+        NUMA:           c.NUMA,
+        Embeddings:     c.Embeddings,
+        LowVRAM:        c.LowVRAM,
+        NGPULayers:     int32(c.NGPULayers),
+        MMap:           c.MMap,
+        MainGPU:        c.MainGPU,
+        Threads:        int32(c.Threads),
+        TensorSplit:    c.TensorSplit,
         // AutoGPTQ
         ModelBaseName:    c.AutoGPTQ.ModelBaseName,
         Device:           c.AutoGPTQ.Device,
         UseTriton:        c.AutoGPTQ.Triton,
         UseFastTokenizer: c.AutoGPTQ.UseFastTokenizer,
+        // RWKV
+        Tokenizer: c.Tokenizer,
     }
 }
@@ -51,6 +87,7 @@ func gRPCPredictOpts(c config.Config, modelPath string) *pb.PredictOptions {
     return &pb.PredictOptions{
         Temperature: float32(c.Temperature),
         TopP:        float32(c.TopP),
+        NDraft:      c.NDraft,
         TopK:        int32(c.TopK),
         Tokens:      int32(c.Maxtokens),
         Threads:     int32(c.Threads),

@@ -5,25 +5,22 @@ import (
     "fmt"
 
     config "github.com/go-skynet/LocalAI/api/config"
+    "github.com/go-skynet/LocalAI/api/schema"
 
     "github.com/go-skynet/LocalAI/api/options"
     "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-    "github.com/go-skynet/LocalAI/pkg/grpc/whisper/api"
     model "github.com/go-skynet/LocalAI/pkg/model"
 )
 
-func ModelTranscription(audio, language string, loader *model.ModelLoader, c config.Config, o *options.Option) (*api.Result, error) {
-    opts := []model.Option{
+func ModelTranscription(audio, language string, loader *model.ModelLoader, c config.Config, o *options.Option) (*schema.Result, error) {
+
+    opts := modelOpts(c, o, []model.Option{
         model.WithBackendString(model.WhisperBackend),
         model.WithModel(c.Model),
         model.WithContext(o.Context),
         model.WithThreads(uint32(c.Threads)),
         model.WithAssetDir(o.AssetsDestination),
-    }
-
-    for k, v := range o.ExternalGRPCBackends {
-        opts = append(opts, model.WithExternalBackend(k, v))
-    }
+    })
 
     whisperModel, err := o.Loader.BackendLoader(opts...)
     if err != nil {

@@ -6,6 +6,7 @@ import (
     "os"
     "path/filepath"
 
+    api_config "github.com/go-skynet/LocalAI/api/config"
     "github.com/go-skynet/LocalAI/api/options"
     "github.com/go-skynet/LocalAI/pkg/grpc/proto"
     model "github.com/go-skynet/LocalAI/pkg/model"
@@ -33,17 +34,12 @@ func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *opt
     if bb == "" {
         bb = model.PiperBackend
     }
-    opts := []model.Option{
+    opts := modelOpts(api_config.Config{}, o, []model.Option{
         model.WithBackendString(bb),
         model.WithModel(modelFile),
         model.WithContext(o.Context),
         model.WithAssetDir(o.AssetsDestination),
-    }
-
-    for k, v := range o.ExternalGRPCBackends {
-        opts = append(opts, model.WithExternalBackend(k, v))
-    }
+    })
 
     piperModel, err := o.Loader.BackendLoader(opts...)
     if err != nil {
         return "", nil, err

@@ -29,6 +29,7 @@ type Config struct {
 
     FunctionsConfig Functions `yaml:"function"`
 
+    FeatureFlag FeatureFlag `yaml:"feature_flags"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
     // LLM configs (GPT4ALL, Llama.cpp, ...)
     LLMConfig `yaml:",inline"`
 
@@ -39,12 +40,40 @@ type Config struct {
     Diffusers Diffusers `yaml:"diffusers"`
     Step      int       `yaml:"step"`
 
+    // GRPC Options
+    GRPC GRPC `yaml:"grpc"`
+
+    // Vall-e-x
+    VallE VallE `yaml:"vall-e"`
+}
+
+type VallE struct {
+    AudioPath string `yaml:"audio_path"`
+}
+
+type FeatureFlag map[string]*bool
+
+func (ff FeatureFlag) Enabled(s string) bool {
+    v, exist := ff[s]
+    return exist && v != nil && *v
+}
+
+type GRPC struct {
+    Attempts          int `yaml:"attempts"`
+    AttemptsSleepTime int `yaml:"attempts_sleep_time"`
 }
 
 type Diffusers struct {
     PipelineType  string `yaml:"pipeline_type"`
     SchedulerType string `yaml:"scheduler_type"`
     CUDA          bool   `yaml:"cuda"`
+    EnableParameters string  `yaml:"enable_parameters"` // A list of comma separated parameters to specify
+    CFGScale         float32 `yaml:"cfg_scale"`         // Classifier-Free Guidance Scale
+    IMG2IMG          bool    `yaml:"img2img"`           // Image to Image Diffuser
+    ClipSkip         int     `yaml:"clip_skip"`         // Skip every N frames
+    ClipModel        string  `yaml:"clip_model"`        // Clip model to use
+    ClipSubFolder    string  `yaml:"clip_subfolder"`    // Subfolder to use for clip model
 }
 
 type LLMConfig struct {
@@ -69,6 +98,20 @@ type LLMConfig struct {
     TrimSpace   []string `yaml:"trimspace"`
     ContextSize int      `yaml:"context_size"`
     NUMA        bool     `yaml:"numa"`
+    LoraAdapter  string  `yaml:"lora_adapter"`
+    LoraBase     string  `yaml:"lora_base"`
+    LoraScale    float32 `yaml:"lora_scale"`
+    NoMulMatQ    bool    `yaml:"no_mulmatq"`
+    DraftModel   string  `yaml:"draft_model"`
+    NDraft       int32   `yaml:"n_draft"`
+    Quantization string  `yaml:"quantization"`
+    MMProj       string  `yaml:"mmproj"`
+
+    RopeScaling    string  `yaml:"rope_scaling"`
+    YarnExtFactor  float32 `yaml:"yarn_ext_factor"`
+    YarnAttnFactor float32 `yaml:"yarn_attn_factor"`
+    YarnBetaFast   float32 `yaml:"yarn_beta_fast"`
+    YarnBetaSlow   float32 `yaml:"yarn_beta_slow"`
 }
 
 type AutoGPTQ struct {

@@ -41,4 +41,10 @@ type PredictionOptions struct {
     NegativePromptScale float32 `json:"negative_prompt_scale" yaml:"negative_prompt_scale"`
     // AutoGPTQ
     UseFastTokenizer bool `json:"use_fast_tokenizer" yaml:"use_fast_tokenizer"`
+
+    // Diffusers
+    ClipSkip int `json:"clip_skip" yaml:"clip_skip"`
+
+    // RWKV (?)
+    Tokenizer string `json:"tokenizer" yaml:"tokenizer"`
 }

api/localai/backend_monitor.go (new file, 163 lines)
@@ -0,0 +1,163 @@
+package localai
+
+import (
+    "context"
+    "fmt"
+    "strings"
+
+    config "github.com/go-skynet/LocalAI/api/config"
+    "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+
+    "github.com/go-skynet/LocalAI/api/options"
+    "github.com/gofiber/fiber/v2"
+    "github.com/rs/zerolog/log"
+
+    gopsutil "github.com/shirou/gopsutil/v3/process"
+)
+
+type BackendMonitorRequest struct {
+    Model string `json:"model" yaml:"model"`
+}
+
+type BackendMonitorResponse struct {
+    MemoryInfo    *gopsutil.MemoryInfoStat
+    MemoryPercent float32
+    CPUPercent    float64
+}
+
+type BackendMonitor struct {
+    configLoader *config.ConfigLoader
+    options      *options.Option // Taking options in case we need to inspect ExternalGRPCBackends, though that's out of scope for now, hence the name.
+}
+
+func NewBackendMonitor(configLoader *config.ConfigLoader, options *options.Option) BackendMonitor {
+    return BackendMonitor{
+        configLoader: configLoader,
+        options:      options,
+    }
+}
+
+func (bm *BackendMonitor) SampleLocalBackendProcess(model string) (*BackendMonitorResponse, error) {
+    config, exists := bm.configLoader.GetConfig(model)
+    var backend string
+    if exists {
+        backend = config.Model
+    } else {
+        // Last ditch effort: use it raw, see if a backend happens to match.
+        backend = model
+    }
+
+    if !strings.HasSuffix(backend, ".bin") {
+        backend = fmt.Sprintf("%s.bin", backend)
+    }
+
+    pid, err := bm.options.Loader.GetGRPCPID(backend)
+
+    if err != nil {
+        log.Error().Msgf("model %s : failed to find pid %+v", model, err)
+        return nil, err
+    }
+
+    // Name is slightly frightening but this does _not_ create a new process, rather it looks up an existing process by PID.
+    backendProcess, err := gopsutil.NewProcess(int32(pid))
+
+    if err != nil {
+        log.Error().Msgf("model %s [PID %d] : error getting process info %+v", model, pid, err)
+        return nil, err
+    }
+
+    memInfo, err := backendProcess.MemoryInfo()
+
+    if err != nil {
+        log.Error().Msgf("model %s [PID %d] : error getting memory info %+v", model, pid, err)
+        return nil, err
+    }
+
+    memPercent, err := backendProcess.MemoryPercent()
+    if err != nil {
+        log.Error().Msgf("model %s [PID %d] : error getting memory percent %+v", model, pid, err)
+        return nil, err
+    }
+
+    cpuPercent, err := backendProcess.CPUPercent()
+    if err != nil {
+        log.Error().Msgf("model %s [PID %d] : error getting cpu percent %+v", model, pid, err)
+        return nil, err
+    }
+
+    return &BackendMonitorResponse{
+        MemoryInfo:    memInfo,
+        MemoryPercent: memPercent,
+        CPUPercent:    cpuPercent,
+    }, nil
+}
+
+func (bm BackendMonitor) getModelLoaderIDFromCtx(c *fiber.Ctx) (string, error) {
+    input := new(BackendMonitorRequest)
+    // Get input data from the request body
+    if err := c.BodyParser(input); err != nil {
+        return "", err
+    }
+
+    config, exists := bm.configLoader.GetConfig(input.Model)
+    var backendId string
+    if exists {
+        backendId = config.Model
+    } else {
+        // Last ditch effort: use it raw, see if a backend happens to match.
+        backendId = input.Model
+    }
+
+    if !strings.HasSuffix(backendId, ".bin") {
+        backendId = fmt.Sprintf("%s.bin", backendId)
+    }
+
+    return backendId, nil
+}
+
+func BackendMonitorEndpoint(bm BackendMonitor) func(c *fiber.Ctx) error {
+    return func(c *fiber.Ctx) error {
+
+        backendId, err := bm.getModelLoaderIDFromCtx(c)
+        if err != nil {
+            return err
+        }
+
+        client := bm.options.Loader.CheckIsLoaded(backendId)
+
+        if client == nil {
+            return fmt.Errorf("backend %s is not currently loaded", backendId)
+        }
+
+        status, rpcErr := client.Status(context.TODO())
+        if rpcErr != nil {
+            log.Warn().Msgf("backend %s experienced an error retrieving status info: %s", backendId, rpcErr.Error())
+            val, slbErr := bm.SampleLocalBackendProcess(backendId)
+            if slbErr != nil {
+                return fmt.Errorf("backend %s experienced an error retrieving status info via rpc: %s, then failed local node process sample: %s", backendId, rpcErr.Error(), slbErr.Error())
+            }
+            return c.JSON(proto.StatusResponse{
+                State: proto.StatusResponse_ERROR,
+                Memory: &proto.MemoryUsageData{
+                    Total: val.MemoryInfo.VMS,
+                    Breakdown: map[string]uint64{
+                        "gopsutil-RSS": val.MemoryInfo.RSS,
+                    },
+                },
+            })
+        }
+
+        return c.JSON(status)
+    }
+}
+
+func BackendShutdownEndpoint(bm BackendMonitor) func(c *fiber.Ctx) error {
+    return func(c *fiber.Ctx) error {
+        backendId, err := bm.getModelLoaderIDFromCtx(c)
+        if err != nil {
+            return err
+        }
+
+        return bm.options.Loader.ShutdownModel(backendId)
+    }
+}
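The new backend monitor falls back to sampling the backend's local process with gopsutil when the gRPC status call fails. A self-contained sketch of those gopsutil calls follows; as an assumption for the example it samples the current process instead of a resolved backend PID.

package main

import (
    "fmt"
    "os"

    gopsutil "github.com/shirou/gopsutil/v3/process"
)

func main() {
    // The monitor looks up the gRPC backend's PID via the model loader; here we just use our own PID.
    p, err := gopsutil.NewProcess(int32(os.Getpid()))
    if err != nil {
        panic(err)
    }

    memInfo, _ := p.MemoryInfo() // RSS / VMS, as used for the Memory breakdown in the error response
    memPercent, _ := p.MemoryPercent()
    cpuPercent, _ := p.CPUPercent()

    fmt.Printf("rss=%d vms=%d mem%%=%.2f cpu%%=%.2f\n",
        memInfo.RSS, memInfo.VMS, memPercent, cpuPercent)
}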
@@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
@@ -27,6 +28,7 @@ type galleryOp struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type galleryOpStatus struct {
|
type galleryOpStatus struct {
|
||||||
|
FileName string `json:"file_name"`
|
||||||
Error error `json:"error"`
|
Error error `json:"error"`
|
||||||
Processed bool `json:"processed"`
|
Processed bool `json:"processed"`
|
||||||
Message string `json:"message"`
|
Message string `json:"message"`
|
||||||
@@ -50,7 +52,6 @@ func NewGalleryService(modelPath string) *galleryApplier {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// prepareModel applies a
|
|
||||||
func prepareModel(modelPath string, req gallery.GalleryModel, cm *config.ConfigLoader, downloadStatus func(string, string, string, float64)) error {
|
func prepareModel(modelPath string, req gallery.GalleryModel, cm *config.ConfigLoader, downloadStatus func(string, string, string, float64)) error {
|
||||||
|
|
||||||
config, err := gallery.GetGalleryConfigFromURL(req.URL)
|
config, err := gallery.GetGalleryConfigFromURL(req.URL)
|
||||||
@@ -76,6 +77,13 @@ func (g *galleryApplier) getStatus(s string) *galleryOpStatus {
|
|||||||
return g.statuses[s]
|
return g.statuses[s]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (g *galleryApplier) getAllStatus() map[string]*galleryOpStatus {
|
||||||
|
g.Lock()
|
||||||
|
defer g.Unlock()
|
||||||
|
|
||||||
|
return g.statuses
|
||||||
|
}
|
||||||
|
|
||||||
func (g *galleryApplier) Start(c context.Context, cm *config.ConfigLoader) {
|
func (g *galleryApplier) Start(c context.Context, cm *config.ConfigLoader) {
|
||||||
go func() {
|
go func() {
|
||||||
for {
|
for {
|
||||||
@@ -94,7 +102,7 @@ func (g *galleryApplier) Start(c context.Context, cm *config.ConfigLoader) {
|
|||||||
|
|
||||||
// displayDownload displays the download progress
|
// displayDownload displays the download progress
|
||||||
progressCallback := func(fileName string, current string, total string, percentage float64) {
|
progressCallback := func(fileName string, current string, total string, percentage float64) {
|
||||||
g.updateStatus(op.id, &galleryOpStatus{Message: "processing", Progress: percentage, TotalFileSize: total, DownloadedFileSize: current})
|
g.updateStatus(op.id, &galleryOpStatus{Message: "processing", FileName: fileName, Progress: percentage, TotalFileSize: total, DownloadedFileSize: current})
|
||||||
utils.DisplayDownloadFunction(fileName, current, total, percentage)
|
utils.DisplayDownloadFunction(fileName, current, total, percentage)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -176,18 +184,12 @@ func ApplyGalleryFromString(modelPath, s string, cm *config.ConfigLoader, galler
|
|||||||
return processRequests(modelPath, s, cm, galleries, requests)
|
return processRequests(modelPath, s, cm, galleries, requests)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Endpoints
|
/// Endpoint Service
|
||||||
|
|
||||||
func GetOpStatusEndpoint(g *galleryApplier) func(c *fiber.Ctx) error {
|
type ModelGalleryService struct {
|
||||||
return func(c *fiber.Ctx) error {
|
galleries []gallery.Gallery
|
||||||
|
modelPath string
|
||||||
status := g.getStatus(c.Params("uuid"))
|
galleryApplier *galleryApplier
|
||||||
if status == nil {
|
|
||||||
return fmt.Errorf("could not find any status for ID")
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.JSON(status)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type GalleryModel struct {
|
type GalleryModel struct {
|
||||||
@@ -195,7 +197,31 @@ type GalleryModel struct {
|
|||||||
gallery.GalleryModel
|
gallery.GalleryModel
|
||||||
}
|
}
|
||||||
|
|
||||||
func ApplyModelGalleryEndpoint(modelPath string, cm *config.ConfigLoader, g chan galleryOp, galleries []gallery.Gallery) func(c *fiber.Ctx) error {
|
func CreateModelGalleryService(galleries []gallery.Gallery, modelPath string, galleryApplier *galleryApplier) ModelGalleryService {
|
||||||
|
return ModelGalleryService{
|
||||||
|
galleries: galleries,
|
||||||
|
modelPath: modelPath,
|
||||||
|
galleryApplier: galleryApplier,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (mgs *ModelGalleryService) GetOpStatusEndpoint() func(c *fiber.Ctx) error {
|
||||||
|
return func(c *fiber.Ctx) error {
|
||||||
|
status := mgs.galleryApplier.getStatus(c.Params("uuid"))
|
||||||
|
if status == nil {
|
||||||
|
return fmt.Errorf("could not find any status for ID")
|
||||||
|
}
|
||||||
|
return c.JSON(status)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (mgs *ModelGalleryService) GetAllStatusEndpoint() func(c *fiber.Ctx) error {
|
||||||
|
return func(c *fiber.Ctx) error {
|
||||||
|
return c.JSON(mgs.galleryApplier.getAllStatus())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (mgs *ModelGalleryService) ApplyModelGalleryEndpoint() func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
input := new(GalleryModel)
|
input := new(GalleryModel)
|
||||||
// Get input data from the request body
|
// Get input data from the request body
|
||||||
@@ -207,11 +233,11 @@ func ApplyModelGalleryEndpoint(modelPath string, cm *config.ConfigLoader, g chan
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
g <- galleryOp{
|
mgs.galleryApplier.C <- galleryOp{
|
||||||
req: input.GalleryModel,
|
req: input.GalleryModel,
|
||||||
id: uuid.String(),
|
id: uuid.String(),
|
||||||
galleryName: input.ID,
|
galleryName: input.ID,
|
||||||
galleries: galleries,
|
galleries: mgs.galleries,
|
||||||
}
|
}
|
||||||
return c.JSON(struct {
|
return c.JSON(struct {
|
||||||
ID string `json:"uuid"`
|
ID string `json:"uuid"`
|
||||||
@@ -220,11 +246,11 @@ func ApplyModelGalleryEndpoint(modelPath string, cm *config.ConfigLoader, g chan
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func ListModelFromGalleryEndpoint(galleries []gallery.Gallery, basePath string) func(c *fiber.Ctx) error {
|
func (mgs *ModelGalleryService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
log.Debug().Msgf("Listing models from galleries: %+v", galleries)
|
log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)
|
||||||
|
|
||||||
models, err := gallery.AvailableGalleryModels(galleries, basePath)
|
models, err := gallery.AvailableGalleryModels(mgs.galleries, mgs.modelPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -239,3 +265,56 @@ func ListModelFromGalleryEndpoint(galleries []gallery.Gallery, basePath string)
|
|||||||
return c.Send(dat)
|
return c.Send(dat)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NOTE: This is different (and much simpler!) than above! This JUST lists the model galleries that have been loaded, not their contents!
|
||||||
|
func (mgs *ModelGalleryService) ListModelGalleriesEndpoint() func(c *fiber.Ctx) error {
|
||||||
|
return func(c *fiber.Ctx) error {
|
||||||
|
log.Debug().Msgf("Listing model galleries %+v", mgs.galleries)
|
||||||
|
dat, err := json.Marshal(mgs.galleries)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return c.Send(dat)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (mgs *ModelGalleryService) AddModelGalleryEndpoint() func(c *fiber.Ctx) error {
|
||||||
|
return func(c *fiber.Ctx) error {
|
||||||
|
input := new(gallery.Gallery)
|
||||||
|
// Get input data from the request body
|
||||||
|
if err := c.BodyParser(input); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if slices.ContainsFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
|
||||||
|
return gallery.Name == input.Name
|
||||||
|
}) {
|
||||||
|
return fmt.Errorf("%s already exists", input.Name)
|
||||||
|
}
|
||||||
|
dat, err := json.Marshal(mgs.galleries)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
log.Debug().Msgf("Adding %+v to gallery list", *input)
|
||||||
|
mgs.galleries = append(mgs.galleries, *input)
|
||||||
|
return c.Send(dat)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (mgs *ModelGalleryService) RemoveModelGalleryEndpoint() func(c *fiber.Ctx) error {
|
||||||
|
return func(c *fiber.Ctx) error {
|
||||||
|
input := new(gallery.Gallery)
|
||||||
|
// Get input data from the request body
|
||||||
|
if err := c.BodyParser(input); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if !slices.ContainsFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
|
||||||
|
return gallery.Name == input.Name
|
||||||
|
}) {
|
||||||
|
return fmt.Errorf("%s is not currently registered", input.Name)
|
||||||
|
}
|
||||||
|
mgs.galleries = slices.DeleteFunc(mgs.galleries, func(gallery gallery.Gallery) bool {
|
||||||
|
return gallery.Name == input.Name
|
||||||
|
})
|
||||||
|
return c.Send(nil)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -6,34 +6,48 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/go-skynet/LocalAI/api/backend"
|
"github.com/go-skynet/LocalAI/api/backend"
|
||||||
config "github.com/go-skynet/LocalAI/api/config"
|
config "github.com/go-skynet/LocalAI/api/config"
|
||||||
"github.com/go-skynet/LocalAI/api/options"
|
"github.com/go-skynet/LocalAI/api/options"
|
||||||
|
"github.com/go-skynet/LocalAI/api/schema"
|
||||||
"github.com/go-skynet/LocalAI/pkg/grammar"
|
"github.com/go-skynet/LocalAI/pkg/grammar"
|
||||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
|
"github.com/google/uuid"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
"github.com/valyala/fasthttp"
|
"github.com/valyala/fasthttp"
|
||||||
)
|
)
|
||||||
|
|
||||||
func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
|
func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
|
||||||
emptyMessage := ""
|
emptyMessage := ""
|
||||||
|
id := uuid.New().String()
|
||||||
|
created := int(time.Now().Unix())
|
||||||
|
|
||||||
process := func(s string, req *OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan OpenAIResponse) {
|
process := func(s string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
|
||||||
initialMessage := OpenAIResponse{
|
initialMessage := schema.OpenAIResponse{
|
||||||
|
ID: id,
|
||||||
|
Created: created,
|
||||||
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||||
Choices: []Choice{{Delta: &Message{Role: "assistant", Content: &emptyMessage}}},
|
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}},
|
||||||
Object: "chat.completion.chunk",
|
Object: "chat.completion.chunk",
|
||||||
}
|
}
|
||||||
responses <- initialMessage
|
responses <- initialMessage
|
||||||
|
|
||||||
ComputeChoices(req, s, config, o, loader, func(s string, c *[]Choice) {}, func(s string) bool {
|
ComputeChoices(req, s, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
|
||||||
resp := OpenAIResponse{
|
resp := schema.OpenAIResponse{
|
||||||
|
ID: id,
|
||||||
|
Created: created,
|
||||||
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||||
Choices: []Choice{{Delta: &Message{Content: &s}, Index: 0}},
|
Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}},
|
||||||
Object: "chat.completion.chunk",
|
Object: "chat.completion.chunk",
|
||||||
|
Usage: schema.OpenAIUsage{
|
||||||
|
PromptTokens: usage.Prompt,
|
||||||
|
CompletionTokens: usage.Completion,
|
||||||
|
TotalTokens: usage.Prompt + usage.Completion,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
responses <- resp
|
responses <- resp
|
||||||
@@ -67,6 +81,10 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
noActionDescription = config.FunctionsConfig.NoActionDescriptionName
|
noActionDescription = config.FunctionsConfig.NoActionDescriptionName
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if input.ResponseFormat == "json_object" {
|
||||||
|
input.Grammar = grammar.JSONBNF
|
||||||
|
}
|
||||||
|
|
||||||
// process functions if we have any defined or if we have a function call string
|
// process functions if we have any defined or if we have a function call string
|
||||||
if len(input.Functions) > 0 && config.ShouldUseFunctions() {
|
if len(input.Functions) > 0 && config.ShouldUseFunctions() {
|
||||||
log.Debug().Msgf("Response needs to process functions")
|
log.Debug().Msgf("Response needs to process functions")
|
||||||
@@ -126,14 +144,14 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
r := config.Roles[role]
|
r := config.Roles[role]
|
||||||
contentExists := i.Content != nil && *i.Content != ""
|
contentExists := i.Content != nil && i.StringContent != ""
|
||||||
// First attempt to populate content via a chat message specific template
|
// First attempt to populate content via a chat message specific template
|
||||||
if config.TemplateConfig.ChatMessage != "" {
|
if config.TemplateConfig.ChatMessage != "" {
|
||||||
chatMessageData := model.ChatMessageTemplateData{
|
chatMessageData := model.ChatMessageTemplateData{
|
||||||
SystemPrompt: config.SystemPrompt,
|
SystemPrompt: config.SystemPrompt,
|
||||||
Role: r,
|
Role: r,
|
||||||
RoleName: role,
|
RoleName: role,
|
||||||
Content: *i.Content,
|
Content: i.StringContent,
|
||||||
MessageIndex: messageIndex,
|
MessageIndex: messageIndex,
|
||||||
}
|
}
|
||||||
templatedChatMessage, err := o.Loader.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
|
templatedChatMessage, err := o.Loader.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
|
||||||
@@ -152,7 +170,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
if content == "" {
|
if content == "" {
|
||||||
if r != "" {
|
if r != "" {
|
||||||
if contentExists {
|
if contentExists {
|
||||||
content = fmt.Sprint(r, " ", *i.Content)
|
content = fmt.Sprint(r, i.StringContent)
|
||||||
}
|
}
|
||||||
if i.FunctionCall != nil {
|
if i.FunctionCall != nil {
|
||||||
j, err := json.Marshal(i.FunctionCall)
|
j, err := json.Marshal(i.FunctionCall)
|
||||||
@@ -166,7 +184,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if contentExists {
|
if contentExists {
|
||||||
content = fmt.Sprint(*i.Content)
|
content = fmt.Sprint(i.StringContent)
|
||||||
}
|
}
|
||||||
if i.FunctionCall != nil {
|
if i.FunctionCall != nil {
|
||||||
j, err := json.Marshal(i.FunctionCall)
|
j, err := json.Marshal(i.FunctionCall)
|
||||||
@@ -231,17 +249,19 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if toStream {
|
if toStream {
|
||||||
responses := make(chan OpenAIResponse)
|
responses := make(chan schema.OpenAIResponse)
|
||||||
|
|
||||||
go process(predInput, input, config, o.Loader, responses)
|
go process(predInput, input, config, o.Loader, responses)
|
||||||
|
|
||||||
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
|
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
|
||||||
|
|
||||||
|
usage := &schema.OpenAIUsage{}
|
||||||
|
|
||||||
for ev := range responses {
|
for ev := range responses {
|
||||||
|
usage = &ev.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it
|
||||||
var buf bytes.Buffer
|
var buf bytes.Buffer
|
||||||
enc := json.NewEncoder(&buf)
|
enc := json.NewEncoder(&buf)
|
||||||
enc.Encode(ev)
|
enc.Encode(ev)
|
||||||
|
|
||||||
log.Debug().Msgf("Sending chunk: %s", buf.String())
|
log.Debug().Msgf("Sending chunk: %s", buf.String())
|
||||||
_, err := fmt.Fprintf(w, "data: %v\n", buf.String())
|
_, err := fmt.Fprintf(w, "data: %v\n", buf.String())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -252,15 +272,18 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
w.Flush()
|
w.Flush()
|
||||||
}
|
}
|
||||||
|
|
||||||
resp := &OpenAIResponse{
|
resp := &schema.OpenAIResponse{
|
||||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
ID: id,
|
||||||
Choices: []Choice{
|
Created: created,
|
||||||
|
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||||
|
Choices: []schema.Choice{
|
||||||
{
|
{
|
||||||
FinishReason: "stop",
|
FinishReason: "stop",
|
||||||
Index: 0,
|
Index: 0,
|
||||||
Delta: &Message{Content: &emptyMessage},
|
Delta: &schema.Message{Content: &emptyMessage},
|
||||||
}},
|
}},
|
||||||
Object: "chat.completion.chunk",
|
Object: "chat.completion.chunk",
|
||||||
|
Usage: *usage,
|
||||||
}
|
}
|
||||||
respData, _ := json.Marshal(resp)
|
respData, _ := json.Marshal(resp)
|
||||||
|
|
||||||
@@ -271,7 +294,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
result, err := ComputeChoices(input, predInput, config, o, o.Loader, func(s string, c *[]Choice) {
|
result, tokenUsage, err := ComputeChoices(input, predInput, config, o, o.Loader, func(s string, c *[]schema.Choice) {
|
||||||
if processFunctions {
|
if processFunctions {
|
||||||
// As we have to change the result before processing, we can't stream the answer (yet?)
|
// As we have to change the result before processing, we can't stream the answer (yet?)
|
||||||
ss := map[string]interface{}{}
|
ss := map[string]interface{}{}
|
||||||
@@ -305,7 +328,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
message = backend.Finetune(*config, predInput, message)
|
message = backend.Finetune(*config, predInput, message)
|
||||||
log.Debug().Msgf("Reply received from LLM(finetuned): %s", message)
|
log.Debug().Msgf("Reply received from LLM(finetuned): %s", message)
|
||||||
|
|
||||||
*c = append(*c, Choice{Message: &Message{Role: "assistant", Content: &message}})
|
*c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &message}})
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -315,7 +338,11 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
// Otherwise ask the LLM to understand the JSON output and the context, and return a message
|
// Otherwise ask the LLM to understand the JSON output and the context, and return a message
|
||||||
// Note: This costs (in term of CPU) another computation
|
// Note: This costs (in term of CPU) another computation
|
||||||
config.Grammar = ""
|
config.Grammar = ""
|
||||||
predFunc, err := backend.ModelInference(input.Context, predInput, o.Loader, *config, o, nil)
|
images := []string{}
|
||||||
|
for _, m := range input.Messages {
|
||||||
|
images = append(images, m.StringImages...)
|
||||||
|
}
|
||||||
|
predFunc, err := backend.ModelInference(input.Context, predInput, images, o.Loader, *config, o, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error().Msgf("inference error: %s", err.Error())
|
log.Error().Msgf("inference error: %s", err.Error())
|
||||||
return
|
return
|
||||||
@@ -327,28 +354,35 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
prediction = backend.Finetune(*config, predInput, prediction)
|
fineTunedResponse := backend.Finetune(*config, predInput, prediction.Response)
|
||||||
*c = append(*c, Choice{Message: &Message{Role: "assistant", Content: &prediction}})
|
*c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &fineTunedResponse}})
|
||||||
} else {
|
} else {
|
||||||
// otherwise reply with the function call
|
// otherwise reply with the function call
|
||||||
*c = append(*c, Choice{
|
*c = append(*c, schema.Choice{
|
||||||
FinishReason: "function_call",
|
FinishReason: "function_call",
|
||||||
Message: &Message{Role: "assistant", FunctionCall: ss},
|
Message: &schema.Message{Role: "assistant", FunctionCall: ss},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
*c = append(*c, Choice{FinishReason: "stop", Index: 0, Message: &Message{Role: "assistant", Content: &s}})
|
*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
|
||||||
}, nil)
|
}, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
resp := &OpenAIResponse{
|
resp := &schema.OpenAIResponse{
|
||||||
|
ID: id,
|
||||||
|
Created: created,
|
||||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||||
Choices: result,
|
Choices: result,
|
||||||
Object: "chat.completion",
|
Object: "chat.completion",
|
||||||
|
Usage: schema.OpenAIUsage{
|
||||||
|
PromptTokens: tokenUsage.Prompt,
|
||||||
|
CompletionTokens: tokenUsage.Completion,
|
||||||
|
TotalTokens: tokenUsage.Prompt + tokenUsage.Completion,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
respData, _ := json.Marshal(resp)
|
respData, _ := json.Marshal(resp)
|
||||||
log.Debug().Msgf("Response: %s", respData)
|
log.Debug().Msgf("Response: %s", respData)
|
||||||
|
|||||||
@@ -6,28 +6,43 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/go-skynet/LocalAI/api/backend"
|
||||||
config "github.com/go-skynet/LocalAI/api/config"
|
config "github.com/go-skynet/LocalAI/api/config"
|
||||||
"github.com/go-skynet/LocalAI/api/options"
|
"github.com/go-skynet/LocalAI/api/options"
|
||||||
|
"github.com/go-skynet/LocalAI/api/schema"
|
||||||
|
"github.com/go-skynet/LocalAI/pkg/grammar"
|
||||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
|
"github.com/google/uuid"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
"github.com/valyala/fasthttp"
|
"github.com/valyala/fasthttp"
|
||||||
)
|
)
|
||||||
|
|
||||||
// https://platform.openai.com/docs/api-reference/completions
|
// https://platform.openai.com/docs/api-reference/completions
|
||||||
func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
|
func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
|
||||||
process := func(s string, req *OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan OpenAIResponse) {
|
id := uuid.New().String()
|
||||||
ComputeChoices(req, s, config, o, loader, func(s string, c *[]Choice) {}, func(s string) bool {
|
created := int(time.Now().Unix())
|
||||||
resp := OpenAIResponse{
|
|
||||||
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
process := func(s string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
|
||||||
Choices: []Choice{
|
ComputeChoices(req, s, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
|
||||||
|
resp := schema.OpenAIResponse{
|
||||||
|
ID: id,
|
||||||
|
Created: created,
|
||||||
|
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||||
|
Choices: []schema.Choice{
|
||||||
{
|
{
|
||||||
Index: 0,
|
Index: 0,
|
||||||
Text: s,
|
Text: s,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
Object: "text_completion",
|
Object: "text_completion",
|
||||||
|
Usage: schema.OpenAIUsage{
|
||||||
|
PromptTokens: usage.Prompt,
|
||||||
|
CompletionTokens: usage.Completion,
|
||||||
|
TotalTokens: usage.Prompt + usage.Completion,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
log.Debug().Msgf("Sending goroutine: %s", s)
|
log.Debug().Msgf("Sending goroutine: %s", s)
|
||||||
|
|
||||||
@@ -50,6 +65,10 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
|||||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if input.ResponseFormat == "json_object" {
|
||||||
|
input.Grammar = grammar.JSONBNF
|
||||||
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||||
|
|
||||||
if input.Stream {
|
if input.Stream {
|
||||||
@@ -84,7 +103,7 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
|||||||
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
||||||
}
|
}
|
||||||
|
|
||||||
responses := make(chan OpenAIResponse)
|
responses := make(chan schema.OpenAIResponse)
|
||||||
|
|
||||||
go process(predInput, input, config, o.Loader, responses)
|
go process(predInput, input, config, o.Loader, responses)
|
||||||
|
|
||||||
@@ -100,9 +119,11 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
|||||||
w.Flush()
|
w.Flush()
|
||||||
}
|
}
|
||||||
|
|
||||||
resp := &OpenAIResponse{
|
resp := &schema.OpenAIResponse{
|
||||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
ID: id,
|
||||||
Choices: []Choice{
|
Created: created,
|
||||||
|
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||||
|
Choices: []schema.Choice{
|
||||||
{
|
{
|
||||||
Index: 0,
|
Index: 0,
|
||||||
FinishReason: "stop",
|
FinishReason: "stop",
|
||||||
@@ -119,7 +140,10 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var result []Choice
|
var result []schema.Choice
|
||||||
|
|
||||||
|
totalTokenUsage := backend.TokenUsage{}
|
||||||
|
|
||||||
for k, i := range config.PromptStrings {
|
for k, i := range config.PromptStrings {
|
||||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||||
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
|
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
|
||||||
@@ -131,20 +155,31 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
|||||||
log.Debug().Msgf("Template found, input modified to: %s", i)
|
log.Debug().Msgf("Template found, input modified to: %s", i)
|
||||||
}
|
}
|
||||||
|
|
||||||
r, err := ComputeChoices(input, i, config, o, o.Loader, func(s string, c *[]Choice) {
|
r, tokenUsage, err := ComputeChoices(
|
||||||
*c = append(*c, Choice{Text: s, FinishReason: "stop", Index: k})
|
input, i, config, o, o.Loader, func(s string, c *[]schema.Choice) {
|
||||||
}, nil)
|
*c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: k})
|
||||||
|
}, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
totalTokenUsage.Prompt += tokenUsage.Prompt
|
||||||
|
totalTokenUsage.Completion += tokenUsage.Completion
|
||||||
|
|
||||||
result = append(result, r...)
|
result = append(result, r...)
|
||||||
}
|
}
|
||||||
|
|
||||||
resp := &OpenAIResponse{
|
resp := &schema.OpenAIResponse{
|
||||||
|
ID: id,
|
||||||
|
Created: created,
|
||||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||||
Choices: result,
|
Choices: result,
|
||||||
Object: "text_completion",
|
Object: "text_completion",
|
||||||
|
Usage: schema.OpenAIUsage{
|
||||||
|
PromptTokens: totalTokenUsage.Prompt,
|
||||||
|
CompletionTokens: totalTokenUsage.Completion,
|
||||||
|
TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
jsonResult, _ := json.Marshal(resp)
|
jsonResult, _ := json.Marshal(resp)
|
||||||
|
|||||||
@@ -3,11 +3,16 @@ package openai
  import (
  "encoding/json"
  "fmt"
+ "time"

+ "github.com/go-skynet/LocalAI/api/backend"
  config "github.com/go-skynet/LocalAI/api/config"
  "github.com/go-skynet/LocalAI/api/options"
+ "github.com/go-skynet/LocalAI/api/schema"
  model "github.com/go-skynet/LocalAI/pkg/model"
  "github.com/gofiber/fiber/v2"
+ "github.com/google/uuid"

  "github.com/rs/zerolog/log"
  )

@@ -31,7 +36,9 @@ func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
  templateFile = config.TemplateConfig.Edit
  }

- var result []Choice
+ var result []schema.Choice
+ totalTokenUsage := backend.TokenUsage{}

  for _, i := range config.InputStrings {
  // A model can have a "file.bin.tmpl" file associated with a prompt template prefix
  templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{
@@ -44,20 +51,32 @@ func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
  log.Debug().Msgf("Template found, input modified to: %s", i)
  }

- r, err := ComputeChoices(input, i, config, o, o.Loader, func(s string, c *[]Choice) {
- *c = append(*c, Choice{Text: s})
+ r, tokenUsage, err := ComputeChoices(input, i, config, o, o.Loader, func(s string, c *[]schema.Choice) {
+ *c = append(*c, schema.Choice{Text: s})
  }, nil)
  if err != nil {
  return err
  }

+ totalTokenUsage.Prompt += tokenUsage.Prompt
+ totalTokenUsage.Completion += tokenUsage.Completion

  result = append(result, r...)
  }

- resp := &OpenAIResponse{
+ id := uuid.New().String()
+ created := int(time.Now().Unix())
+ resp := &schema.OpenAIResponse{
+ ID: id,
+ Created: created,
  Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
  Choices: result,
  Object: "edit",
+ Usage: schema.OpenAIUsage{
+ PromptTokens: totalTokenUsage.Prompt,
+ CompletionTokens: totalTokenUsage.Completion,
+ TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion,
+ },
  }

  jsonResult, _ := json.Marshal(resp)
@@ -3,9 +3,13 @@ package openai
  import (
  "encoding/json"
  "fmt"
+ "time"

  "github.com/go-skynet/LocalAI/api/backend"
  config "github.com/go-skynet/LocalAI/api/config"
+ "github.com/go-skynet/LocalAI/api/schema"
+ "github.com/google/uuid"

  "github.com/go-skynet/LocalAI/api/options"
  "github.com/gofiber/fiber/v2"
  "github.com/rs/zerolog/log"
@@ -25,7 +29,7 @@ func EmbeddingsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
  }

  log.Debug().Msgf("Parameter Config: %+v", config)
- items := []Item{}
+ items := []schema.Item{}

  for i, s := range config.InputToken {
  // get the model function to call for the result
@@ -38,7 +42,7 @@ func EmbeddingsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
  if err != nil {
  return err
  }
- items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"})
+ items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
  }

  for i, s := range config.InputStrings {
@@ -52,13 +56,17 @@ func EmbeddingsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
  if err != nil {
  return err
  }
- items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"})
+ items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
  }

- resp := &OpenAIResponse{
- Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
- Data: items,
- Object: "list",
+ id := uuid.New().String()
+ created := int(time.Now().Unix())
+ resp := &schema.OpenAIResponse{
+ ID: id,
+ Created: created,
+ Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
+ Data: items,
+ Object: "list",
  }

  jsonResult, _ := json.Marshal(resp)
@@ -1,6 +1,7 @@
  package openai

  import (
+ "bufio"
  "encoding/base64"
  "encoding/json"
  "fmt"
@@ -8,6 +9,10 @@ import (
  "path/filepath"
  "strconv"
  "strings"
+ "time"

+ "github.com/go-skynet/LocalAI/api/schema"
+ "github.com/google/uuid"

  "github.com/go-skynet/LocalAI/api/backend"
  config "github.com/go-skynet/LocalAI/api/config"
@@ -49,6 +54,31 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
  return fmt.Errorf("failed reading parameters from request:%w", err)
  }

+ src := ""
+ if input.File != "" {
+ //base 64 decode the file and write it somewhere
+ // that we will cleanup
+ decoded, err := base64.StdEncoding.DecodeString(input.File)
+ if err != nil {
+ return err
+ }
+ // Create a temporary file
+ outputFile, err := os.CreateTemp(o.ImageDir, "b64")
+ if err != nil {
+ return err
+ }
+ // write the base64 result
+ writer := bufio.NewWriter(outputFile)
+ _, err = writer.Write(decoded)
+ if err != nil {
+ outputFile.Close()
+ return err
+ }
+ outputFile.Close()
+ src = outputFile.Name()
+ defer os.RemoveAll(src)
+ }

  log.Debug().Msgf("Parameter Config: %+v", config)

  // XXX: Only stablediffusion is supported for now
@@ -73,8 +103,8 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
  if input.ResponseFormat == "b64_json" {
  b64JSON = true
  }
+ // src and clip_skip
- var result []Item
+ var result []schema.Item
  for _, i := range config.PromptStrings {
  n := input.N
  if input.N == 0 {
@@ -121,7 +151,7 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx

  baseURL := c.BaseURL()

- fn, err := backend.ImageGeneration(height, width, mode, step, input.Seed, positive_prompt, negative_prompt, output, o.Loader, *config, o)
+ fn, err := backend.ImageGeneration(height, width, mode, step, input.Seed, positive_prompt, negative_prompt, src, output, o.Loader, *config, o)
  if err != nil {
  return err
  }
@@ -129,7 +159,7 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
  return err
  }

- item := &Item{}
+ item := &schema.Item{}

  if b64JSON {
  defer os.RemoveAll(output)
@@ -147,8 +177,12 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
  }
  }

- resp := &OpenAIResponse{
- Data: result,
+ id := uuid.New().String()
+ created := int(time.Now().Unix())
+ resp := &schema.OpenAIResponse{
+ ID: id,
+ Created: created,
+ Data: result,
  }

  jsonResult, _ := json.Marshal(resp)
@@ -4,34 +4,52 @@ import (
  "github.com/go-skynet/LocalAI/api/backend"
  config "github.com/go-skynet/LocalAI/api/config"
  "github.com/go-skynet/LocalAI/api/options"
+ "github.com/go-skynet/LocalAI/api/schema"
  model "github.com/go-skynet/LocalAI/pkg/model"
  )

- func ComputeChoices(req *OpenAIRequest, predInput string, config *config.Config, o *options.Option, loader *model.ModelLoader, cb func(string, *[]Choice), tokenCallback func(string) bool) ([]Choice, error) {
- n := req.N
- result := []Choice{}
+ func ComputeChoices(
+ req *schema.OpenAIRequest,
+ predInput string,
+ config *config.Config,
+ o *options.Option,
+ loader *model.ModelLoader,
+ cb func(string, *[]schema.Choice),
+ tokenCallback func(string, backend.TokenUsage) bool) ([]schema.Choice, backend.TokenUsage, error) {
+ n := req.N // number of completions to return
+ result := []schema.Choice{}

  if n == 0 {
  n = 1
  }

- // get the model function to call for the result
- predFunc, err := backend.ModelInference(req.Context, predInput, loader, *config, o, tokenCallback)
- if err != nil {
- return result, err
- }
+ images := []string{}
+ for _, m := range req.Messages {
+ images = append(images, m.StringImages...)
+ }
+
+ // get the model function to call for the result
+ predFunc, err := backend.ModelInference(req.Context, predInput, images, loader, *config, o, tokenCallback)
+ if err != nil {
+ return result, backend.TokenUsage{}, err
+ }
+
+ tokenUsage := backend.TokenUsage{}

  for i := 0; i < n; i++ {
  prediction, err := predFunc()
  if err != nil {
- return result, err
+ return result, backend.TokenUsage{}, err
  }

- prediction = backend.Finetune(*config, predInput, prediction)
- cb(prediction, &result)
+ tokenUsage.Prompt += prediction.Usage.Prompt
+ tokenUsage.Completion += prediction.Usage.Completion
+
+ finetunedResponse := backend.Finetune(*config, predInput, prediction.Response)
+ cb(finetunedResponse, &result)

  //result = append(result, Choice{Text: prediction})

  }
- return result, err
+ return result, tokenUsage, err
  }
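For orientation, the sketch below shows how a caller can consume the reworked ComputeChoices signature above. It is not part of the change set: names such as input, prompt, cfg, opt, loader and totalTokenUsage stand in for values an endpoint handler would already hold (the edit endpoint hunk earlier follows the same pattern).

```go
// Minimal sketch, assuming the ComputeChoices signature shown in the hunk above.
choices, usage, err := ComputeChoices(
	input, prompt, cfg, opt, loader,
	func(s string, c *[]schema.Choice) {
		// collect one choice per generated prediction
		*c = append(*c, schema.Choice{Text: s})
	},
	nil, // no streaming token callback
)
if err != nil {
	return err
}
// aggregate per-call token usage into the response totals
totalTokenUsage.Prompt += usage.Prompt
totalTokenUsage.Completion += usage.Completion
_ = choices
```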
@@ -4,6 +4,7 @@ import (
  "regexp"

  config "github.com/go-skynet/LocalAI/api/config"
+ "github.com/go-skynet/LocalAI/api/schema"
  model "github.com/go-skynet/LocalAI/pkg/model"
  "github.com/gofiber/fiber/v2"
  )
@@ -16,7 +17,7 @@ func ListModelsEndpoint(loader *model.ModelLoader, cm *config.ConfigLoader) func
  }
  var mm map[string]interface{} = map[string]interface{}{}

- dataModels := []OpenAIModel{}
+ dataModels := []schema.OpenAIModel{}

  var filterFn func(name string) bool
  filter := c.Query("filter")
@@ -45,7 +46,7 @@ func ListModelsEndpoint(loader *model.ModelLoader, cm *config.ConfigLoader) func
  }

  if filterFn(c.Name) {
- dataModels = append(dataModels, OpenAIModel{ID: c.Name, Object: "model"})
+ dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"})
  }
  }

@@ -53,13 +54,13 @@ func ListModelsEndpoint(loader *model.ModelLoader, cm *config.ConfigLoader) func
  for _, m := range models {
  // And only adds them if they shouldn't be skipped.
  if _, exists := mm[m]; !exists && filterFn(m) {
- dataModels = append(dataModels, OpenAIModel{ID: m, Object: "model"})
+ dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"})
  }
  }

  return c.JSON(struct {
  Object string `json:"object"`
- Data []OpenAIModel `json:"data"`
+ Data []schema.OpenAIModel `json:"data"`
  }{
  Object: "list",
  Data: dataModels,
@@ -2,28 +2,32 @@ package openai

  import (
  "context"
+ "encoding/base64"
  "encoding/json"
  "fmt"
+ "io/ioutil"
+ "net/http"
  "os"
  "path/filepath"
  "strings"

  config "github.com/go-skynet/LocalAI/api/config"
  options "github.com/go-skynet/LocalAI/api/options"
+ "github.com/go-skynet/LocalAI/api/schema"
  model "github.com/go-skynet/LocalAI/pkg/model"
  "github.com/gofiber/fiber/v2"
  "github.com/rs/zerolog/log"
  )

- func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *OpenAIRequest, error) {
+ func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *schema.OpenAIRequest, error) {
  loader := o.Loader
- input := new(OpenAIRequest)
+ input := new(schema.OpenAIRequest)
  ctx, cancel := context.WithCancel(o.Context)
  input.Context = ctx
  input.Cancel = cancel
  // Get input data from the request body
  if err := c.BodyParser(input); err != nil {
- return "", nil, err
+ return "", nil, fmt.Errorf("failed parsing request body: %w", err)
  }

  modelFile := input.Model
@@ -60,7 +64,38 @@ func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *Open
  return modelFile, input, nil
  }

- func updateConfig(config *config.Config, input *OpenAIRequest) {
+ // this function check if the string is an URL, if it's an URL downloads the image in memory
+ // encodes it in base64 and returns the base64 string
+ func getBase64Image(s string) (string, error) {
+ if strings.HasPrefix(s, "http") {
+ // download the image
+ resp, err := http.Get(s)
+ if err != nil {
+ return "", err
+ }
+ defer resp.Body.Close()
+
+ // read the image data into memory
+ data, err := ioutil.ReadAll(resp.Body)
+ if err != nil {
+ return "", err
+ }
+
+ // encode the image data in base64
+ encoded := base64.StdEncoding.EncodeToString(data)
+
+ // return the base64 string
+ return encoded, nil
+ }
+
+ // if the string instead is prefixed with "data:image/jpeg;base64,", drop it
+ if strings.HasPrefix(s, "data:image/jpeg;base64,") {
+ return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil
+ }
+ return "", fmt.Errorf("not valid string")
+ }
+
+ func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
  if input.Echo {
  config.Echo = input.Echo
  }
@@ -75,6 +110,10 @@ func updateConfig(config *config.Config, input *OpenAIRequest) {
  config.Backend = input.Backend
  }

+ if input.ClipSkip != 0 {
+ config.Diffusers.ClipSkip = input.ClipSkip
+ }
+
  if input.ModelBaseName != "" {
  config.AutoGPTQ.ModelBaseName = input.ModelBaseName
  }
@@ -124,6 +163,35 @@ func updateConfig(config *config.Config, input *OpenAIRequest) {
  }
  }

+ // Decode each request's message content
+ index := 0
+ for i, m := range input.Messages {
+ switch content := m.Content.(type) {
+ case string:
+ input.Messages[i].StringContent = content
+ case []interface{}:
+ dat, _ := json.Marshal(content)
+ c := []schema.Content{}
+ json.Unmarshal(dat, &c)
+ for _, pp := range c {
+ if pp.Type == "text" {
+ input.Messages[i].StringContent = pp.Text
+ } else if pp.Type == "image_url" {
+ // Detect if pp.ImageURL is an URL, if it is download the image and encode it in base64:
+ base64, err := getBase64Image(pp.ImageURL.URL)
+ if err == nil {
+ input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
+ // set a placeholder for each image
+ input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + input.Messages[i].StringContent
+ index++
+ } else {
+ fmt.Print("Failed encoding image", err)
+ }
+ }
+ }
+ }
+ }
+ }
+
  if input.RepeatPenalty != 0 {
  config.RepeatPenalty = input.RepeatPenalty
  }
@@ -214,7 +282,7 @@ func updateConfig(config *config.Config, input *OpenAIRequest) {
  }
  }

- func readConfig(modelFile string, input *OpenAIRequest, cm *config.ConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.Config, *OpenAIRequest, error) {
+ func readConfig(modelFile string, input *schema.OpenAIRequest, cm *config.ConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.Config, *schema.OpenAIRequest, error) {
  // Load a config file if present after the model name
  modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
@@ -7,6 +7,7 @@ import (

  "github.com/go-skynet/LocalAI/pkg/gallery"
  model "github.com/go-skynet/LocalAI/pkg/model"
+ "github.com/go-skynet/LocalAI/metrics"
  "github.com/rs/zerolog/log"
  )

@@ -24,6 +25,7 @@ type Option struct {
  PreloadModelsFromPath string
  CORSAllowOrigins string
  ApiKeys []string
+ Metrics *metrics.Metrics

  Galleries []gallery.Gallery

@@ -33,6 +35,8 @@ type Option struct {
  ExternalGRPCBackends map[string]string

  AutoloadGalleries bool

+ SingleBackend bool
  }

  type AppOption func(*Option)
@@ -58,6 +62,10 @@ func WithCors(b bool) AppOption {
  }
  }

+ var EnableSingleBackend = func(o *Option) {
+ o.SingleBackend = true
+ }
+
  var EnableGalleriesAutoload = func(o *Option) {
  o.AutoloadGalleries = true
  }
@@ -93,6 +101,7 @@ func WithStringGalleries(galls string) AppOption {
  return func(o *Option) {
  if galls == "" {
  log.Debug().Msgf("no galleries to load")
+ o.Galleries = []gallery.Gallery{}
  return
  }
  var galleries []gallery.Gallery
@@ -191,3 +200,9 @@ func WithApiKeys(apiKeys []string) AppOption {
  o.ApiKeys = apiKeys
  }
  }
+
+ func WithMetrics(meter *metrics.Metrics) AppOption {
+ return func(o *Option) {
+ o.Metrics = meter
+ }
+ }
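As a rough illustration of how the new knobs compose with the existing AppOption pattern, here is a small sketch. Only EnableSingleBackend, WithMetrics and the Metrics/SingleBackend fields come from this diff; everything else (including the metrics value m and applying the options by hand) is assumed for the example.

```go
// Sketch only: apply options manually the way LocalAI's option wiring would.
var m *metrics.Metrics // assume this was created elsewhere
o := &options.Option{}
for _, apply := range []options.AppOption{
	options.EnableSingleBackend, // added in this diff: keep a single backend loaded
	options.WithMetrics(m),      // added in this diff: attach the metrics instance
} {
	apply(o)
}
```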
@@ -1,4 +1,4 @@
- package openai
+ package schema

  import (
  "context"
@@ -55,11 +55,25 @@ type Choice struct {
  Text string `json:"text,omitempty"`
  }

+ type Content struct {
+ Type string `json:"type" yaml:"type"`
+ Text string `json:"text" yaml:"text"`
+ ImageURL ContentURL `json:"image_url" yaml:"image_url"`
+ }
+
+ type ContentURL struct {
+ URL string `json:"url" yaml:"url"`
+ }
+
  type Message struct {
  // The message role
  Role string `json:"role,omitempty" yaml:"role"`
  // The message content
- Content *string `json:"content" yaml:"content"`
+ Content interface{} `json:"content" yaml:"content"`
+
+ StringContent string `json:"string_content,omitempty" yaml:"string_content,omitempty"`
+ StringImages []string `json:"string_images,omitempty" yaml:"string_images,omitempty"`

  // A result of a function call
  FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
  }
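To make the new Content shape concrete, here is a small self-contained sketch (not part of the change set; the image URL is made up) showing how an array-of-parts message body decodes against these field names, using the same two-step re-marshal trick as the request-handling hunk earlier:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Local stand-ins that mirror the Content/ContentURL fields added above.
type ContentURL struct {
	URL string `json:"url"`
}
type Content struct {
	Type     string     `json:"type"`
	Text     string     `json:"text"`
	ImageURL ContentURL `json:"image_url"`
}

func main() {
	// A "vision"-style content value: an array of typed parts instead of a string.
	payload := `[{"type":"text","text":"What is in this image?"},
		{"type":"image_url","image_url":{"url":"https://example.com/cat.png"}}]`

	var raw interface{} // this is what a Content field of type interface{} receives
	if err := json.Unmarshal([]byte(payload), &raw); err != nil {
		panic(err)
	}
	dat, _ := json.Marshal(raw) // re-marshal the []interface{} ...
	var parts []Content
	_ = json.Unmarshal(dat, &parts) // ... and decode it into the typed slice
	fmt.Printf("%d parts, image url: %s\n", len(parts), parts[1].ImageURL.URL)
}
```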
@@ -1,4 +1,4 @@
- package api
+ package schema

  import "time"
backend/cpp/grpc/.gitignore (vendored, new file, 3 lines)
@@ -0,0 +1,3 @@
installed_packages/
grpc_build/
grpc_repo/
backend/cpp/grpc/script/build_grpc.sh (new executable file, 81 lines)
@@ -0,0 +1,81 @@
#!/bin/bash

# Builds locally from sources the packages needed by the llama cpp backend.

# Makes sure a few base packages exist.
# sudo apt-get --no-upgrade -y install g++ gcc binutils cmake git build-essential autoconf libtool pkg-config

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
echo "Script directory: $SCRIPT_DIR"

CPP_INSTALLED_PACKAGES_DIR=$1
if [ -z ${CPP_INSTALLED_PACKAGES_DIR} ]; then
    echo "CPP_INSTALLED_PACKAGES_DIR env variable not set. Don't know where to install: failed.";
    echo
    exit -1
fi

if [ -d "${CPP_INSTALLED_PACKAGES_DIR}" ]; then
    echo "gRPC installation directory already exists. Nothing to do."
    exit 0
fi

# The depth when cloning a git repo. 1 speeds up the clone when the repo history is not needed.
GIT_CLONE_DEPTH=1

NUM_BUILD_THREADS=$(nproc --ignore=1)

# Google gRPC --------------------------------------------------------------------------------------
TAG_LIB_GRPC="v1.59.0"
GIT_REPO_LIB_GRPC="https://github.com/grpc/grpc.git"
GRPC_REPO_DIR="${SCRIPT_DIR}/../grpc_repo"
GRPC_BUILD_DIR="${SCRIPT_DIR}/../grpc_build"
SRC_DIR_LIB_GRPC="${GRPC_REPO_DIR}/grpc"

echo "SRC_DIR_LIB_GRPC: ${SRC_DIR_LIB_GRPC}"
echo "GRPC_REPO_DIR: ${GRPC_REPO_DIR}"
echo "GRPC_BUILD_DIR: ${GRPC_BUILD_DIR}"

mkdir -pv ${GRPC_REPO_DIR}

rm -rf ${GRPC_BUILD_DIR}
mkdir -pv ${GRPC_BUILD_DIR}

mkdir -pv ${CPP_INSTALLED_PACKAGES_DIR}

if [ -d "${SRC_DIR_LIB_GRPC}" ]; then
    echo "gRPC source already exists locally. Not cloned again."
else
    ( cd ${GRPC_REPO_DIR} && \
      git clone --depth ${GIT_CLONE_DEPTH} -b ${TAG_LIB_GRPC} ${GIT_REPO_LIB_GRPC} && \
      cd ${SRC_DIR_LIB_GRPC} && \
      git submodule update --init --recursive --depth ${GIT_CLONE_DEPTH}
    )
fi

( cd ${GRPC_BUILD_DIR} && \
  cmake -G "Unix Makefiles" \
    -DCMAKE_BUILD_TYPE=Release \
    -DgRPC_INSTALL=ON \
    -DEXECUTABLE_OUTPUT_PATH=${CPP_INSTALLED_PACKAGES_DIR}/grpc/bin \
    -DLIBRARY_OUTPUT_PATH=${CPP_INSTALLED_PACKAGES_DIR}/grpc/lib \
    -DgRPC_BUILD_TESTS=OFF \
    -DgRPC_BUILD_CSHARP_EXT=OFF \
    -DgRPC_BUILD_GRPC_CPP_PLUGIN=ON \
    -DgRPC_BUILD_GRPC_CSHARP_PLUGIN=OFF \
    -DgRPC_BUILD_GRPC_NODE_PLUGIN=OFF \
    -DgRPC_BUILD_GRPC_OBJECTIVE_C_PLUGIN=OFF \
    -DgRPC_BUILD_GRPC_PHP_PLUGIN=OFF \
    -DgRPC_BUILD_GRPC_PYTHON_PLUGIN=ON \
    -DgRPC_BUILD_GRPC_RUBY_PLUGIN=OFF \
    -Dprotobuf_WITH_ZLIB=ON \
    -DRE2_BUILD_TESTING=OFF \
    -DCMAKE_INSTALL_PREFIX=${CPP_INSTALLED_PACKAGES_DIR}/ \
    ${SRC_DIR_LIB_GRPC} && \
  cmake --build . -- -j ${NUM_BUILD_THREADS} && \
  cmake --build . --target install -- -j ${NUM_BUILD_THREADS}
)

rm -rf ${GRPC_BUILD_DIR}
rm -rf ${GRPC_REPO_DIR}
backend/cpp/llama/CMakeLists.txt (new file, 74 lines)
@@ -0,0 +1,74 @@
## XXX: In some versions of CMake clip wasn't being built before llama.
## This is an hack for now, but it should be fixed in the future.
set(TARGET myclip)
add_library(${TARGET} clip.cpp clip.h)
install(TARGETS ${TARGET} LIBRARY)
target_link_libraries(${TARGET} PRIVATE common ggml ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if (NOT MSVC)
    target_compile_options(${TARGET} PRIVATE -Wno-cast-qual) # stb_image.h
endif()

set(TARGET grpc-server)
# END CLIP hack
set(CMAKE_CXX_STANDARD 17)
cmake_minimum_required(VERSION 3.15)
set(TARGET grpc-server)
set(_PROTOBUF_LIBPROTOBUF libprotobuf)
set(_REFLECTION grpc++_reflection)
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
    link_directories("/opt/homebrew/lib")
    include_directories("/opt/homebrew/include")
endif()

find_package(absl CONFIG REQUIRED)
find_package(Protobuf CONFIG REQUIRED)
find_package(gRPC CONFIG REQUIRED)

find_program(_PROTOBUF_PROTOC protoc)
set(_GRPC_GRPCPP grpc++)
find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)

include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${Protobuf_INCLUDE_DIRS})

message(STATUS "Using protobuf version ${Protobuf_VERSION} | Protobuf_INCLUDE_DIRS: ${Protobuf_INCLUDE_DIRS} | CMAKE_CURRENT_BINARY_DIR: ${CMAKE_CURRENT_BINARY_DIR}")

# Proto file
get_filename_component(hw_proto "../../../../../../pkg/grpc/proto/backend.proto" ABSOLUTE)
get_filename_component(hw_proto_path "${hw_proto}" PATH)

# Generated sources
set(hw_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/backend.pb.cc")
set(hw_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/backend.pb.h")
set(hw_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/backend.grpc.pb.cc")
set(hw_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/backend.grpc.pb.h")

add_custom_command(
    OUTPUT "${hw_proto_srcs}" "${hw_proto_hdrs}" "${hw_grpc_srcs}" "${hw_grpc_hdrs}"
    COMMAND ${_PROTOBUF_PROTOC}
    ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}"
        --cpp_out "${CMAKE_CURRENT_BINARY_DIR}"
        -I "${hw_proto_path}"
        --plugin=protoc-gen-grpc="${_GRPC_CPP_PLUGIN_EXECUTABLE}"
        "${hw_proto}"
    DEPENDS "${hw_proto}")

# hw_grpc_proto
add_library(hw_grpc_proto
    ${hw_grpc_srcs}
    ${hw_grpc_hdrs}
    ${hw_proto_srcs}
    ${hw_proto_hdrs} )

add_executable(${TARGET} grpc-server.cpp json.hpp )
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
    absl::flags_parse
    gRPC::${_REFLECTION}
    gRPC::${_GRPC_GRPCPP}
    protobuf::${_PROTOBUF_LIBPROTOBUF})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if(TARGET BUILD_INFO)
    add_dependencies(${TARGET} BUILD_INFO)
endif()
backend/cpp/llama/Makefile (new file, 50 lines)
@@ -0,0 +1,50 @@
LLAMA_VERSION?=d9b33fe95bd257b36c84ee5769cc048230067d6f

CMAKE_ARGS?=
BUILD_TYPE?=

# If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
	CMAKE_ARGS+=-DLLAMA_CUBLAS=ON
# If build type is openblas then we set -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
# to CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas)
	CMAKE_ARGS+=-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
# If build type is clblast (openCL) we set -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),clblast)
	CMAKE_ARGS+=-DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas)
	CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
endif

llama.cpp:
	git clone --recurse-submodules https://github.com/ggerganov/llama.cpp llama.cpp
	cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1

llama.cpp/examples/grpc-server:
	mkdir -p llama.cpp/examples/grpc-server
	cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
	cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
	cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
	echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
## XXX: In some versions of CMake clip wasn't being built before llama.
## This is an hack for now, but it should be fixed in the future.
	cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
	cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp

rebuild:
	cp -rfv $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
	cp -rfv $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
	cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
	rm -rf grpc-server
	$(MAKE) grpc-server

clean:
	rm -rf llama.cpp
	rm -rf grpc-server

grpc-server: llama.cpp llama.cpp/examples/grpc-server
	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release
	cp llama.cpp/build/bin/grpc-server .
backend/cpp/llama/grpc-server.cpp (new file, 2192 lines): diff suppressed because it is too large.
backend/cpp/llama/json.hpp (new file, 24596 lines): diff suppressed because it is too large.
@@ -5,8 +5,8 @@ package main
  import (
  "flag"

+ bert "github.com/go-skynet/LocalAI/pkg/backend/llm/bert"
  grpc "github.com/go-skynet/LocalAI/pkg/grpc"
- bert "github.com/go-skynet/LocalAI/pkg/grpc/llm/bert"
  )

  var (
@@ -5,7 +5,7 @@ package main
  import (
  "flag"

- transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
+ transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"

  grpc "github.com/go-skynet/LocalAI/pkg/grpc"
  )
@@ -5,7 +5,7 @@ package main
  import (
  "flag"

- transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
+ transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"

  grpc "github.com/go-skynet/LocalAI/pkg/grpc"
  )
@@ -1,25 +0,0 @@
- package main
-
- // GRPC Falcon server
-
- // Note: this is started internally by LocalAI and a server is allocated for each model
-
- import (
- "flag"
-
- falcon "github.com/go-skynet/LocalAI/pkg/grpc/llm/falcon"
-
- grpc "github.com/go-skynet/LocalAI/pkg/grpc"
- )
-
- var (
- addr = flag.String("addr", "localhost:50051", "the address to connect to")
- )
-
- func main() {
- flag.Parse()
-
- if err := grpc.StartServer(*addr, &falcon.LLM{}); err != nil {
- panic(err)
- }
- }
@@ -5,7 +5,7 @@ package main
  import (
  "flag"

- transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
+ transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"

  grpc "github.com/go-skynet/LocalAI/pkg/grpc"
  )
@@ -5,7 +5,7 @@ package main
  import (
  "flag"

- gpt4all "github.com/go-skynet/LocalAI/pkg/grpc/llm/gpt4all"
+ gpt4all "github.com/go-skynet/LocalAI/pkg/backend/llm/gpt4all"

  grpc "github.com/go-skynet/LocalAI/pkg/grpc"
  )
@@ -5,7 +5,7 @@ package main
  import (
  "flag"

- transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
+ transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"

  grpc "github.com/go-skynet/LocalAI/pkg/grpc"
  )
@@ -5,7 +5,7 @@ package main
  import (
  "flag"

- transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
+ transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"

  grpc "github.com/go-skynet/LocalAI/pkg/grpc"
  )
@@ -5,7 +5,7 @@ package main
  import (
  "flag"

- langchain "github.com/go-skynet/LocalAI/pkg/grpc/llm/langchain"
+ langchain "github.com/go-skynet/LocalAI/pkg/backend/llm/langchain"

  grpc "github.com/go-skynet/LocalAI/pkg/grpc"
  )
@@ -1,11 +1,9 @@
  package main

- // Note: this is started internally by LocalAI and a server is allocated for each model
-
  import (
  "flag"

- bloomz "github.com/go-skynet/LocalAI/pkg/grpc/llm/bloomz"
+ llama "github.com/go-skynet/LocalAI/pkg/backend/llm/llama-stable"

  grpc "github.com/go-skynet/LocalAI/pkg/grpc"
  )
@@ -17,7 +15,7 @@ var (
  func main() {
  flag.Parse()

- if err := grpc.StartServer(*addr, &bloomz.LLM{}); err != nil {
+ if err := grpc.StartServer(*addr, &llama.LLM{}); err != nil {
  panic(err)
  }
  }
@@ -7,7 +7,7 @@ package main
  import (
  "flag"

- llama "github.com/go-skynet/LocalAI/pkg/grpc/llm/llama"
+ llama "github.com/go-skynet/LocalAI/pkg/backend/llm/llama"

  grpc "github.com/go-skynet/LocalAI/pkg/grpc"
  )
@@ -5,7 +5,7 @@ package main
  import (
  "flag"

- transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
+ transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"

  grpc "github.com/go-skynet/LocalAI/pkg/grpc"
  )
@@ -5,7 +5,7 @@ package main
  import (
  "flag"

- tts "github.com/go-skynet/LocalAI/pkg/grpc/tts"
+ tts "github.com/go-skynet/LocalAI/pkg/backend/tts"

  grpc "github.com/go-skynet/LocalAI/pkg/grpc"
  )
@@ -5,7 +5,7 @@ package main
  import (
  "flag"

- transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
+ transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"

  grpc "github.com/go-skynet/LocalAI/pkg/grpc"
  )
@@ -5,7 +5,7 @@ package main
  import (
  "flag"

- rwkv "github.com/go-skynet/LocalAI/pkg/grpc/llm/rwkv"
+ rwkv "github.com/go-skynet/LocalAI/pkg/backend/llm/rwkv"

  grpc "github.com/go-skynet/LocalAI/pkg/grpc"
  )
@@ -5,7 +5,7 @@ package main
  import (
  "flag"

- image "github.com/go-skynet/LocalAI/pkg/grpc/image"
+ image "github.com/go-skynet/LocalAI/pkg/backend/image"

  grpc "github.com/go-skynet/LocalAI/pkg/grpc"
  )
@@ -5,7 +5,7 @@ package main
  import (
  "flag"

- transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
+ transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"

  grpc "github.com/go-skynet/LocalAI/pkg/grpc"
  )
@@ -5,7 +5,7 @@ package main
  import (
  "flag"

- transcribe "github.com/go-skynet/LocalAI/pkg/grpc/transcribe"
+ transcribe "github.com/go-skynet/LocalAI/pkg/backend/transcribe"

  grpc "github.com/go-skynet/LocalAI/pkg/grpc"
  )
custom-ca-certs/.keep (new empty file)
@@ -12,4 +12,5 @@ services:
  - .env
  volumes:
  - ./models:/models:cached
+ - ./images/:/tmp/generated/images/
  command: ["/usr/bin/local-ai" ]
@@ -5,7 +5,7 @@ cd /build

  if [ "$REBUILD" != "false" ]; then
  rm -rf ./local-ai
- ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data make build -j${BUILD_PARALLELISM:-1}
+ make build -j${BUILD_PARALLELISM:-1}
  else
  echo "@@@@@"
  echo "Skipping rebuild"
@@ -157,6 +157,26 @@ Allows to run any LocalAI-compatible model as a backend on the servers of https:

  [Check it out here](https://runpod.io/gsc?template=uv9mtqnrd0&ref=984wlcra)

+ ### Continue
+
+ _by [@gruberdev](https://github.com/gruberdev)_
+
+ <img src="continue/img/screen.png" width="600" height="200" alt="Screenshot">
+
+ Demonstrates how to integrate an open-source copilot alternative that enhances code analysis, completion, and improvements. This approach seamlessly integrates with any LocalAI model, offering a more user-friendly experience.
+
+ [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/continue/)
+
+ ### Streamlit bot
+
+ _by [@majoshi1](https://github.com/majoshi1)_
+
+ 
+
+ A chat bot made using `Streamlit` & LocalAI.
+
+ [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/streamlit-bot/)
+
  ## Want to contribute?

  Create an issue, and put `Example: <description>` in the title! We will post your examples here.
@@ -1,5 +1,9 @@
+ # CPU .env docs: https://localai.io/howtos/easy-setup-docker-cpu/
+ # GPU .env docs: https://localai.io/howtos/easy-setup-docker-gpu/
+
  OPENAI_API_KEY=sk---anystringhere
  OPENAI_API_BASE=http://api:8080/v1
  # Models to preload at start
- # Here we configure gpt4all as gpt-3.5-turbo and bert as embeddings
+ # Here we configure gpt4all as gpt-3.5-turbo and bert as embeddings,
+ # see other options in the model gallery at https://github.com/go-skynet/model-gallery
  PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/gpt4all-j.yaml", "name": "gpt-3.5-turbo"}, { "url": "github:go-skynet/model-gallery/bert-embeddings.yaml", "name": "text-embedding-ada-002"}]
@@ -10,12 +10,16 @@ git clone https://github.com/go-skynet/LocalAI

  cd LocalAI/examples/autoGPT

+ cp -rfv .env.example .env
+
+ # Edit the .env file to set a different model by editing `PRELOAD_MODELS`.
+ vim .env
+
  docker-compose run --rm auto-gpt
  ```

  Note: The example automatically downloads the `gpt4all` model as it is under a permissive license. The GPT4All model does not seem to be enough to run AutoGPT. WizardLM-7b-uncensored seems to perform better (with `f16: true`).

- See the `.env` configuration file to set a different model with the [model-gallery](https://github.com/go-skynet/model-gallery) by editing `PRELOAD_MODELS`.
-
  ## Without docker

|||||||
@@ -0,0 +1,11 @@
|
|||||||
|
meta {
|
||||||
|
name: backend monitor
|
||||||
|
type: http
|
||||||
|
seq: 4
|
||||||
|
}
|
||||||
|
|
||||||
|
get {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/monitor
|
||||||
|
body: none
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
meta {
|
||||||
|
name: backend-shutdown
|
||||||
|
type: http
|
||||||
|
seq: 3
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/shutdown
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}"
|
||||||
|
}
|
||||||
|
}
|
||||||
5
examples/bruno/LocalAI Test Requests/bruno.json
Normal file
5
examples/bruno/LocalAI Test Requests/bruno.json
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"version": "1",
|
||||||
|
"name": "LocalAI Test Requests",
|
||||||
|
"type": "collection"
|
||||||
|
}
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
vars {
|
||||||
|
HOST: localhost
|
||||||
|
PORT: 8080
|
||||||
|
DEFAULT_MODEL: gpt-3.5-turbo
|
||||||
|
PROTOCOL: http://
|
||||||
|
}
|
||||||
11
examples/bruno/LocalAI Test Requests/get models list.bru
Normal file
11
examples/bruno/LocalAI Test Requests/get models list.bru
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
meta {
|
||||||
|
name: get models list
|
||||||
|
type: http
|
||||||
|
seq: 2
|
||||||
|
}
|
||||||
|
|
||||||
|
get {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models
|
||||||
|
body: none
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
meta {
|
||||||
|
name: Generate image
|
||||||
|
type: http
|
||||||
|
seq: 1
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/images/generations
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"prompt": "<positive prompt>|<negative prompt>",
|
||||||
|
"model": "model-name",
|
||||||
|
"step": 51,
|
||||||
|
"size": "1024x1024",
|
||||||
|
"image": ""
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
meta {
|
||||||
|
name: -completions
|
||||||
|
type: http
|
||||||
|
seq: 4
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/completions
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}",
|
||||||
|
"prompt": "function downloadFile(string url, string outputPath) {",
|
||||||
|
"max_tokens": 256,
|
||||||
|
"temperature": 0.5
|
||||||
|
}
|
||||||
|
}
|
||||||
23
examples/bruno/LocalAI Test Requests/llm text/-edits.bru
Normal file
23
examples/bruno/LocalAI Test Requests/llm text/-edits.bru
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
meta {
|
||||||
|
name: -edits
|
||||||
|
type: http
|
||||||
|
seq: 5
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/edits
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}",
|
||||||
|
"input": "What day of the wek is it?",
|
||||||
|
"instruction": "Fix the spelling mistakes"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
meta {
|
||||||
|
name: -embeddings
|
||||||
|
type: http
|
||||||
|
seq: 6
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/embeddings
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}",
|
||||||
|
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
meta {
|
||||||
|
name: chat completion -simple- 1 message-
|
||||||
|
type: http
|
||||||
|
seq: 4
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "How could one use friction to cook an egg?"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"max_tokens": 256,
|
||||||
|
"temperature": 0.2,
|
||||||
|
"grammar": ""
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
meta {
|
||||||
|
name: chat-completions -long-
|
||||||
|
type: http
|
||||||
|
seq: 5
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}",
|
||||||
|
"messages": [{"role": "system", "content": "You are a helpful, intelligent pirate assistant. Always answer as helpfully as possible, while being a pirate and using pirate language. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."},
|
||||||
|
{"role": "user", "content": "How could one use electricity to cook an egg?"},
|
||||||
|
{"role": "assistant",
|
||||||
|
"content": "Shiver me timbers! Using electricity to cook an egg? Well, matey, I reckon that be a right curious idea! *adjusts spectacles* Now, I ain't sure if this be possible, but I'll do me best to help ye out.\n\nFirst things first, ye gotta understand that electricity be a powerful force, and it can't just cook an egg on its own. Ye see, electricity be like a mighty wind that can make things happen, but it needs somethin' to work with. So, if ye want to use electricity to cook an egg, ye gotta find a way to harness that power and make it do the cookin'.\n\nNow, I know what ye might be thinkin': \"How do I harness the power of electricity to cook an egg?\" Well, matey, there be a few ways to do it. One way be to use a special device called an \"electric frying pan.\" This be a pan that has a built-in heating element that gets hot when ye plug it into a wall socket. When the element gets hot, ye can crack an egg into the pan and watch as it cook"
|
||||||
|
},
|
||||||
|
{"role": "user", "content": "I don't have one of those, just a raw wire and plenty of power! How do we get it done?"}],
|
||||||
|
"max_tokens": 1024,
|
||||||
|
"temperature": 0.5
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
meta {
|
||||||
|
name: chat-completions -stream-
|
||||||
|
type: http
|
||||||
|
seq: 6
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}",
|
||||||
|
"messages": [{"role": "user", "content": "Explain how I can set sail on the ocean using only power generated by seagulls?"}],
|
||||||
|
"max_tokens": 256,
|
||||||
|
"temperature": 0.9,
|
||||||
|
"stream": true
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
meta {
|
||||||
|
name: add model gallery
|
||||||
|
type: http
|
||||||
|
seq: 10
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"url": "file:///home/dave/projects/model-gallery/huggingface/TheBloke__CodeLlama-7B-Instruct-GGML.yaml",
|
||||||
|
"name": "test"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
meta {
|
||||||
|
name: delete model gallery
|
||||||
|
type: http
|
||||||
|
seq: 11
|
||||||
|
}
|
||||||
|
|
||||||
|
delete {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"name": "test"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
meta {
|
||||||
|
name: list MODELS in galleries
|
||||||
|
type: http
|
||||||
|
seq: 7
|
||||||
|
}
|
||||||
|
|
||||||
|
get {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/available
|
||||||
|
body: none
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
meta {
|
||||||
|
name: list model GALLERIES
|
||||||
|
type: http
|
||||||
|
seq: 8
|
||||||
|
}
|
||||||
|
|
||||||
|
get {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||||
|
body: none
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
meta {
|
||||||
|
name: model gallery apply -gist-
|
||||||
|
type: http
|
||||||
|
seq: 12
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"id": "TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q2_K.bin"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
meta {
|
||||||
|
name: model gallery apply
|
||||||
|
type: http
|
||||||
|
seq: 9
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"id": "dave@TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q3_K_S.bin",
|
||||||
|
"name": "codellama7b"
|
||||||
|
}
|
||||||
|
}
|
||||||
22
examples/bruno/LocalAI Test Requests/tts/-tts.bru
Normal file
22
examples/bruno/LocalAI Test Requests/tts/-tts.bru
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
meta {
|
||||||
|
name: -tts
|
||||||
|
type: http
|
||||||
|
seq: 2
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/tts
|
||||||
|
body: json
|
||||||
|
auth: none
|
||||||
|
}
|
||||||
|
|
||||||
|
headers {
|
||||||
|
Content-Type: application/json
|
||||||
|
}
|
||||||
|
|
||||||
|
body:json {
|
||||||
|
{
|
||||||
|
"model": "{{DEFAULT_MODEL}}",
|
||||||
|
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
|
||||||
|
}
|
||||||
|
}
|
||||||
16
examples/chainlit/Dockerfile
Normal file
@@ -0,0 +1,16 @@
# Use an official Python runtime as a parent image
FROM harbor.home.sfxworks.net/docker/library/python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file into the container at /app
COPY requirements.txt /app

# Install any needed packages specified in requirements.txt
RUN pip install -r requirements.txt

COPY . /app

# Run main.py with Chainlit when the container launches
CMD ["chainlit", "run", "-h", "--host", "0.0.0.0", "main.py" ]
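To try the Chainlit frontend container locally, a build-and-run sketch (the image tag and published port are arbitrary choices; Chainlit listens on port 8000 by default, and the app expects a LocalAI instance reachable at the `apiBase` set in `config.yaml`):

```bash
# Build the image from examples/chainlit and run it, exposing the Chainlit UI.
docker build -t chainlit-localai examples/chainlit
docker run --rm -p 8000:8000 chainlit-localai
```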
25
examples/chainlit/README.md
Normal file
@@ -0,0 +1,25 @@
# LocalAI Demonstration with Embeddings and Chainlit

This demonstration shows you how to use embeddings with existing data in `LocalAI`, and how to integrate it with Chainlit for an interactive querying experience. We are using the `llama_index` library to facilitate the embedding and querying processes, and `chainlit` to provide an interactive interface. The `Weaviate` client is used as the vector store.

## Prerequisites

Before proceeding, make sure you have the following installed:
- Weaviate client
- LocalAI and its dependencies
- Chainlit and its dependencies

## Getting Started

1. Clone this repository:
2. Navigate to the project directory:
3. Run the example: `chainlit run main.py`

# Highlight on `llama_index` and `chainlit`

`llama_index` is the key library that facilitates the process of embedding and querying data in LocalAI. It provides a seamless interface to integrate various components, such as `WeaviateVectorStore`, `LocalAI`, `ServiceContext`, and more, for a smooth querying experience.

`chainlit` is used to provide an interactive interface for users to query the data and see the results in real-time. It integrates with llama_index to handle the querying process and display the results to the user.

In this example, `llama_index` is used to set up the `VectorStoreIndex` and `QueryEngine`, and `chainlit` is used to handle the user interactions with `LocalAI` and display the results.
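The Getting Started steps above condense to roughly the following (a sketch, assuming Python 3.9+, a running Weaviate instance, and a LocalAI endpoint matching `config.yaml`):

```bash
# Install the example's dependencies and start the Chainlit app.
cd examples/chainlit
pip install -r requirements.txt
chainlit run main.py
```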
16
examples/chainlit/config.yaml
Normal file
@@ -0,0 +1,16 @@
localAI:
  temperature: 0
  modelName: gpt-3.5-turbo
  apiBase: http://local-ai.default
  apiKey: stub
  streaming: True
weviate:
  url: http://weviate.local
  index: AIChroma
query:
  mode: hybrid
  topK: 1
  alpha: 0.0
  chunkSize: 1024
embedding:
  model: BAAI/bge-small-en-v1.5
82
examples/chainlit/main.py
Normal file
@@ -0,0 +1,82 @@
import os

import weaviate
from llama_index.storage.storage_context import StorageContext
from llama_index.vector_stores import WeaviateVectorStore

from llama_index.query_engine.retriever_query_engine import RetrieverQueryEngine
from llama_index.callbacks.base import CallbackManager
from llama_index import (
    LLMPredictor,
    ServiceContext,
    StorageContext,
    VectorStoreIndex,
)
import chainlit as cl

from llama_index.llms import LocalAI
from llama_index.embeddings import HuggingFaceEmbedding
import yaml

# Load the configuration file
with open("config.yaml", "r") as ymlfile:
    cfg = yaml.safe_load(ymlfile)

# Get the values from the configuration file or set the default values
temperature = cfg['localAI'].get('temperature', 0)
model_name = cfg['localAI'].get('modelName', "gpt-3.5-turbo")
api_base = cfg['localAI'].get('apiBase', "http://local-ai.default")
api_key = cfg['localAI'].get('apiKey', "stub")
streaming = cfg['localAI'].get('streaming', True)
weaviate_url = cfg['weviate'].get('url', "http://weviate.default")
index_name = cfg['weviate'].get('index', "AIChroma")
query_mode = cfg['query'].get('mode', "hybrid")
topK = cfg['query'].get('topK', 1)
alpha = cfg['query'].get('alpha', 0.0)
embed_model_name = cfg['embedding'].get('model', "BAAI/bge-small-en-v1.5")
chunk_size = cfg['query'].get('chunkSize', 1024)


embed_model = HuggingFaceEmbedding(model_name=embed_model_name)


llm = LocalAI(temperature=temperature, model_name=model_name, api_base=api_base, api_key=api_key, streaming=streaming)
llm.globally_use_chat_completions = True
client = weaviate.Client(weaviate_url)
vector_store = WeaviateVectorStore(weaviate_client=client, index_name=index_name)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

@cl.on_chat_start
async def factory():

    llm_predictor = LLMPredictor(
        llm=llm
    )

    service_context = ServiceContext.from_defaults(embed_model=embed_model, callback_manager=CallbackManager([cl.LlamaIndexCallbackHandler()]), llm_predictor=llm_predictor, chunk_size=chunk_size)

    index = VectorStoreIndex.from_vector_store(
        vector_store,
        storage_context=storage_context,
        service_context=service_context
    )

    query_engine = index.as_query_engine(vector_store_query_mode=query_mode, similarity_top_k=topK, alpha=alpha, streaming=True)

    cl.user_session.set("query_engine", query_engine)


@cl.on_message
async def main(message: cl.Message):
    query_engine = cl.user_session.get("query_engine")
    response = await cl.make_async(query_engine.query)(message.content)

    response_message = cl.Message(content="")

    for token in response.response_gen:
        await response_message.stream_token(token=token)

    if response.response_txt:
        response_message.content = response.response_txt

    await response_message.send()
7
examples/chainlit/requirements.txt
Normal file
@@ -0,0 +1,7 @@
llama_hub==0.0.41
llama_index==0.8.55
Requests==2.31.0
weaviate_client==3.25.1
transformers
torch
chainlit
@@ -24,10 +24,13 @@ docker-compose up -d --pull always
 # docker-compose up -d --build
 ```
 
+Then browse to `http://localhost:3000` to view the Web UI.
+
 ## Pointing chatbot-ui to a separately managed LocalAI service
 
-If you want to use the [chatbot-ui example](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) with an externally managed LocalAI service, you can alter the `docker-compose` file so that it looks like the below. You will notice the file is smaller, because we have removed the section that would normally start the LocalAI service. Take care to update the IP address (or FQDN) that the chatbot-ui service tries to access (marked `<<LOCALAI_IP>>` below):
-```
+If you want to use the [chatbot-ui example](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) with an externally managed LocalAI service, you can alter the `docker-compose.yaml` file so that it looks like the below. You will notice the file is smaller, because we have removed the section that would normally start the LocalAI service. Take care to update the IP address (or FQDN) that the chatbot-ui service tries to access (marked `<<LOCALAI_IP>>` below):
+
+```yaml
 version: '3.6'
 
 services:
@@ -40,9 +43,8 @@ services:
       - 'OPENAI_API_HOST=http://<<LOCALAI_IP>>:8080'
 ```
 
-Once you've edited the Dockerfile, you can start it with `docker compose up`, then browse to `http://localhost:3000`.
+Once you've edited the `docker-compose.yaml`, you can start it with `docker compose up`, then browse to `http://localhost:3000` to view the Web UI.
 
 ## Accessing chatbot-ui
 
 Open http://localhost:3000 for the Web UI.
-
1
examples/chatbot-ui-manual/models
Symbolic link
@@ -0,0 +1 @@
../models
@@ -20,10 +20,13 @@ docker-compose up --pull always
 # docker-compose up -d --build
 ```
 
+Then browse to `http://localhost:3000` to view the Web UI.
+
 ## Pointing chatbot-ui to a separately managed LocalAI service
 
-If you want to use the [chatbot-ui example](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) with an externally managed LocalAI service, you can alter the `docker-compose` file so that it looks like the below. You will notice the file is smaller, because we have removed the section that would normally start the LocalAI service. Take care to update the IP address (or FQDN) that the chatbot-ui service tries to access (marked `<<LOCALAI_IP>>` below):
-```
+If you want to use the [chatbot-ui example](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) with an externally managed LocalAI service, you can alter the `docker-compose.yaml` file so that it looks like the below. You will notice the file is smaller, because we have removed the section that would normally start the LocalAI service. Take care to update the IP address (or FQDN) that the chatbot-ui service tries to access (marked `<<LOCALAI_IP>>` below):
+
+```yaml
 version: '3.6'
 
 services:
@@ -36,9 +39,8 @@ services:
       - 'OPENAI_API_HOST=http://<<LOCALAI_IP>>:8080'
 ```
 
-Once you've edited the Dockerfile, you can start it with `docker compose up`, then browse to `http://localhost:3000`.
+Once you've edited the `docker-compose.yaml`, you can start it with `docker compose up`, then browse to `http://localhost:3000` to view the Web UI.
 
 ## Accessing chatbot-ui
 
 Open http://localhost:3000 for the Web UI.
-
42
examples/configurations/README.md
Normal file
@@ -0,0 +1,42 @@
## Advanced configuration

This section contains examples of how to install models manually with config files.

### Prerequisites

First clone LocalAI:

```bash
git clone https://github.com/go-skynet/LocalAI

cd LocalAI
```

Set up the model you prefer from the examples below and then start LocalAI:

```bash
docker compose up -d --pull always
```

If LocalAI is already started, you can restart it with:

```bash
docker compose restart
```

See also the getting started guide: https://localai.io/basics/getting_started/

### Mistral

To set up Mistral, copy the files inside `mistral` into the `models` folder:

```bash
cp -r examples/configurations/mistral/* models/
```

Now download the model:

```bash
wget https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF/resolve/main/mistral-7b-openorca.Q6_K.gguf -O models/mistral-7b-openorca.Q6_K.gguf
```
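Once the Mistral configuration is in place and LocalAI has restarted, a quick smoke test might look like the following (a sketch: the model name below is an assumption, use whatever `name:` the copied YAML registers):

```bash
# Ask the freshly installed model a question via the OpenAI-compatible endpoint.
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
  "model": "mistral-7b-openorca",
  "messages": [{"role": "user", "content": "Say hello in one short sentence."}],
  "temperature": 0.1
}'
```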
18
examples/configurations/llava/README.md
Normal file
@@ -0,0 +1,18 @@


## Setup

```
mkdir models
wget https://huggingface.co/mys/ggml_bakllava-1/resolve/main/ggml-model-q4_k.gguf -O models/ggml-model-q4_k.gguf
wget https://huggingface.co/mys/ggml_bakllava-1/resolve/main/mmproj-model-f16.gguf -O models/mmproj-model-f16.gguf
docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:master --models-path /models --threads 4
```

## Try it out

```
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "llava",
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
```
3
examples/configurations/llava/chat-simple.tmpl
Normal file
@@ -0,0 +1,3 @@
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
{{.Input}}
ASSISTANT:
20
examples/configurations/llava/llava.yaml
Normal file
@@ -0,0 +1,20 @@

context_size: 4096
f16: true
threads: 11
gpu_layers: 90
name: llava
mmap: true
backend: llama-cpp
roles:
  user: "USER:"
  assistant: "ASSISTANT:"
  system: "SYSTEM:"
parameters:
  model: ggml-model-q4_k.gguf
  temperature: 0.2
  top_k: 40
  top_p: 0.95
template:
  chat: chat-simple
mmproj: mmproj-model-f16.gguf
3
examples/configurations/mistral/chatml-block.tmpl
Normal file
@@ -0,0 +1,3 @@
{{.Input}}
<|im_start|>assistant

3
examples/configurations/mistral/chatml.tmpl
Normal file
@@ -0,0 +1,3 @@
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}
<|im_end|>
Some files were not shown because too many files have changed in this diff.