Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-06 04:33:30 -05:00)

Compare commits: 263 commits, enable_gpu...v2.8.2
(The compare view lists the 263 commit SHA1s; the author, date, and message columns are empty in this mirror view.)
.env | 19

@@ -69,4 +69,21 @@ MODELS_PATH=/models
 # PYTHON_GRPC_MAX_WORKERS=1
 
 ### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
 # LLAMACPP_PARALLEL=1
+
+### Enable to run parallel requests
+# PARALLEL_REQUESTS=true
+
+### Watchdog settings
+###
+# Enables watchdog to kill backends that are inactive for too much time
+# WATCHDOG_IDLE=true
+#
+# Enables watchdog to kill backends that are busy for too much time
+# WATCHDOG_BUSY=true
+#
+# Time in duration format (e.g. 1h30m) after which a backend is considered idle
+# WATCHDOG_IDLE_TIMEOUT=5m
+#
+# Time in duration format (e.g. 1h30m) after which a backend is considered busy
+# WATCHDOG_BUSY_TIMEOUT=5m
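All of the new knobs ship commented out. A minimal sketch of an .env that actually turns the watchdog on; the values below are illustrative choices, not defaults taken from the diff:

```bash
# Hypothetical .env fragment enabling the options introduced above
LLAMACPP_PARALLEL=2        # run two llama.cpp workers
PARALLEL_REQUESTS=true     # allow parallel requests
WATCHDOG_IDLE=true         # kill backends that are inactive for too long
WATCHDOG_IDLE_TIMEOUT=15m  # duration format, e.g. 1h30m
WATCHDOG_BUSY=true         # kill backends that are busy for too long
WATCHDOG_BUSY_TIMEOUT=5m
```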
.github/ISSUE_TEMPLATE/bug_report.md | 4

@@ -2,9 +2,7 @@
 name: Bug report
 about: Create a report to help us improve
 title: ''
-labels: bug
-assignees: mudler
-
+labels: bug, unconfirmed, up-for-grabs
 ---
 
 <!-- Thanks for helping us to improve LocalAI! We welcome all bug reports. Please fill out each area of the template so we can better help you. Comments like this will be hidden when you post but you can delete them if you wish. -->
.github/ISSUE_TEMPLATE/feature_request.md | 4

@@ -2,9 +2,7 @@
 name: Feature request
 about: Suggest an idea for this project
 title: ''
-labels: enhancement
-assignees: mudler
-
+labels: enhancement, up-for-grabs
 ---
 
 <!-- Thanks for helping us to improve LocalAI! We welcome all feature requests. Please fill out each area of the template so we can better help you. Comments like this will be hidden when you post but you can delete them if you wish. -->
.github/bump_docs.sh | 7 (new executable file)

#!/bin/bash
set -xe
REPO=$1

LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.name')

cat <<< $(jq ".version = \"$LATEST_TAG\"" docs/data/version.json) > docs/data/version.json
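Run by hand, the script simply rewrites the docs version file. The invocation below mirrors the one the bump_docs workflow uses (repository name taken from its matrix) and assumes curl and jq are installed:

```bash
# Fetch the latest release tag of mudler/LocalAI and write it into docs/data/version.json
bash .github/bump_docs.sh mudler/LocalAI
cat docs/data/version.json   # e.g. {"version": "v2.8.2"} (illustrative output)
```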
.github/workflows/bump_docs.yaml | 31 (new file)

name: Bump dependencies
on:
  schedule:
    - cron: 0 20 * * *
  workflow_dispatch:
jobs:
  bump:
    strategy:
      fail-fast: false
      matrix:
        include:
          - repository: "mudler/LocalAI"
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Bump dependencies 🔧
        run: |
          bash .github/bump_docs.sh ${{ matrix.repository }}
      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v5
        with:
          token: ${{ secrets.UPDATE_BOT_TOKEN }}
          push-to-fork: ci-forks/LocalAI
          commit-message: ':arrow_up: Update docs version ${{ matrix.repository }}'
          title: ':arrow_up: Update docs version ${{ matrix.repository }}'
          branch: "update/docs"
          body: Bump of ${{ matrix.repository }} version inside docs
          signoff: true
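The workflow runs nightly (cron 0 20 * * *), but because it also declares workflow_dispatch it can be started by hand; one way, assuming the GitHub CLI is installed and authenticated against the repository:

```bash
# Hypothetical manual trigger of the docs bump via the GitHub CLI
gh workflow run bump_docs.yaml
```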
.github/workflows/image-pr.yml | 100 (new file)

---
name: 'build container images tests'

on:
  pull_request:

concurrency:
  group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }}
  cancel-in-progress: true

jobs:
  extras-image-build:
    uses: ./.github/workflows/image_build.yml
    with:
      tag-latest: ${{ matrix.tag-latest }}
      tag-suffix: ${{ matrix.tag-suffix }}
      ffmpeg: ${{ matrix.ffmpeg }}
      image-type: ${{ matrix.image-type }}
      build-type: ${{ matrix.build-type }}
      cuda-major-version: ${{ matrix.cuda-major-version }}
      cuda-minor-version: ${{ matrix.cuda-minor-version }}
      platforms: ${{ matrix.platforms }}
      runs-on: ${{ matrix.runs-on }}
      base-image: ${{ matrix.base-image }}
    secrets:
      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
    strategy:
      # Pushing with all jobs in parallel
      # eats the bandwidth of all the nodes
      max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
      matrix:
        include:
          - build-type: ''
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-ffmpeg'
            ffmpeg: 'true'
            image-type: 'extras'
            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "1"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12-ffmpeg'
            ffmpeg: 'true'
            image-type: 'extras'
            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
  core-image-build:
    uses: ./.github/workflows/image_build.yml
    with:
      tag-latest: ${{ matrix.tag-latest }}
      tag-suffix: ${{ matrix.tag-suffix }}
      ffmpeg: ${{ matrix.ffmpeg }}
      image-type: ${{ matrix.image-type }}
      build-type: ${{ matrix.build-type }}
      cuda-major-version: ${{ matrix.cuda-major-version }}
      cuda-minor-version: ${{ matrix.cuda-minor-version }}
      platforms: ${{ matrix.platforms }}
      runs-on: ${{ matrix.runs-on }}
      base-image: ${{ matrix.base-image }}
    secrets:
      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
    strategy:
      matrix:
        include:
          - build-type: ''
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-ffmpeg-core'
            ffmpeg: 'true'
            image-type: 'core'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
          - build-type: 'sycl_f16'
            platforms: 'linux/amd64'
            tag-latest: 'false'
            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
            tag-suffix: 'sycl-f16-ffmpeg-core'
            ffmpeg: 'true'
            image-type: 'core'
            runs-on: 'arc-runner-set'
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "1"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12-ffmpeg-core'
            ffmpeg: 'true'
            image-type: 'core'
            runs-on: 'ubuntu-latest'
            base-image: "ubuntu:22.04"
.github/workflows/image.yml | 261

@@ -2,7 +2,6 @@
 name: 'build container images'
 
 on:
-  pull_request:
   push:
     branches:
       - master

@@ -14,8 +13,28 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  image-build:
+  extras-image-build:
+    uses: ./.github/workflows/image_build.yml
+    with:
+      tag-latest: ${{ matrix.tag-latest }}
+      tag-suffix: ${{ matrix.tag-suffix }}
+      ffmpeg: ${{ matrix.ffmpeg }}
+      image-type: ${{ matrix.image-type }}
+      build-type: ${{ matrix.build-type }}
+      cuda-major-version: ${{ matrix.cuda-major-version }}
+      cuda-minor-version: ${{ matrix.cuda-minor-version }}
+      platforms: ${{ matrix.platforms }}
+      runs-on: ${{ matrix.runs-on }}
+      base-image: ${{ matrix.base-image }}
+    secrets:
+      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
+      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
+      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
+      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
     strategy:
+      # Pushing with all jobs in parallel
+      # eats the bandwidth of all the nodes
+      max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
       matrix:
         include:
           - build-type: ''

@@ -24,130 +43,164 @@ jobs:
The third hunk replaces the old inline build steps with matrix metadata and a second job:

- Every existing extras matrix entry gains image-type: 'extras', runs-on: 'arc-runner-set' and base-image: "ubuntu:22.04", and the CUDA versions become quoted strings (cuda-major-version: "11"/"12", cuda-minor-version: "7"/"1").
- A further extras entry is appended: build-type: '' with a commented-out #platforms: 'linux/amd64,linux/arm64', platforms: 'linux/amd64', tag-latest: 'auto', tag-suffix: '', ffmpeg: '', image-type: 'extras', base-image: "ubuntu:22.04", runs-on: 'arc-runner-set'.
- The old steps of the image-build job (Force Install GIT latest, Checkout, the commented-out "Release space from worker" block, Docker meta, Set up QEMU, Set up Docker Buildx, Login to DockerHub, Build and push) are removed; the same steps now live in the reusable .github/workflows/image_build.yml shown later in this diff.
- A new core-image-build job is added. It calls ./.github/workflows/image_build.yml with the same with/secrets block as extras-image-build, and its matrix entries (build-type / tag-suffix / ffmpeg / runs-on / base-image) are:
  - '' / '-ffmpeg-core' / 'true' / 'ubuntu-latest' / "ubuntu:22.04"
  - 'sycl_f16' / '-sycl-f16-core' / 'false' / 'arc-runner-set' / "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
  - 'sycl_f32' / '-sycl-f32-core' / 'false' / 'arc-runner-set' / "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
  - 'sycl_f16' / '-sycl-f16-ffmpeg-core' / 'true' / 'arc-runner-set' / "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
  - 'sycl_f32' / '-sycl-f32-ffmpeg-core' / 'true' / 'arc-runner-set' / "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
  - 'cublas' 11.7 / '-cublas-cuda11-core' / '' / 'ubuntu-latest' / "ubuntu:22.04"
  - 'cublas' 12.1 / '-cublas-cuda12-core' / '' / 'ubuntu-latest' / "ubuntu:22.04"
  - 'cublas' 11.7 / '-cublas-cuda11-ffmpeg-core' / 'true' / 'ubuntu-latest' / "ubuntu:22.04"
  - 'cublas' 12.1 / '-cublas-cuda12-ffmpeg-core' / 'true' / 'ubuntu-latest' / "ubuntu:22.04"
  All core entries use platforms: 'linux/amd64' and tag-latest: 'false'.
.github/workflows/image_build.yml | 171 (new file)

---
name: 'build container images (reusable)'

on:
  workflow_call:
    inputs:
      base-image:
        description: 'Base image'
        required: false
        default: ''
        type: string
      build-type:
        description: 'Build type'
        default: ''
        type: string
      cuda-major-version:
        description: 'CUDA major version'
        default: "11"
        type: string
      cuda-minor-version:
        description: 'CUDA minor version'
        default: "7"
        type: string
      platforms:
        description: 'Platforms'
        default: ''
        type: string
      tag-latest:
        description: 'Tag latest'
        default: ''
        type: string
      tag-suffix:
        description: 'Tag suffix'
        default: ''
        type: string
      ffmpeg:
        description: 'FFMPEG'
        default: ''
        type: string
      image-type:
        description: 'Image type'
        default: ''
        type: string
      runs-on:
        description: 'Runs on'
        required: true
        default: ''
        type: string
    secrets:
      dockerUsername:
        required: true
      dockerPassword:
        required: true
      quayUsername:
        required: true
      quayPassword:
        required: true
jobs:
  reusable_image-build:
    runs-on: ${{ inputs.runs-on }}
    steps:
      - name: Force Install GIT latest
        run: |
          sudo apt-get update \
          && sudo apt-get install -y software-properties-common \
          && sudo apt-get update \
          && sudo add-apt-repository -y ppa:git-core/ppa \
          && sudo apt-get update \
          && sudo apt-get install -y git
      - name: Checkout
        uses: actions/checkout@v4
      - name: Release space from worker
        if: inputs.runs-on == 'ubuntu-latest'
        run: |
          echo "Listing top largest packages"
          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
          head -n 30 <<< "${pkgs}"
          echo
          df -h
          echo
          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
          sudo apt-get remove --auto-remove android-sdk-platform-tools || true
          sudo apt-get purge --auto-remove android-sdk-platform-tools || true
          sudo rm -rf /usr/local/lib/android
          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
          sudo rm -rf /usr/share/dotnet
          sudo apt-get remove -y '^mono-.*' || true
          sudo apt-get remove -y '^ghc-.*' || true
          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
          sudo apt-get remove -y 'php.*' || true
          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
          sudo apt-get remove -y '^google-.*' || true
          sudo apt-get remove -y azure-cli || true
          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
          sudo apt-get remove -y '^gfortran-.*' || true
          sudo apt-get remove -y microsoft-edge-stable || true
          sudo apt-get remove -y firefox || true
          sudo apt-get remove -y powershell || true
          sudo apt-get remove -y r-base-core || true
          sudo apt-get autoremove -y
          sudo apt-get clean
          echo
          echo "Listing top largest packages"
          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
          head -n 30 <<< "${pkgs}"
          echo
          sudo rm -rfv build || true
          sudo rm -rf /usr/share/dotnet || true
          sudo rm -rf /opt/ghc || true
          sudo rm -rf "/usr/local/share/boost" || true
          sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
          df -h
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: |
            quay.io/go-skynet/local-ai
            localai/localai
          tags: |
            type=ref,event=branch
            type=semver,pattern={{raw}}
            type=sha
          flavor: |
            latest=${{ inputs.tag-latest }}
            suffix=${{ inputs.tag-suffix }}

      - name: Set up QEMU
        uses: docker/setup-qemu-action@master
        with:
          platforms: all

      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@master

      - name: Login to DockerHub
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.dockerUsername }}
          password: ${{ secrets.dockerPassword }}

      - name: Login to DockerHub
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: quay.io
          username: ${{ secrets.quayUsername }}
          password: ${{ secrets.quayPassword }}

      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          builder: ${{ steps.buildx.outputs.name }}
          build-args: |
            BUILD_TYPE=${{ inputs.build-type }}
            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
            FFMPEG=${{ inputs.ffmpeg }}
            IMAGE_TYPE=${{ inputs.image-type }}
            BASE_IMAGE=${{ inputs.base-image }}
          context: .
          file: ./Dockerfile
          platforms: ${{ inputs.platforms }}
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
      - name: job summary
        run: |
          echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
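Outside CI the same image can be produced with a plain docker build from a checkout of the repository. The sketch below mirrors the build-args the "Build and push" step passes, using one example combination from the matrices above; the image tag is made up for illustration:

```bash
# Roughly what the Build and push step does for a CUDA 12 core image, locally and without pushing
docker build \
  --build-arg BUILD_TYPE=cublas \
  --build-arg CUDA_MAJOR_VERSION=12 \
  --build-arg CUDA_MINOR_VERSION=1 \
  --build-arg FFMPEG=true \
  --build-arg IMAGE_TYPE=core \
  --build-arg BASE_IMAGE=ubuntu:22.04 \
  -t local-ai:cublas-cuda12-ffmpeg-core \
  -f Dockerfile .
```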
.github/workflows/release.yaml | 55

@@ -5,6 +5,10 @@ on: push
 permissions:
   contents: write
 
+concurrency:
+  group: ci-releases-${{ github.head_ref || github.ref }}-${{ github.repository }}
+  cancel-in-progress: true
+
 jobs:
   build-linux:
     strategy:

@@ -16,6 +20,10 @@ jobs:
           defines: '-DLLAMA_AVX2=OFF'
         - build: 'avx512'
           defines: '-DLLAMA_AVX512=ON'
+        - build: 'cuda12'
+          defines: ''
+        - build: 'cuda11'
+          defines: ''
     runs-on: ubuntu-latest
     steps:
       - name: Clone

@@ -29,19 +37,47 @@ jobs:
         run: |
           sudo apt-get update
           sudo apt-get install build-essential ffmpeg
+      - name: Install CUDA Dependencies
+        if: ${{ matrix.build == 'cuda12' || matrix.build == 'cuda11' }}
+        run: |
+          if [ "${{ matrix.build }}" == "cuda12" ]; then
+            export CUDA_VERSION=12-3
+          else
+            export CUDA_VERSION=11-7
+          fi
+          curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
+          sudo dpkg -i cuda-keyring_1.1-1_all.deb
+          sudo apt-get update
+          sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
+      - name: Cache grpc
+        id: cache-grpc
+        uses: actions/cache@v3
+        with:
+          path: grpc
+          key: ${{ runner.os }}-grpc
+      - name: Build grpc
+        if: steps.cache-grpc.outputs.cache-hit != 'true'
+        run: |
           git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
             cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
               -DgRPC_BUILD_TESTS=OFF \
-              ../.. && sudo make -j12 install
+              ../.. && sudo make -j12
+      - name: Install gRPC
+        run: |
+          cd grpc && cd cmake/build && sudo make -j12 install
       - name: Build
         id: build
         env:
           CMAKE_ARGS: "${{ matrix.defines }}"
           BUILD_ID: "${{ matrix.build }}"
         run: |
-          STATIC=true make dist
+          if [ "${{ matrix.build }}" == "cuda12" ] || [ "${{ matrix.build }}" == "cuda11" ]; then
+            export BUILD_TYPE=cublas
+            export PATH=/usr/local/cuda/bin:$PATH
+            make dist
+          else
+            STATIC=true make dist
+          fi
       - uses: actions/upload-artifact@v3
         with:
           name: ${{ matrix.build }}

@@ -74,10 +110,7 @@ jobs:
         go-version: '>=1.21.0'
       - name: Dependencies
         run: |
-          git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
-            cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-              -DgRPC_BUILD_TESTS=OFF \
-              ../.. && make -j12 install && rm -rf grpc
+          brew install protobuf grpc
       - name: Build
         id: build
         env:

@@ -96,4 +129,4 @@ jobs:
       if: startsWith(github.ref, 'refs/tags/')
       with:
         files: |
           release/*
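For reference, the CUDA release path added above boils down to a cublas build of the dist target. A local sketch, assuming the CUDA packages from the "Install CUDA Dependencies" step are already present:

```bash
# Reproduce the cuda12/cuda11 release build locally (non-CUDA builds instead use: STATIC=true make dist)
export BUILD_TYPE=cublas
export PATH=/usr/local/cuda/bin:$PATH
make dist
```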
.github/workflows/test-extra.yml | 317 (new file)

---
name: 'Tests extras backends'

on:
  pull_request:
  push:
    branches:
      - master
    tags:
      - '*'

concurrency:
  group: ci-tests-extra-${{ github.head_ref || github.ref }}-${{ github.repository }}
  cancel-in-progress: true

jobs:
  tests-transformers:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          submodules: true
      - name: Dependencies
        run: |
          sudo apt-get update
          sudo apt-get install build-essential ffmpeg
          curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
          sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
          gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
          sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
          sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
          sudo apt-get update && \
          sudo apt-get install -y conda
          sudo apt-get install -y ca-certificates cmake curl patch
          sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2

          sudo rm -rfv /usr/bin/conda || true

      - name: Test transformers
        run: |
          export PATH=$PATH:/opt/conda/bin
          make -C backend/python/transformers
          make -C backend/python/transformers test

The remaining jobs repeat the same Clone and Dependencies steps and differ only in the backend they build and test:

- tests-sentencetransformers: make -C backend/python/sentencetransformers, then make -C backend/python/sentencetransformers test
- tests-diffusers: make -C backend/python/diffusers, then make -C backend/python/diffusers test
- tests-transformers-musicgen: make -C backend/python/transformers-musicgen, then make -C backend/python/transformers-musicgen test
- tests-petals: make -C backend/python/petals, then make -C backend/python/petals test
- tests-vallex: make -C backend/python/vall-e-x, then make -C backend/python/vall-e-x test
- tests-coqui: installs espeak and espeak-ng instead of libopencv-dev, then make -C backend/python/coqui and make -C backend/python/coqui test

Two further jobs, tests-bark and tests-vllm (the latter including a "Release space from worker" cleanup step), are present in the file but fully commented out, with the note: "Below tests needs GPU. Commented out for now. TODO: Re-enable as soon as we have GPU nodes".
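The same targets can be exercised outside CI. A minimal sketch, assuming conda ends up under /opt/conda as in the workflow (transformers shown, any backend/python/<name> follows the same pattern):

```bash
# Build and test a single extras backend locally
export PATH=$PATH:/opt/conda/bin
make -C backend/python/transformers        # prepare the backend's environment
make -C backend/python/transformers test   # run its test suite
```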
.github/workflows/test.yml | 33

@@ -78,20 +78,30 @@ jobs:
           sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
 
           sudo rm -rfv /usr/bin/conda || true
-          PATH=$PATH:/opt/conda/bin make -C extra/grpc/huggingface
+          PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
 
           # Pre-build piper before we start tests in order to have shared libraries in place
-          make go-piper && \
-          GO_TAGS="tts" make -C go-piper piper.o && \
-          sudo cp -rfv go-piper/piper/build/pi/lib/. /usr/lib/ && \
+          make sources/go-piper && \
+          GO_TAGS="tts" make -C sources/go-piper piper.o && \
+          sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
 
           # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
           GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
+      - name: Cache grpc
+        id: cache-grpc
+        uses: actions/cache@v3
+        with:
+          path: grpc
+          key: ${{ runner.os }}-grpc
+      - name: Build grpc
+        if: steps.cache-grpc.outputs.cache-hit != 'true'
+        run: |
           git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
             cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
               -DgRPC_BUILD_TESTS=OFF \
-              ../.. && sudo make -j12 install
+              ../.. && sudo make -j12
+      - name: Install gRPC
+        run: |
+          cd grpc && cd cmake/build && sudo make -j12 install
       - name: Test
         run: |
           GO_TAGS="stablediffusion tts" make test

@@ -115,10 +125,7 @@ jobs:
       - run: go version
       - name: Dependencies
         run: |
-          git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
-            cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-              -DgRPC_BUILD_TESTS=OFF \
-              ../.. && make -j12 install && rm -rf grpc
+          brew install protobuf grpc
       - name: Test
         run: |
           export C_INCLUDE_PATH=/usr/local/include
.gitignore | 10

@@ -1,15 +1,9 @@
 # go-llama build artifacts
-go-llama
-go-llama-stable
-/gpt4all
-go-stable-diffusion
-go-piper
-/go-bert
-go-ggllm
-/piper
+/sources/
 __pycache__/
 *.a
 get-sources
+prepare-sources
 /backend/cpp/llama/grpc-server
 /backend/cpp/llama/llama.cpp
 
.gitmodules | 6 (vendored, new file)

@@ -0,0 +1,6 @@
+[submodule "docs/themes/hugo-theme-relearn"]
+	path = docs/themes/hugo-theme-relearn
+	url = https://github.com/McShelby/hugo-theme-relearn.git
+[submodule "docs/themes/lotusdocs"]
+	path = docs/themes/lotusdocs
+	url = https://github.com/colinwilson/lotusdocs
Dockerfile | 115

@@ -1,10 +1,11 @@
-ARG GO_VERSION=1.21-bullseye
+ARG GO_VERSION=1.21
 ARG IMAGE_TYPE=extras
+ARG BASE_IMAGE=ubuntu:22.04
 
 # extras or core
-FROM golang:$GO_VERSION as requirements-core
+FROM ${BASE_IMAGE} as requirements-core
+
+ARG GO_VERSION=1.21.7
 
 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=11
 ARG CUDA_MINOR_VERSION=7
@@ -12,13 +13,17 @@ ARG TARGETARCH
 ARG TARGETVARIANT
 
 ENV BUILD_TYPE=${BUILD_TYPE}
-ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/extra/grpc/huggingface/run.sh,autogptq:/build/extra/grpc/autogptq/run.sh,bark:/build/extra/grpc/bark/run.sh,diffusers:/build/extra/grpc/diffusers/run.sh,exllama:/build/extra/grpc/exllama/run.sh,vall-e-x:/build/extra/grpc/vall-e-x/run.sh,vllm:/build/extra/grpc/vllm/run.sh"
-ENV GALLERIES='[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]'
-ARG GO_TAGS="stablediffusion tts"
+ENV DEBIAN_FRONTEND=noninteractive
+ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh"
+ARG GO_TAGS="stablediffusion tinydream tts"
 
 RUN apt-get update && \
-    apt-get install -y ca-certificates curl patch pip cmake && apt-get clean
+    apt-get install -y ca-certificates curl patch pip cmake git && apt-get clean
 
+# Install Go
+RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -v -C /usr/local -xz
+ENV PATH $PATH:/usr/local/go/bin
+
 COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
 RUN update-ca-certificates
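With the switch from a golang base image to a plain ${BASE_IMAGE}, the Go toolchain is now fetched explicitly inside the image. Outside of Docker the same install boils down to the sketch below (GO_VERSION and TARGETARCH are the Dockerfile build args; the values shown are placeholders):

    # Manual Go toolchain install matching the new Dockerfile step (placeholder version/arch)
    GO_VERSION=1.21.7
    TARGETARCH=amd64
    curl -L -s "https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz" | tar -v -C /usr/local -xz
    export PATH=$PATH:/usr/local/go/bin
    go version   # sanity check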
@@ -30,13 +35,13 @@ RUN echo "Target Variant: $TARGETVARIANT"
 # CuBLAS requirements
 RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
     apt-get install -y software-properties-common && \
-    apt-add-repository contrib && \
-    curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \
-    dpkg -i cuda-keyring_1.0-1_all.deb && \
-    rm -f cuda-keyring_1.0-1_all.deb && \
+    curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
+    dpkg -i cuda-keyring_1.1-1_all.deb && \
+    rm -f cuda-keyring_1.1-1_all.deb && \
     apt-get update && \
-    apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
+    apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
     ; fi
 
 ENV PATH /usr/local/cuda/bin:${PATH}
 
 # OpenBLAS requirements and stable diffusion
@@ -62,25 +67,12 @@ RUN curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmo
     echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \
     echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list && \
     apt-get update && \
-    apt-get install -y conda
+    apt-get install -y conda && apt-get clean
 
-COPY extra/requirements.txt /build/extra/requirements.txt
 ENV PATH="/root/.cargo/bin:${PATH}"
 RUN pip install --upgrade pip
 RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
-#RUN if [ "${TARGETARCH}" = "amd64" ]; then \
-#    pip install git+https://github.com/suno-ai/bark.git diffusers invisible_watermark transformers accelerate safetensors;\
-# fi
-#RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "amd64" ]; then \
-#    pip install torch vllm && pip install auto-gptq https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}-cp39-cp39-linux_x86_64.whl;\
-# fi
-#RUN pip install -r /build/extra/requirements.txt && rm -rf /build/extra/requirements.txt
-
-# Vall-e-X
-RUN git clone https://github.com/Plachtaa/VALL-E-X.git /usr/lib/vall-e-x && cd /usr/lib/vall-e-x && pip install -r requirements.txt
-
-# \
-# ; fi
+RUN apt-get install -y espeak-ng espeak && apt-get clean
 
 ###################################
 ###################################
@@ -98,12 +90,9 @@ ENV NVIDIA_VISIBLE_DEVICES=all
 
 WORKDIR /build
 
-COPY Makefile .
-RUN make get-sources
-COPY go.mod .
-RUN make prepare
 COPY . .
 COPY .git .
+RUN make prepare
 
 # stablediffusion does not tolerate a newer version of abseil, build it first
 RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
@@ -112,15 +101,15 @@ RUN if [ "${BUILD_GRPC}" = "true" ]; then \
     git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
     cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
       -DgRPC_BUILD_TESTS=OFF \
-      ../.. && make -j12 install && rm -rf grpc \
+      ../.. && make -j12 install \
     ; fi
 
 # Rebuild with defaults backends
 RUN make build
 
-RUN if [ ! -d "/build/go-piper/piper/build/pi/lib/" ]; then \
-    mkdir -p /build/go-piper/piper/build/pi/lib/ \
-    touch /build/go-piper/piper/build/pi/lib/keep \
+RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
+    mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
+    touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \
     ; fi
 
 ###################################
@@ -141,10 +130,11 @@ ARG CUDA_MAJOR_VERSION=11
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
 ENV NVIDIA_VISIBLE_DEVICES=all
+ENV PIP_CACHE_PURGE=true
 
 # Add FFmpeg
 RUN if [ "${FFMPEG}" = "true" ]; then \
-    apt-get install -y ffmpeg \
+    apt-get install -y ffmpeg && apt-get clean \
     ; fi
 
 WORKDIR /build
@@ -154,49 +144,64 @@ WORKDIR /build
 # see https://github.com/go-skynet/LocalAI/pull/658#discussion_r1241971626 and
 # https://github.com/go-skynet/LocalAI/pull/434
 COPY . .
-RUN make prepare-sources
+COPY --from=builder /build/sources ./sources/
+COPY --from=builder /build/grpc ./grpc/
+
+RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf grpc
 
 # Copy the binary
 COPY --from=builder /build/local-ai ./
 
 # Copy shared libraries for piper
-COPY --from=builder /build/go-piper/piper/build/pi/lib/* /usr/lib/
+COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
 
 # do not let stablediffusion rebuild (requires an older version of absl)
 COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
 
 ## Duplicated from Makefile to avoid having a big layer that's hard to push
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    PATH=$PATH:/opt/conda/bin make -C extra/grpc/autogptq \
+    PATH=$PATH:/opt/conda/bin make -C backend/python/autogptq \
     ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    PATH=$PATH:/opt/conda/bin make -C extra/grpc/bark \
+    PATH=$PATH:/opt/conda/bin make -C backend/python/bark \
    ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    PATH=$PATH:/opt/conda/bin make -C extra/grpc/diffusers \
+    PATH=$PATH:/opt/conda/bin make -C backend/python/diffusers \
    ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    PATH=$PATH:/opt/conda/bin make -C extra/grpc/vllm \
+    PATH=$PATH:/opt/conda/bin make -C backend/python/vllm \
    ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    PATH=$PATH:/opt/conda/bin make -C extra/grpc/huggingface \
+    PATH=$PATH:/opt/conda/bin make -C backend/python/mamba \
    ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    PATH=$PATH:/opt/conda/bin make -C extra/grpc/vall-e-x \
+    PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers \
    ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    PATH=$PATH:/opt/conda/bin make -C extra/grpc/exllama \
+    PATH=$PATH:/opt/conda/bin make -C backend/python/transformers \
+    ; fi
+RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
+    PATH=$PATH:/opt/conda/bin make -C backend/python/vall-e-x \
+    ; fi
+RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
+    PATH=$PATH:/opt/conda/bin make -C backend/python/exllama \
+    ; fi
+RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
+    PATH=$PATH:/opt/conda/bin make -C backend/python/exllama2 \
+    ; fi
+RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
+    PATH=$PATH:/opt/conda/bin make -C backend/python/petals \
+    ; fi
+RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
+    PATH=$PATH:/opt/conda/bin make -C backend/python/transformers-musicgen \
+    ; fi
+RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
+    PATH=$PATH:/opt/conda/bin make -C backend/python/coqui \
     ; fi
 
-# Copy VALLE-X as it's not a real "lib"
-RUN if [ -d /usr/lib/vall-e-x ]; then \
-    cp -rfv /usr/lib/vall-e-x/* ./ ; \
-  fi
-
-# we also copy exllama libs over to resolve exllama import error
-RUN if [ -d /usr/local/lib/python3.9/dist-packages/exllama ]; then \
-    cp -rfv /usr/local/lib/python3.9/dist-packages/exllama extra/grpc/exllama/;\
-  fi
+# Make sure the models directory exists
+RUN mkdir -p /build/models
 
 # Define the health check command
 HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
Entitlements.plist | 10 (new file)

@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>com.apple.security.network.client</key>
+    <true/>
+    <key>com.apple.security.network.server</key>
+    <true/>
+</dict>
+</plist>
LICENSE | 2

@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2023 Ettore Di Giacinto
+Copyright (c) 2023-2024 Ettore Di Giacinto (mudler@localai.io)
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
Makefile | 417

@@ -8,34 +8,35 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0
 
 GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
 
-CPPLLAMA_VERSION?=a75fa576abba9d37f463580c379e4bbf1e1ad03c
+CPPLLAMA_VERSION?=f026f8120f97090d34a52b3dc023c82e0ede3f7d
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
 GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8
 
-# go-ggml-transformers version
-GOGGMLTRANSFORMERS_VERSION?=ffb09d7dd71e2cbc6c5d7d05357d230eea6f369a
-
 # go-rwkv version
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
-RWKV_VERSION?=c898cd0f62df8f2a7830e53d1d513bef4f6f792b
+RWKV_VERSION?=633c5a3485c403cb2520693dc0991a25dace9f0f
 
 # whisper.cpp version
-WHISPER_CPP_VERSION?=85ed71aaec8e0612a84c0b67804bde75aa75a273
+WHISPER_CPP_VERSION?=37a709f6558c6d9783199e2b8cbb136e1c41d346
 
 # bert.cpp version
 BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
 
 # go-piper version
-PIPER_VERSION?=736f6fb639ab8e3397356e48eeb6bdcb9da88a78
+PIPER_VERSION?=d6b6275ba037dabdba4a8b65dfdf6b2a73a67f07
 
 # stablediffusion version
-STABLEDIFFUSION_VERSION?=d89260f598afb809279bc72aa0107b4292587632
+STABLEDIFFUSION_VERSION?=d5d2be8e7e395c2d73ceef61e6fe8d240f2cd831
+
+# tinydream version
+TINYDREAM_VERSION?=772a9c0d9aaf768290e63cca3c904fe69faf677a
 
 export BUILD_TYPE?=
 export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
 export CMAKE_ARGS?=
 
 CGO_LDFLAGS?=
 CUDA_LIBPATH?=/usr/local/cuda/lib64/
 GO_TAGS?=
@@ -68,29 +69,39 @@ ifndef UNAME_S
 UNAME_S := $(shell uname -s)
 endif
 
-ifeq ($(UNAME_S),Darwin)
+ifeq ($(OS),Darwin)
 	CGO_LDFLAGS += -lcblas -framework Accelerate
-	ifneq ($(BUILD_TYPE),metal)
-		# explicit disable metal if on Darwin and metal is disabled
-		CMAKE_ARGS+=-DLLAMA_METAL=OFF
-	endif
+	ifeq ($(OSX_SIGNING_IDENTITY),)
+		OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
+	endif
+
+	# on OSX, if BUILD_TYPE is blank, we should default to use Metal
+	ifeq ($(BUILD_TYPE),)
+		BUILD_TYPE=metal
+	# disable metal if on Darwin and any other value is explicitly passed.
+	else ifneq ($(BUILD_TYPE),metal)
+		CMAKE_ARGS+=-DLLAMA_METAL=OFF
+	endif
 endif
 
 ifeq ($(BUILD_TYPE),openblas)
 	CGO_LDFLAGS+=-lopenblas
+	export WHISPER_OPENBLAS=1
 endif
 
 ifeq ($(BUILD_TYPE),cublas)
 	CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
 	export LLAMA_CUBLAS=1
+	export WHISPER_CUBLAS=1
 endif
 
 ifeq ($(BUILD_TYPE),hipblas)
 	ROCM_HOME ?= /opt/rocm
 	export CXX=$(ROCM_HOME)/llvm/bin/clang++
 	export CC=$(ROCM_HOME)/llvm/bin/clang
-	# Llama-stable has no hipblas support, so override it here.
+	# llama-ggml has no hipblas support, so override it here.
 	export STABLE_BUILD_TYPE=
+	export WHISPER_HIPBLAS=1
 	GPU_TARGETS ?= gfx900,gfx90a,gfx1030,gfx1031,gfx1100
 	AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
 	CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
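The Darwin branch now defaults an empty BUILD_TYPE to Metal and looks up a codesigning identity automatically when OSX_SIGNING_IDENTITY is unset. In practice, a sketch of the behaviour implied by the conditionals above (not an official recipe):

    # On macOS, a plain build now picks Metal by itself:
    make build
    # Explicitly passing any other BUILD_TYPE turns Metal off via CMAKE_ARGS instead:
    BUILD_TYPE=openblas make build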
@@ -100,10 +111,12 @@ endif
 ifeq ($(BUILD_TYPE),metal)
 	CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
 	export LLAMA_METAL=1
+	export WHISPER_METAL=1
 endif
 
 ifeq ($(BUILD_TYPE),clblas)
 	CGO_LDFLAGS+=-lOpenCL -lclblast
+	export WHISPER_CLBLAST=1
 endif
 
 # glibc-static or glibc-devel-static required
@@ -116,15 +129,29 @@ ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
 	OPTIONAL_GRPC+=backend-assets/grpc/stablediffusion
 endif
 
+ifeq ($(findstring tinydream,$(GO_TAGS)),tinydream)
+#	OPTIONAL_TARGETS+=go-tiny-dream/libtinydream.a
+	OPTIONAL_GRPC+=backend-assets/grpc/tinydream
+endif
+
 ifeq ($(findstring tts,$(GO_TAGS)),tts)
 #	OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
 #	OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
-	PIPER_CGO_CXXFLAGS+=-I$(shell pwd)/go-piper/piper/src/cpp -I$(shell pwd)/go-piper/piper/build/fi/include -I$(shell pwd)/go-piper/piper/build/pi/include -I$(shell pwd)/go-piper/piper/build/si/include
-	PIPER_CGO_LDFLAGS+=-L$(shell pwd)/go-piper/piper/build/fi/lib -L$(shell pwd)/go-piper/piper/build/pi/lib -L$(shell pwd)/go-piper/piper/build/si/lib -lfmt -lspdlog
+	PIPER_CGO_CXXFLAGS+=-I$(CURDIR)/sources/go-piper/piper/src/cpp -I$(CURDIR)/sources/go-piper/piper/build/fi/include -I$(CURDIR)/sources/go-piper/piper/build/pi/include -I$(CURDIR)/sources/go-piper/piper/build/si/include
+	PIPER_CGO_LDFLAGS+=-L$(CURDIR)/sources/go-piper/piper/build/fi/lib -L$(CURDIR)/sources/go-piper/piper/build/pi/lib -L$(CURDIR)/sources/go-piper/piper/build/si/lib -lfmt -lspdlog -lucd
 	OPTIONAL_GRPC+=backend-assets/grpc/piper
 endif
 
-ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-stable backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
+ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface
+ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
+ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
+ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
+ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
+ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
 
 GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
 
 # If empty, then we build all
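Since the backend list is now assembled one entry per line and GRPC_BACKENDS is only a ?= default, it can be narrowed from the command line. A hedged example, relying on the usual GNU make override behaviour of ?= rather than on a documented interface:

    # Build just a subset of the Go gRPC backends instead of the full list:
    GRPC_BACKENDS="backend-assets/grpc/llama-cpp backend-assets/grpc/whisper" make build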
@@ -132,117 +159,126 @@ ifeq ($(GRPC_BACKENDS),)
 	GRPC_BACKENDS=$(ALL_GRPC_BACKENDS)
 endif
 
+ifeq ($(BUILD_API_ONLY),true)
+	GRPC_BACKENDS=
+endif
+
 .PHONY: all test build vendor
 
 all: help
 
 ## GPT4ALL
-gpt4all:
-	git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all
-	cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
+sources/gpt4all:
+	git clone --recurse-submodules $(GPT4ALL_REPO) sources/gpt4all
+	cd sources/gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
 
 ## go-piper
-go-piper:
-	git clone --recurse-submodules https://github.com/mudler/go-piper go-piper
-	cd go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
+sources/go-piper:
+	git clone --recurse-submodules https://github.com/mudler/go-piper sources/go-piper
+	cd sources/go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
 
 ## BERT embeddings
-go-bert:
-	git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp go-bert
-	cd go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
+sources/go-bert:
+	git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert
+	cd sources/go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
 
 ## stable diffusion
-go-stable-diffusion:
-	git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion go-stable-diffusion
-	cd go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
+sources/go-stable-diffusion:
+	git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion sources/go-stable-diffusion
+	cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
 
-go-stable-diffusion/libstablediffusion.a:
-	$(MAKE) -C go-stable-diffusion libstablediffusion.a
+sources/go-stable-diffusion/libstablediffusion.a:
+	$(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
 
+## tiny-dream
+sources/go-tiny-dream:
+	git clone --recurse-submodules https://github.com/M0Rf30/go-tiny-dream sources/go-tiny-dream
+	cd sources/go-tiny-dream && git checkout -b build $(TINYDREAM_VERSION) && git submodule update --init --recursive --depth 1
+
+sources/go-tiny-dream/libtinydream.a:
+	$(MAKE) -C sources/go-tiny-dream libtinydream.a
+
 ## RWKV
-go-rwkv:
-	git clone --recurse-submodules $(RWKV_REPO) go-rwkv
-	cd go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
+sources/go-rwkv:
+	git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv
+	cd sources/go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
 
-go-rwkv/librwkv.a: go-rwkv
-	cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
+sources/go-rwkv/librwkv.a: sources/go-rwkv
+	cd sources/go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
 
-go-bert/libgobert.a: go-bert
-	$(MAKE) -C go-bert libgobert.a
+sources/go-bert/libgobert.a: sources/go-bert
+	$(MAKE) -C sources/go-bert libgobert.a
 
-backend-assets/gpt4all: gpt4all/gpt4all-bindings/golang/libgpt4all.a
+backend-assets/gpt4all: sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
 	mkdir -p backend-assets/gpt4all
-	@cp gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
-	@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
-	@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
+	@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
+	@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
+	@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
 
-backend-assets/espeak-ng-data: go-piper
+backend-assets/espeak-ng-data: sources/go-piper
 	mkdir -p backend-assets/espeak-ng-data
-	$(MAKE) -C go-piper piper.o
-	@cp -rf go-piper/piper/build/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
+	$(MAKE) -C sources/go-piper piper.o
+	@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
 
-gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
-	$(MAKE) -C gpt4all/gpt4all-bindings/golang/ libgpt4all.a
+sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
+	$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
 
-## CEREBRAS GPT
-go-ggml-transformers:
-	git clone --recurse-submodules https://github.com/go-skynet/go-ggml-transformers.cpp go-ggml-transformers
-	cd go-ggml-transformers && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1
-
-go-ggml-transformers/libtransformers.a: go-ggml-transformers
-	$(MAKE) -C go-ggml-transformers BUILD_TYPE=$(BUILD_TYPE) libtransformers.a
-
-whisper.cpp:
-	git clone https://github.com/ggerganov/whisper.cpp.git
-	cd whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
-
-whisper.cpp/libwhisper.a: whisper.cpp
-	cd whisper.cpp && make libwhisper.a
-
-go-llama:
-	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
-	cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
-
-go-llama-stable:
-	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama-stable
-	cd go-llama-stable && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
-
-go-llama/libbinding.a: go-llama
-	$(MAKE) -C go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
-
-go-llama-stable/libbinding.a: go-llama-stable
-	$(MAKE) -C go-llama-stable BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
-
-go-piper/libpiper_binding.a: go-piper
-	$(MAKE) -C go-piper libpiper_binding.a example/main
-
-get-sources: go-llama go-llama-stable go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert go-stable-diffusion
+sources/whisper.cpp:
+	git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp
+	cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
+
+sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
+	cd sources/whisper.cpp && make libwhisper.a
+
+sources/go-llama:
+	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama
+	cd sources/go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
+
+sources/go-llama-ggml:
+	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama-ggml
+	cd sources/go-llama-ggml && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
+
+sources/go-llama/libbinding.a: sources/go-llama
+	$(MAKE) -C sources/go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
+
+sources/go-llama-ggml/libbinding.a: sources/go-llama-ggml
+	$(MAKE) -C sources/go-llama-ggml BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
+
+sources/go-piper/libpiper_binding.a: sources/go-piper
+	$(MAKE) -C sources/go-piper libpiper_binding.a example/main
+
+backend/cpp/llama/llama.cpp:
+	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
+
+get-sources: backend/cpp/llama/llama.cpp sources/go-llama sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream
 	touch $@
 
 replace:
-	$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
-	$(GOCMD) mod edit -replace github.com/go-skynet/go-ggml-transformers.cpp=$(shell pwd)/go-ggml-transformers
-	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
-	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
-	$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
-	$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion
-	$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(shell pwd)/go-piper
+	$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
+	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv
+	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
+	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert
+	$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
+	$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
+	$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
 
 prepare-sources: get-sources replace
 	$(GOCMD) mod download
+	touch $@
 
 ## GENERIC
 rebuild: ## Rebuilds the project
 	$(GOCMD) clean -cache
-	$(MAKE) -C go-llama clean
-	$(MAKE) -C go-llama-stable clean
-	$(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean
-	$(MAKE) -C go-ggml-transformers clean
-	$(MAKE) -C go-rwkv clean
-	$(MAKE) -C whisper.cpp clean
-	$(MAKE) -C go-stable-diffusion clean
-	$(MAKE) -C go-bert clean
-	$(MAKE) -C go-piper clean
+	$(MAKE) -C sources/go-llama clean
+	$(MAKE) -C sources/go-llama-ggml clean
+	$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
+	$(MAKE) -C sources/go-rwkv clean
+	$(MAKE) -C sources/whisper.cpp clean
+	$(MAKE) -C sources/go-stable-diffusion clean
+	$(MAKE) -C sources/go-bert clean
+	$(MAKE) -C sources/go-piper clean
+	$(MAKE) -C sources/go-tiny-dream clean
 	$(MAKE) build
 
 prepare: prepare-sources $(OPTIONAL_TARGETS)
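Every third-party checkout now lives under sources/ and is pinned to the commit variable declared at the top of the Makefile; go.mod is then pointed at those checkouts through the replace directives. The common shape of each sources/<dep> rule is, roughly:

    # Generic clone-and-pin pattern used by the sources/ targets above
    # (REPO, DEP and VERSION stand in for the per-project variables such as RWKV_REPO / RWKV_VERSION).
    git clone --recurse-submodules "$REPO" "sources/$DEP"
    cd "sources/$DEP" && git checkout -b build "$VERSION" \
      && git submodule update --init --recursive --depth 1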
@@ -251,38 +287,29 @@ prepare: prepare-sources $(OPTIONAL_TARGETS)
 clean: ## Remove build related file
 	$(GOCMD) clean -cache
 	rm -f prepare
-	rm -rf ./go-llama
-	rm -rf ./gpt4all
-	rm -rf ./go-llama-stable
-	rm -rf ./go-gpt2
-	rm -rf ./go-stable-diffusion
-	rm -rf ./go-ggml-transformers
-	rm -rf ./backend-assets
-	rm -rf ./go-rwkv
-	rm -rf ./go-bert
-	rm -rf ./whisper.cpp
-	rm -rf ./go-piper
+	rm -rf ./sources
 	rm -rf $(BINARY_NAME)
 	rm -rf release/
-	rm -rf ./backend/cpp/grpc/grpc_repo
-	rm -rf ./backend/cpp/grpc/build
-	rm -rf ./backend/cpp/grpc/installed_packages
+	rm -rf backend-assets
+	$(MAKE) -C backend/cpp/grpc clean
 	$(MAKE) -C backend/cpp/llama clean
 
 ## Build:
 
-build: grpcs prepare ## Build the project
+build: backend-assets grpcs prepare ## Build the project
 	$(info ${GREEN}I local-ai build info:${RESET})
 	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
 	$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
 	$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
 
 dist: build
 	mkdir -p release
 	cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)
 
+osx-signed: build
+	codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"
+
 ## Run
 run: prepare ## run local-ai
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./
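The new osx-signed target codesigns the binary with the Entitlements.plist added in this change, using the identity auto-detected in the Darwin block earlier in the Makefile. A hedged usage sketch (a signing identity must exist in the keychain; the identity string shown is only an example):

    # Sign a freshly built macOS binary with the new target
    make build
    make osx-signed
    # or pin a specific identity instead of the auto-detected one:
    OSX_SIGNING_IDENTITY="Developer ID Application: Example" make osx-signed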
@@ -290,7 +317,7 @@ run: prepare ## run local-ai
 test-models/testmodel:
 	mkdir test-models
 	mkdir test-dir
-	wget -q https://huggingface.co/nnakasato/ggml-model-test/resolve/main/ggml-model-q4.bin -O test-models/testmodel
+	wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel
 	wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
 	wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
 	wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
@@ -306,7 +333,7 @@ test: prepare test-models/testmodel grpcs
 	@echo 'Running tests'
 	export GO_TAGS="tts stablediffusion"
 	$(MAKE) prepare-test
-	HUGGINGFACE_GRPC=$(abspath ./)/extra/grpc/huggingface/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
+	HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts 5 --fail-fast -v -r ./api ./pkg
 	$(MAKE) test-gpt4all
 	$(MAKE) test-llama
@@ -373,40 +400,65 @@ help: ## Show this help.
 protogen: protogen-go protogen-python
 
 protogen-go:
-	protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative \
-	pkg/grpc/proto/backend.proto
+	protoc -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
+	backend/backend.proto
 
 protogen-python:
-	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/huggingface/ --grpc_python_out=extra/grpc/huggingface/ pkg/grpc/proto/backend.proto
-	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/autogptq/ --grpc_python_out=extra/grpc/autogptq/ pkg/grpc/proto/backend.proto
-	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/exllama/ --grpc_python_out=extra/grpc/exllama/ pkg/grpc/proto/backend.proto
-	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/bark/ --grpc_python_out=extra/grpc/bark/ pkg/grpc/proto/backend.proto
-	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/diffusers/ --grpc_python_out=extra/grpc/diffusers/ pkg/grpc/proto/backend.proto
-	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/vall-e-x/ --grpc_python_out=extra/grpc/vall-e-x/ pkg/grpc/proto/backend.proto
-	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/vllm/ --grpc_python_out=extra/grpc/vllm/ pkg/grpc/proto/backend.proto
+	python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/sentencetransformers/ --grpc_python_out=backend/python/sentencetransformers/ backend/backend.proto
+	python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/transformers/ --grpc_python_out=backend/python/transformers/ backend/backend.proto
+	python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/transformers-musicgen/ --grpc_python_out=backend/python/transformers-musicgen/ backend/backend.proto
+	python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/autogptq/ --grpc_python_out=backend/python/autogptq/ backend/backend.proto
+	python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/exllama/ --grpc_python_out=backend/python/exllama/ backend/backend.proto
+	python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/bark/ --grpc_python_out=backend/python/bark/ backend/backend.proto
+	python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/diffusers/ --grpc_python_out=backend/python/diffusers/ backend/backend.proto
+	python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/coqui/ --grpc_python_out=backend/python/coqui/ backend/backend.proto
+	python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vall-e-x/ --grpc_python_out=backend/python/vall-e-x/ backend/backend.proto
+	python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vllm/ --grpc_python_out=backend/python/vllm/ backend/backend.proto
+	python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/petals/ --grpc_python_out=backend/python/petals/ backend/backend.proto
+	python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/mamba/ --grpc_python_out=backend/python/mamba/ backend/backend.proto
+	python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/exllama2/ --grpc_python_out=backend/python/exllama2/ backend/backend.proto
 
 ## GRPC
 # Note: it is duplicated in the Dockerfile
 prepare-extra-conda-environments:
-	$(MAKE) -C extra/grpc/autogptq
-	$(MAKE) -C extra/grpc/bark
-	$(MAKE) -C extra/grpc/diffusers
-	$(MAKE) -C extra/grpc/vllm
-	$(MAKE) -C extra/grpc/huggingface
-	$(MAKE) -C extra/grpc/vall-e-x
-	$(MAKE) -C extra/grpc/exllama
+	$(MAKE) -C backend/python/autogptq
+	$(MAKE) -C backend/python/bark
+	$(MAKE) -C backend/python/coqui
+	$(MAKE) -C backend/python/diffusers
+	$(MAKE) -C backend/python/vllm
+	$(MAKE) -C backend/python/mamba
+	$(MAKE) -C backend/python/sentencetransformers
+	$(MAKE) -C backend/python/transformers
+	$(MAKE) -C backend/python/transformers-musicgen
+	$(MAKE) -C backend/python/vall-e-x
+	$(MAKE) -C backend/python/exllama
+	$(MAKE) -C backend/python/petals
+	$(MAKE) -C backend/python/exllama2
+
+prepare-test-extra:
+	$(MAKE) -C backend/python/transformers
+	$(MAKE) -C backend/python/diffusers
+
+test-extra: prepare-test-extra
+	$(MAKE) -C backend/python/transformers test
+	$(MAKE) -C backend/python/diffusers test
+
+backend-assets:
+	mkdir -p backend-assets
+ifeq ($(BUILD_API_ONLY),true)
+	touch backend-assets/keep
+endif
 
 backend-assets/grpc:
 	mkdir -p backend-assets/grpc
 
-backend-assets/grpc/llama: backend-assets/grpc go-llama/libbinding.a
-	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama LIBRARY_PATH=$(shell pwd)/go-llama \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama ./cmd/grpc/llama/
-# TODO: every binary should have its own folder instead, so can have different metal implementations
+backend-assets/grpc/llama: backend-assets/grpc sources/go-llama/libbinding.a
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama LIBRARY_PATH=$(CURDIR)/sources/go-llama \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama ./backend/go/llm/llama/
+# TODO: every binary should have its own folder instead, so can have different implementations
 ifeq ($(BUILD_TYPE),metal)
-	cp go-llama/build/bin/ggml-metal.metal backend-assets/grpc/
+	cp backend/cpp/llama/llama.cpp/ggml-metal.metal backend-assets/grpc/
 endif
 
 ## BACKEND CPP LLAMA START
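The Python stubs are now generated from backend/backend.proto straight into each backend directory. To regenerate the stubs for a single backend by hand, the corresponding command from the protogen-python recipe can be run on its own, e.g. (backend/python/coqui is one of the directories it already targets):

    # Regenerate the gRPC stubs for one Python backend (requires the grpcio-tools package)
    python3 -m grpc_tools.protoc -Ibackend/ \
      --python_out=backend/python/coqui/ \
      --grpc_python_out=backend/python/coqui/ \
      backend/backend.proto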
@@ -421,17 +473,17 @@ ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
 
 backend/cpp/llama/grpc-server:
 ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
-	backend/cpp/grpc/script/build_grpc.sh ${INSTALLED_PACKAGES}
+	$(MAKE) -C backend/cpp/grpc build
 	export _PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto && \
 	export _GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin && \
-	export PATH=${PATH}:${INSTALLED_PACKAGES}/bin && \
-	CMAKE_ARGS="${ADDED_CMAKE_ARGS}" LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
+	export PATH="${INSTALLED_PACKAGES}/bin:${PATH}" && \
+	CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
 else
 	echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
 	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
 endif
 ## BACKEND CPP LLAMA END
 
 ##
 backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server
 	cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp
@@ -440,71 +492,62 @@ ifeq ($(BUILD_TYPE),metal)
 	cp backend/cpp/llama/llama.cpp/build/bin/ggml-metal.metal backend-assets/grpc/
 endif
 
-backend-assets/grpc/llama-stable: backend-assets/grpc go-llama-stable/libbinding.a
-	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama-stable
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama-stable LIBRARY_PATH=$(shell pwd)/go-llama \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-stable ./cmd/grpc/llama-stable/
+backend-assets/grpc/llama-ggml: backend-assets/grpc sources/go-llama-ggml/libbinding.a
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
 
-backend-assets/grpc/gpt4all: backend-assets/grpc backend-assets/gpt4all gpt4all/gpt4all-bindings/golang/libgpt4all.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(shell pwd)/gpt4all/gpt4all-bindings/golang/ \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./cmd/grpc/gpt4all/
+backend-assets/grpc/gpt4all: backend-assets/grpc backend-assets/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
 
-backend-assets/grpc/dolly: backend-assets/grpc go-ggml-transformers/libtransformers.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/dolly ./cmd/grpc/dolly/
-
-backend-assets/grpc/gpt2: backend-assets/grpc go-ggml-transformers/libtransformers.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt2 ./cmd/grpc/gpt2/
-
-backend-assets/grpc/gptj: backend-assets/grpc go-ggml-transformers/libtransformers.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptj ./cmd/grpc/gptj/
-
-backend-assets/grpc/gptneox: backend-assets/grpc go-ggml-transformers/libtransformers.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptneox ./cmd/grpc/gptneox/
-
-backend-assets/grpc/mpt: backend-assets/grpc go-ggml-transformers/libtransformers.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/mpt ./cmd/grpc/mpt/
-
-backend-assets/grpc/replit: backend-assets/grpc go-ggml-transformers/libtransformers.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/replit ./cmd/grpc/replit/
-
-backend-assets/grpc/falcon-ggml: backend-assets/grpc go-ggml-transformers/libtransformers.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon-ggml ./cmd/grpc/falcon-ggml/
-
-backend-assets/grpc/starcoder: backend-assets/grpc go-ggml-transformers/libtransformers.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/starcoder ./cmd/grpc/starcoder/
-
-backend-assets/grpc/rwkv: backend-assets/grpc go-rwkv/librwkv.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-rwkv LIBRARY_PATH=$(shell pwd)/go-rwkv \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./cmd/grpc/rwkv/
+backend-assets/grpc/rwkv: backend-assets/grpc sources/go-rwkv/librwkv.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
+
+backend-assets/grpc/bert-embeddings: backend-assets/grpc sources/go-bert/libgobert.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert LIBRARY_PATH=$(CURDIR)/sources/go-bert \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
 
-backend-assets/grpc/bert-embeddings: backend-assets/grpc go-bert/libgobert.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-bert LIBRARY_PATH=$(shell pwd)/go-bert \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./cmd/grpc/bert-embeddings/
-
 backend-assets/grpc/langchain-huggingface: backend-assets/grpc
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./cmd/grpc/langchain-huggingface/
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./backend/go/llm/langchain/
 
 backend-assets/grpc/stablediffusion: backend-assets/grpc
 	if [ ! -f backend-assets/grpc/stablediffusion ]; then \
-	$(MAKE) go-stable-diffusion/libstablediffusion.a; \
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-stable-diffusion/ LIBRARY_PATH=$(shell pwd)/go-stable-diffusion/ \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./cmd/grpc/stablediffusion/; \
+	$(MAKE) sources/go-stable-diffusion/libstablediffusion.a; \
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-stable-diffusion/ LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion; \
 	fi
 
-backend-assets/grpc/piper: backend-assets/grpc backend-assets/espeak-ng-data go-piper/libpiper_binding.a
-	CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(shell pwd)/go-piper \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./cmd/grpc/piper/
+backend-assets/grpc/tinydream: backend-assets/grpc sources/go-tiny-dream/libtinydream.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
 
-backend-assets/grpc/whisper: backend-assets/grpc whisper.cpp/libwhisper.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/whisper.cpp LIBRARY_PATH=$(shell pwd)/whisper.cpp \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./cmd/grpc/whisper/
+backend-assets/grpc/piper: backend-assets/grpc backend-assets/espeak-ng-data sources/go-piper/libpiper_binding.a
+	CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
+
+backend-assets/grpc/whisper: backend-assets/grpc sources/whisper.cpp/libwhisper.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
 
 grpcs: prepare $(GRPC_BACKENDS)
+
+DOCKER_IMAGE?=local-ai
+IMAGE_TYPE?=core
+BASE_IMAGE?=ubuntu:22.04
+
+docker:
+	docker build \
+		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
+		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
+		--build-arg GO_TAGS=$(GO_TAGS) \
+		--build-arg BUILD_TYPE=$(BUILD_TYPE) \
+		-t $(DOCKER_IMAGE) .
+
+docker-image-intel:
+	docker build \
+		--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \
+		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
+		--build-arg GO_TAGS="none" \
+		--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
README.md (126 changes)
@@ -20,17 +20,15 @@
|
|||||||
</a>
|
</a>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
[<img src="https://img.shields.io/badge/dockerhub-images-important.svg?logo=Docker">](https://hub.docker.com/r/localai/localai)
|
||||||
>
|
[<img src="https://img.shields.io/badge/quay.io-images-important.svg?">](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest)
|
||||||
> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/)
|
|
||||||
|
|
||||||
|
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
||||||
|
>
|
||||||
|
> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||||
|
|
||||||
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
||||||
|
|
||||||
**LocalAI** is a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs (and more) locally or on-prem with consumer-grade hardware, supporting multiple model families that are compatible with the ggml format, PyTorch, and more. Does not require a GPU.
|
|
||||||
|
|
||||||
<p align="center"><b>Follow LocalAI </b></p>
|
|
||||||
|
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<a href="https://twitter.com/LocalAI_API" target="blank">
|
<a href="https://twitter.com/LocalAI_API" target="blank">
|
||||||
<img src="https://img.shields.io/twitter/follow/LocalAI_API?label=Follow: LocalAI_API&style=social" alt="Follow LocalAI_API"/>
|
<img src="https://img.shields.io/twitter/follow/LocalAI_API?label=Follow: LocalAI_API&style=social" alt="Follow LocalAI_API"/>
|
||||||
@@ -39,47 +37,35 @@
|
|||||||
<img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
|
<img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
|
||||||
</a>
|
</a>
|
||||||
|
|
||||||
<p align="center"><b>Connect with the Creator </b></p>
|
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images and audio (and more) locally or on-prem with consumer-grade hardware, supporting multiple model families. Does not require a GPU.
|
||||||
|
|
||||||
<p align="center">
|
## 🔥🔥 Hot topics / Roadmap
|
||||||
<a href="https://twitter.com/mudler_it" target="blank">
|
|
||||||
<img src="https://img.shields.io/twitter/follow/mudler_it?label=Follow: mudler_it&style=social" alt="Follow mudler_it"/>
|
|
||||||
</a>
|
|
||||||
<a href='https://github.com/mudler'>
|
|
||||||
<img alt="Follow on Github" src="https://img.shields.io/badge/Follow-mudler-black?logo=github&link=https%3A%2F%2Fgithub.com%2Fmudler">
|
|
||||||
</a>
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p align="center"><b>Share LocalAI Repository</b></p>
|
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||||
|
|
||||||
<p align="center">
|
- Intel GPU support (sycl): https://github.com/mudler/LocalAI/issues/1653
|
||||||
|
- Deprecation of old backends: https://github.com/mudler/LocalAI/issues/1651
|
||||||
|
- Mamba support: https://github.com/mudler/LocalAI/pull/1589
|
||||||
|
- Start and share models with config file: https://github.com/mudler/LocalAI/pull/1522
|
||||||
|
- 🐸 Coqui: https://github.com/mudler/LocalAI/pull/1489
|
||||||
|
- Inline templates: https://github.com/mudler/LocalAI/pull/1452
|
||||||
|
- Mixtral: https://github.com/mudler/LocalAI/pull/1449
|
||||||
|
- Img2vid https://github.com/mudler/LocalAI/pull/1442
|
||||||
|
- Musicgen https://github.com/mudler/LocalAI/pull/1387
|
||||||
|
|
||||||
<a href="https://twitter.com/intent/tweet?text=Check%20this%20GitHub%20repository%20out.%20LocalAI%20-%20Let%27s%20you%20easily%20run%20LLM%20locally.&url=https://github.com/go-skynet/LocalAI&hashtags=LocalAI,AI" target="blank">
|
Hot topics (looking for contributors):
|
||||||
<img src="https://img.shields.io/twitter/follow/_LocalAI?label=Share Repo on Twitter&style=social" alt="Follow _LocalAI"/></a>
|
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
||||||
<a href="https://t.me/share/url?text=Check%20this%20GitHub%20repository%20out.%20LocalAI%20-%20Let%27s%20you%20easily%20run%20LLM%20locally.&url=https://github.com/go-skynet/LocalAI" target="_blank"><img src="https://img.shields.io/twitter/url?label=Telegram&logo=Telegram&style=social&url=https://github.com/go-skynet/LocalAI" alt="Share on Telegram"/></a>
|
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
|
||||||
<a href="https://api.whatsapp.com/send?text=Check%20this%20GitHub%20repository%20out.%20LocalAI%20-%20Let%27s%20you%20easily%20run%20LLM%20locally.%20https://github.com/go-skynet/LocalAI"><img src="https://img.shields.io/twitter/url?label=whatsapp&logo=whatsapp&style=social&url=https://github.com/go-skynet/LocalAI" /></a> <a href="https://www.reddit.com/submit?url=https://github.com/go-skynet/LocalAI&title=Check%20this%20GitHub%20repository%20out.%20LocalAI%20-%20Let%27s%20you%20easily%20run%20LLM%20locally.
|
|
||||||
" target="blank">
|
|
||||||
<img src="https://img.shields.io/twitter/url?label=Reddit&logo=Reddit&style=social&url=https://github.com/go-skynet/LocalAI" alt="Share on Reddit"/>
|
|
||||||
</a> <a href="mailto:?subject=Check%20this%20GitHub%20repository%20out.%20LocalAI%20-%20Let%27s%20you%20easily%20run%20LLM%20locally.%3A%0Ahttps://github.com/go-skynet/LocalAI" target="_blank"><img src="https://img.shields.io/twitter/url?label=Gmail&logo=Gmail&style=social&url=https://github.com/go-skynet/LocalAI"/></a> <a href="https://www.buymeacoffee.com/mudler" target="_blank"><img src="https://cdn.buymeacoffee.com/buttons/default-orange.png" alt="Buy Me A Coffee" height="23" width="100" style="border-radius:1px"></a>
|
|
||||||
|
|
||||||
</p>
|
If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22
|
||||||
|
|
||||||
<hr>
|
## 💻 [Getting started](https://localai.io/basics/getting_started/index.html)
|
||||||
|
|
||||||
In a nutshell:
|
For a detailed step-by-step introduction, refer to the [Getting Started](https://localai.io/basics/getting_started/index.html) guide. For those in a hurry, here's a straightforward one-liner to launch a LocalAI instance with [phi-2](https://huggingface.co/microsoft/phi-2) using `docker`:
|
||||||
|
|
||||||
- Local, OpenAI drop-in alternative REST API. You own your data.
|
```
|
||||||
- NO GPU required. NO Internet access is required either
|
docker run -ti -p 8080:8080 localai/localai:v2.7.0-ffmpeg-core phi-2
|
||||||
- Optional, GPU Acceleration is available in `llama.cpp`-compatible LLMs. See also the [build section](https://localai.io/basics/build/index.html).
|
```
|
||||||
- Supports multiple models
|
|
||||||
- 🏃 Once loaded the first time, it keeps models loaded in memory for faster inference
|
|
||||||
- ⚡ Doesn't shell-out, but uses C++ bindings for faster inference and better performance.
|
|
||||||
|
|
||||||
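Once an instance like the one launched by the one-liner above is running, any OpenAI-style client can talk to it. The snippet below is a minimal, illustrative Go sketch (not part of the repository) that posts a chat completion request to a LocalAI instance assumed to be listening on `localhost:8080` with a model named `phi-2`; host, port, route, and model name are assumptions to adapt to your setup.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Hypothetical request body for the OpenAI-compatible chat endpoint;
	// host, port, and model name are illustrative assumptions.
	body, _ := json.Marshal(map[string]any{
		"model": "phi-2",
		"messages": []map[string]string{
			{"role": "user", "content": "How are you?"},
		},
	})

	resp, err := http.Post("http://localhost:8080/v1/chat/completions",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Print the raw JSON response; a real client would unmarshal it.
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out))
}
```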
LocalAI was created by [Ettore Di Giacinto](https://github.com/mudler/) and is a community-driven project, focused on making AI accessible to anyone. Contributions, feedback, and PRs are welcome!
|
|
||||||
|
|
||||||
Note that this started just as a [fun weekend project](https://localai.io/#backstory) in order to try to create the necessary pieces for a full AI assistant like `ChatGPT`: the community is growing fast and we are working hard to make it better and more stable. If you want to help, please consider contributing (see below)!
|
|
||||||
|
|
||||||
## 🔥🔥 [Hot topics / Roadmap](https://localai.io/#-hot-topics--roadmap)
|
|
||||||
|
|
||||||
## 🚀 [Features](https://localai.io/features/)
|
## 🚀 [Features](https://localai.io/features/)
|
||||||
|
|
||||||
@@ -91,7 +77,45 @@ Note that this started just as a [fun weekend project](https://localai.io/#backs
|
|||||||
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
|
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
|
||||||
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
|
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
|
||||||
- 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
|
- 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
|
||||||
|
- 🆕 [Vision API](https://localai.io/features/gpt-vision/)
|
||||||
|
|
||||||
|
## 💻 Usage
|
||||||
|
|
||||||
|
Check out the [Getting started](https://localai.io/basics/getting_started/index.html) section in our documentation.
|
||||||
|
|
||||||
|
### 🔗 Community and integrations
|
||||||
|
|
||||||
|
Build and deploy custom containers:
|
||||||
|
- https://github.com/sozercan/aikit
|
||||||
|
|
||||||
|
WebUIs:
|
||||||
|
- https://github.com/Jirubizu/localai-admin
|
||||||
|
- https://github.com/go-skynet/LocalAI-frontend
|
||||||
|
|
||||||
|
Model galleries
|
||||||
|
- https://github.com/go-skynet/model-gallery
|
||||||
|
|
||||||
|
Auto Docker / Model setup
|
||||||
|
- https://io.midori-ai.xyz/howtos/easy-localai-installer/
|
||||||
|
- https://io.midori-ai.xyz/howtos/easy-model-installer/
|
||||||
|
|
||||||
|
Other:
|
||||||
|
- Helm chart https://github.com/go-skynet/helm-charts
|
||||||
|
- VSCode extension https://github.com/badgooooor/localai-vscode-plugin
|
||||||
|
- Local Smart assistant https://github.com/mudler/LocalAGI
|
||||||
|
- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation
|
||||||
|
- Discord bot https://github.com/mudler/LocalAGI/tree/main/examples/discord
|
||||||
|
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
|
||||||
|
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
|
||||||
|
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/
|
||||||
|
|
||||||
|
### 🔗 Resources
|
||||||
|
|
||||||
|
- 🆕 New! [LLM finetuning guide](https://localai.io/docs/advanced/fine-tuning/)
|
||||||
|
- [How to build locally](https://localai.io/basics/build/index.html)
|
||||||
|
- [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes)
|
||||||
|
- [Projects integrating LocalAI](https://localai.io/docs/integrations/)
|
||||||
|
- [How tos section](https://io.midori-ai.xyz/howtos/) (curated by our community)
|
||||||
|
|
||||||
## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
|
## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
|
||||||
|
|
||||||
@@ -100,21 +124,6 @@ Note that this started just as a [fun weekend project](https://localai.io/#backs
|
|||||||
- [Question Answering on Documents locally with LangChain, LocalAI, Chroma, and GPT4All](https://mudler.pm/posts/localai-question-answering/)
|
- [Question Answering on Documents locally with LangChain, LocalAI, Chroma, and GPT4All](https://mudler.pm/posts/localai-question-answering/)
|
||||||
- [Tutorial to use k8sgpt with LocalAI](https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65)
|
- [Tutorial to use k8sgpt with LocalAI](https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65)
|
||||||
|
|
||||||
## 💻 Usage
|
|
||||||
|
|
||||||
Check out the [Getting started](https://localai.io/basics/getting_started/index.html) section in our documentation.
|
|
||||||
|
|
||||||
### 💡 Example: Use Luna-AI Llama model
|
|
||||||
|
|
||||||
See the [documentation](https://localai.io/basics/getting_started)
|
|
||||||
|
|
||||||
### 🔗 Resources
|
|
||||||
|
|
||||||
- [How to build locally](https://localai.io/basics/build/index.html)
|
|
||||||
- [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes)
|
|
||||||
- [Projects integrating LocalAI](https://localai.io/integrations/)
|
|
||||||
- [How tos section](https://localai.io/howtos/) (curated by our community)
|
|
||||||
|
|
||||||
## Citation
|
## Citation
|
||||||
|
|
||||||
If you utilize this repository or its data in a downstream project, please consider citing it with:
|
If you utilize this repository or its data in a downstream project, please consider citing it with:
|
||||||
@@ -137,12 +146,12 @@ Support the project by becoming [a backer or sponsor](https://github.com/sponsor
|
|||||||
|
|
||||||
A huge thank you to our generous sponsors who support this project:
|
A huge thank you to our generous sponsors who support this project:
|
||||||
|
|
||||||
|  |
|
|  |
|
||||||
|:-----------------------------------------------:|
|
|:-----------------------------------------------:|
|
||||||
| [Spectro Cloud](https://www.spectrocloud.com/) |
|
| [Spectro Cloud](https://www.spectrocloud.com/) |
|
||||||
| Spectro Cloud kindly supports LocalAI by providing GPU and computing resources to run tests on Lambda Labs! |
|
| Spectro Cloud kindly supports LocalAI by providing GPU and computing resources to run tests on Lambda Labs! |
|
||||||
|
|
||||||
And a huge shout-out to individuals sponsoring the project by donating hardware or backing the project.
|
And a huge shout-out to individuals sponsoring the project by donating hardware or backing the project.
|
||||||
|
|
||||||
- [Sponsor list](https://github.com/sponsors/mudler)
|
- [Sponsor list](https://github.com/sponsors/mudler)
|
||||||
- JDAM00 (donating HW for the CI)
|
- JDAM00 (donating HW for the CI)
|
||||||
@@ -169,7 +178,6 @@ LocalAI couldn't have been built without the help of great software already avai
|
|||||||
- https://github.com/ggerganov/whisper.cpp
|
- https://github.com/ggerganov/whisper.cpp
|
||||||
- https://github.com/saharNooby/rwkv.cpp
|
- https://github.com/saharNooby/rwkv.cpp
|
||||||
- https://github.com/rhasspy/piper
|
- https://github.com/rhasspy/piper
|
||||||
- https://github.com/cmp-nct/ggllm.cpp
|
|
||||||
|
|
||||||
## 🤗 Contributors
|
## 🤗 Contributors
|
||||||
|
|
||||||
|
|||||||
api/api.go (109 changes)
@@ -1,8 +1,10 @@
|
|||||||
package api
|
package api
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
config "github.com/go-skynet/LocalAI/api/config"
|
config "github.com/go-skynet/LocalAI/api/config"
|
||||||
@@ -13,6 +15,8 @@ import (
|
|||||||
"github.com/go-skynet/LocalAI/internal"
|
"github.com/go-skynet/LocalAI/internal"
|
||||||
"github.com/go-skynet/LocalAI/metrics"
|
"github.com/go-skynet/LocalAI/metrics"
|
||||||
"github.com/go-skynet/LocalAI/pkg/assets"
|
"github.com/go-skynet/LocalAI/pkg/assets"
|
||||||
|
"github.com/go-skynet/LocalAI/pkg/model"
|
||||||
|
"github.com/go-skynet/LocalAI/pkg/startup"
|
||||||
|
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
"github.com/gofiber/fiber/v2/middleware/cors"
|
"github.com/gofiber/fiber/v2/middleware/cors"
|
||||||
@@ -33,6 +37,8 @@ func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader,
|
|||||||
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.Loader.ModelPath)
|
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.Loader.ModelPath)
|
||||||
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
|
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
|
||||||
|
|
||||||
|
startup.PreloadModelsConfigurations(options.ModelLibraryURL, options.Loader.ModelPath, options.ModelsURL...)
|
||||||
|
|
||||||
cl := config.NewConfigLoader()
|
cl := config.NewConfigLoader()
|
||||||
if err := cl.LoadConfigs(options.Loader.ModelPath); err != nil {
|
if err := cl.LoadConfigs(options.Loader.ModelPath); err != nil {
|
||||||
log.Error().Msgf("error loading config files: %s", err.Error())
|
log.Error().Msgf("error loading config files: %s", err.Error())
|
||||||
@@ -44,6 +50,22 @@ func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := cl.Preload(options.Loader.ModelPath); err != nil {
|
||||||
|
log.Error().Msgf("error downloading models: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
if options.PreloadJSONModels != "" {
|
||||||
|
if err := localai.ApplyGalleryFromString(options.Loader.ModelPath, options.PreloadJSONModels, cl, options.Galleries); err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if options.PreloadModelsFromPath != "" {
|
||||||
|
if err := localai.ApplyGalleryFromFile(options.Loader.ModelPath, options.PreloadModelsFromPath, cl, options.Galleries); err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if options.Debug {
|
if options.Debug {
|
||||||
for _, v := range cl.ListConfigs() {
|
for _, v := range cl.ListConfigs() {
|
||||||
cfg, _ := cl.GetConfig(v)
|
cfg, _ := cl.GetConfig(v)
|
||||||
@@ -60,18 +82,6 @@ func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if options.PreloadJSONModels != "" {
|
|
||||||
if err := localai.ApplyGalleryFromString(options.Loader.ModelPath, options.PreloadJSONModels, cl, options.Galleries); err != nil {
|
|
||||||
return nil, nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if options.PreloadModelsFromPath != "" {
|
|
||||||
if err := localai.ApplyGalleryFromFile(options.Loader.ModelPath, options.PreloadModelsFromPath, cl, options.Galleries); err != nil {
|
|
||||||
return nil, nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// turn off any process that was started by GRPC if the context is canceled
|
// turn off any process that was started by GRPC if the context is canceled
|
||||||
go func() {
|
go func() {
|
||||||
<-options.Context.Done()
|
<-options.Context.Done()
|
||||||
@@ -79,6 +89,22 @@ func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader,
|
|||||||
options.Loader.StopAllGRPC()
|
options.Loader.StopAllGRPC()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
if options.WatchDog {
|
||||||
|
wd := model.NewWatchDog(
|
||||||
|
options.Loader,
|
||||||
|
options.WatchDogBusyTimeout,
|
||||||
|
options.WatchDogIdleTimeout,
|
||||||
|
options.WatchDogBusy,
|
||||||
|
options.WatchDogIdle)
|
||||||
|
options.Loader.SetWatchDog(wd)
|
||||||
|
go wd.Run()
|
||||||
|
go func() {
|
||||||
|
<-options.Context.Done()
|
||||||
|
log.Debug().Msgf("Context canceled, shutting down")
|
||||||
|
wd.Shutdown()
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
return options, cl, nil
|
return options, cl, nil
|
||||||
}
|
}
|
||||||
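The watchdog wiring added to `Startup` above registers a `model.WatchDog` with busy and idle timeouts and shuts it down when the server context is cancelled. As a rough illustration of the pattern only (not the actual `model.WatchDog` implementation), a minimal idle-timeout watchdog could look like the sketch below; the field names, timeout values, and stop callback are placeholder assumptions.

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// watchdog periodically checks how long a backend has been idle and
// invokes stop() once the idle timeout is exceeded. Sketch of the pattern only.
type watchdog struct {
	idleTimeout time.Duration
	lastUsed    time.Time
	stop        func()
}

func (w *watchdog) run(ctx context.Context) {
	ticker := time.NewTicker(time.Second)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return // context cancelled: stop watching
		case <-ticker.C:
			if time.Since(w.lastUsed) > w.idleTimeout {
				w.stop()
				return
			}
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	w := &watchdog{
		idleTimeout: 2 * time.Second,
		lastUsed:    time.Now(),
		stop:        func() { fmt.Println("idle timeout reached, stopping backend") },
	}
	w.run(ctx)
}
```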
|
|
||||||
@@ -127,28 +153,46 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
|||||||
|
|
||||||
// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
|
// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
|
||||||
auth := func(c *fiber.Ctx) error {
|
auth := func(c *fiber.Ctx) error {
|
||||||
if len(options.ApiKeys) > 0 {
|
if len(options.ApiKeys) == 0 {
|
||||||
authHeader := c.Get("Authorization")
|
return c.Next()
|
||||||
if authHeader == "" {
|
}
|
||||||
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
|
|
||||||
}
|
// Check for api_keys.json file
|
||||||
authHeaderParts := strings.Split(authHeader, " ")
|
fileContent, err := os.ReadFile("api_keys.json")
|
||||||
if len(authHeaderParts) != 2 || authHeaderParts[0] != "Bearer" {
|
if err == nil {
|
||||||
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid Authorization header format"})
|
// Parse JSON content from the file
|
||||||
|
var fileKeys []string
|
||||||
|
err := json.Unmarshal(fileContent, &fileKeys)
|
||||||
|
if err != nil {
|
||||||
|
return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"})
|
||||||
}
|
}
|
||||||
|
|
||||||
apiKey := authHeaderParts[1]
|
// Add file keys to options.ApiKeys
|
||||||
validApiKey := false
|
options.ApiKeys = append(options.ApiKeys, fileKeys...)
|
||||||
for _, key := range options.ApiKeys {
|
}
|
||||||
if apiKey == key {
|
|
||||||
validApiKey = true
|
if len(options.ApiKeys) == 0 {
|
||||||
}
|
return c.Next()
|
||||||
}
|
}
|
||||||
if !validApiKey {
|
|
||||||
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"})
|
authHeader := c.Get("Authorization")
|
||||||
|
if authHeader == "" {
|
||||||
|
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
|
||||||
|
}
|
||||||
|
authHeaderParts := strings.Split(authHeader, " ")
|
||||||
|
if len(authHeaderParts) != 2 || authHeaderParts[0] != "Bearer" {
|
||||||
|
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid Authorization header format"})
|
||||||
|
}
|
||||||
|
|
||||||
|
apiKey := authHeaderParts[1]
|
||||||
|
for _, key := range options.ApiKeys {
|
||||||
|
if apiKey == key {
|
||||||
|
return c.Next()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return c.Next()
|
|
||||||
|
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"})
|
||||||
|
|
||||||
}
|
}
|
||||||
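The reworked auth middleware above first merges keys from an optional `api_keys.json` file (a plain JSON array of strings) into `options.ApiKeys`, then validates the `Authorization: Bearer <key>` header. A hedged, standalone sketch of that file-merging step outside of Fiber could look like this; the file name and fallback behaviour mirror the diff but the helper itself is illustrative.

```go
package main

import (
	"encoding/json"
	"fmt"
	"os"
)

// loadAPIKeys merges keys from api_keys.json (if present) into the given
// slice. The file is expected to be a JSON array of strings, e.g.
// ["key-one", "key-two"]. Helper name and behaviour are illustrative.
func loadAPIKeys(existing []string) ([]string, error) {
	data, err := os.ReadFile("api_keys.json")
	if err != nil {
		// Missing file is not an error in this sketch: auth falls back to existing keys.
		return existing, nil
	}
	var fileKeys []string
	if err := json.Unmarshal(data, &fileKeys); err != nil {
		return nil, fmt.Errorf("parsing api_keys.json: %w", err)
	}
	return append(existing, fileKeys...), nil
}

func main() {
	keys, err := loadAPIKeys([]string{"configured-key"})
	if err != nil {
		panic(err)
	}
	fmt.Println("accepted API keys:", keys)
}
```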
|
|
||||||
if options.CORS {
|
if options.CORS {
|
||||||
@@ -172,6 +216,11 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
|||||||
}{Version: internal.PrintableVersion()})
|
}{Version: internal.PrintableVersion()})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Make sure directories exists
|
||||||
|
os.MkdirAll(options.ImageDir, 0755)
|
||||||
|
os.MkdirAll(options.AudioDir, 0755)
|
||||||
|
os.MkdirAll(options.Loader.ModelPath, 0755)
|
||||||
|
|
||||||
modelGalleryService := localai.CreateModelGalleryService(options.Galleries, options.Loader.ModelPath, galleryService)
|
modelGalleryService := localai.CreateModelGalleryService(options.Galleries, options.Loader.ModelPath, galleryService)
|
||||||
app.Post("/models/apply", auth, modelGalleryService.ApplyModelGalleryEndpoint())
|
app.Post("/models/apply", auth, modelGalleryService.ApplyModelGalleryEndpoint())
|
||||||
app.Get("/models/available", auth, modelGalleryService.ListModelFromGalleryEndpoint())
|
app.Get("/models/available", auth, modelGalleryService.ListModelFromGalleryEndpoint())
|
||||||
|
|||||||
@@ -16,9 +16,9 @@ import (
|
|||||||
. "github.com/go-skynet/LocalAI/api"
|
. "github.com/go-skynet/LocalAI/api"
|
||||||
"github.com/go-skynet/LocalAI/api/options"
|
"github.com/go-skynet/LocalAI/api/options"
|
||||||
"github.com/go-skynet/LocalAI/metrics"
|
"github.com/go-skynet/LocalAI/metrics"
|
||||||
|
"github.com/go-skynet/LocalAI/pkg/downloader"
|
||||||
"github.com/go-skynet/LocalAI/pkg/gallery"
|
"github.com/go-skynet/LocalAI/pkg/gallery"
|
||||||
"github.com/go-skynet/LocalAI/pkg/model"
|
"github.com/go-skynet/LocalAI/pkg/model"
|
||||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
. "github.com/onsi/ginkgo/v2"
|
. "github.com/onsi/ginkgo/v2"
|
||||||
. "github.com/onsi/gomega"
|
. "github.com/onsi/gomega"
|
||||||
@@ -29,6 +29,15 @@ import (
|
|||||||
"github.com/sashabaranov/go-openai/jsonschema"
|
"github.com/sashabaranov/go-openai/jsonschema"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const testPrompt = `### System:
|
||||||
|
You are an AI assistant that follows instruction extremely well. Help as much as you can.
|
||||||
|
|
||||||
|
### User:
|
||||||
|
|
||||||
|
Can you help rephrasing sentences?
|
||||||
|
|
||||||
|
### Response:`
|
||||||
|
|
||||||
type modelApplyRequest struct {
|
type modelApplyRequest struct {
|
||||||
ID string `json:"id"`
|
ID string `json:"id"`
|
||||||
URL string `json:"url"`
|
URL string `json:"url"`
|
||||||
@@ -61,7 +70,7 @@ func getModelStatus(url string) (response map[string]interface{}) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func getModels(url string) (response []gallery.GalleryModel) {
|
func getModels(url string) (response []gallery.GalleryModel) {
|
||||||
utils.GetURI(url, func(url string, i []byte) error {
|
downloader.GetURI(url, func(url string, i []byte) error {
|
||||||
// Unmarshal YAML data into a struct
|
// Unmarshal YAML data into a struct
|
||||||
return json.Unmarshal(i, &response)
|
return json.Unmarshal(i, &response)
|
||||||
})
|
})
|
||||||
@@ -294,14 +303,14 @@ var _ = Describe("API test", func() {
|
|||||||
Expect(content["backend"]).To(Equal("bert-embeddings"))
|
Expect(content["backend"]).To(Equal("bert-embeddings"))
|
||||||
})
|
})
|
||||||
|
|
||||||
It("runs openllama", Label("llama"), func() {
|
It("runs openllama(llama-ggml backend)", Label("llama"), func() {
|
||||||
if runtime.GOOS != "linux" {
|
if runtime.GOOS != "linux" {
|
||||||
Skip("test supported only on linux")
|
Skip("test supported only on linux")
|
||||||
}
|
}
|
||||||
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||||
URL: "github:go-skynet/model-gallery/openllama_3b.yaml",
|
URL: "github:go-skynet/model-gallery/openllama_3b.yaml",
|
||||||
Name: "openllama_3b",
|
Name: "openllama_3b",
|
||||||
Overrides: map[string]interface{}{"backend": "llama-stable", "mmap": true, "f16": true, "context_size": 128},
|
Overrides: map[string]interface{}{"backend": "llama-ggml", "mmap": true, "f16": true, "context_size": 128},
|
||||||
})
|
})
|
||||||
|
|
||||||
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
|
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
|
||||||
@@ -362,9 +371,10 @@ var _ = Describe("API test", func() {
|
|||||||
Expect(res["location"]).To(Equal("San Francisco, California, United States"), fmt.Sprint(res))
|
Expect(res["location"]).To(Equal("San Francisco, California, United States"), fmt.Sprint(res))
|
||||||
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
|
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
|
||||||
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
|
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
|
||||||
|
|
||||||
})
|
})
|
||||||
|
|
||||||
It("runs openllama gguf", Label("llama-gguf"), func() {
|
It("runs openllama gguf(llama-cpp)", Label("llama-gguf"), func() {
|
||||||
if runtime.GOOS != "linux" {
|
if runtime.GOOS != "linux" {
|
||||||
Skip("test supported only on linux")
|
Skip("test supported only on linux")
|
||||||
}
|
}
|
||||||
@@ -628,28 +638,28 @@ var _ = Describe("API test", func() {
|
|||||||
Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8?
|
Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8?
|
||||||
})
|
})
|
||||||
It("can generate completions", func() {
|
It("can generate completions", func() {
|
||||||
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
|
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: testPrompt})
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
Expect(len(resp.Choices)).To(Equal(1))
|
Expect(len(resp.Choices)).To(Equal(1))
|
||||||
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
|
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
|
||||||
})
|
})
|
||||||
|
|
||||||
It("can generate chat completions ", func() {
|
It("can generate chat completions ", func() {
|
||||||
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
|
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}})
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
Expect(len(resp.Choices)).To(Equal(1))
|
Expect(len(resp.Choices)).To(Equal(1))
|
||||||
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
|
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
|
||||||
})
|
})
|
||||||
|
|
||||||
It("can generate completions from model configs", func() {
|
It("can generate completions from model configs", func() {
|
||||||
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: "abcdedfghikl"})
|
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: testPrompt})
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
Expect(len(resp.Choices)).To(Equal(1))
|
Expect(len(resp.Choices)).To(Equal(1))
|
||||||
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
|
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
|
||||||
})
|
})
|
||||||
|
|
||||||
It("can generate chat completions from model configs", func() {
|
It("can generate chat completions from model configs", func() {
|
||||||
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
|
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}})
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
Expect(len(resp.Choices)).To(Equal(1))
|
Expect(len(resp.Choices)).To(Equal(1))
|
||||||
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
|
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
|
||||||
@@ -657,7 +667,7 @@ var _ = Describe("API test", func() {
|
|||||||
|
|
||||||
It("returns errors", func() {
|
It("returns errors", func() {
|
||||||
backends := len(model.AutoLoadBackends) + 1 // +1 for huggingface
|
backends := len(model.AutoLoadBackends) + 1 // +1 for huggingface
|
||||||
_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
|
_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: testPrompt})
|
||||||
Expect(err).To(HaveOccurred())
|
Expect(err).To(HaveOccurred())
|
||||||
Expect(err.Error()).To(ContainSubstring(fmt.Sprintf("error, status code: 500, message: could not load model - all backends returned error: %d errors occurred:", backends)))
|
Expect(err.Error()).To(ContainSubstring(fmt.Sprintf("error, status code: 500, message: could not load model - all backends returned error: %d errors occurred:", backends)))
|
||||||
})
|
})
|
||||||
@@ -704,7 +714,7 @@ var _ = Describe("API test", func() {
|
|||||||
})
|
})
|
||||||
|
|
||||||
Context("External gRPC calls", func() {
|
Context("External gRPC calls", func() {
|
||||||
It("calculate embeddings with huggingface", func() {
|
It("calculate embeddings with sentencetransformers", func() {
|
||||||
if runtime.GOOS != "linux" {
|
if runtime.GOOS != "linux" {
|
||||||
Skip("test supported only on linux")
|
Skip("test supported only on linux")
|
||||||
}
|
}
|
||||||
@@ -833,13 +843,13 @@ var _ = Describe("API test", func() {
|
|||||||
app.Shutdown()
|
app.Shutdown()
|
||||||
})
|
})
|
||||||
It("can generate chat completions from config file (list1)", func() {
|
It("can generate chat completions from config file (list1)", func() {
|
||||||
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "abcdedfghikl"}}})
|
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}})
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
Expect(len(resp.Choices)).To(Equal(1))
|
Expect(len(resp.Choices)).To(Equal(1))
|
||||||
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
|
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
|
||||||
})
|
})
|
||||||
It("can generate chat completions from config file (list2)", func() {
|
It("can generate chat completions from config file (list2)", func() {
|
||||||
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "abcdedfghikl"}}})
|
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}})
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
Expect(len(resp.Choices)).To(Equal(1))
|
Expect(len(resp.Choices)).To(Equal(1))
|
||||||
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
|
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.
|
|||||||
|
|
||||||
var fn func() ([]float32, error)
|
var fn func() ([]float32, error)
|
||||||
switch model := inferenceModel.(type) {
|
switch model := inferenceModel.(type) {
|
||||||
case *grpc.Client:
|
case grpc.Backend:
|
||||||
fn = func() ([]float32, error) {
|
fn = func() ([]float32, error) {
|
||||||
predictOptions := gRPCPredictOpts(c, loader.ModelPath)
|
predictOptions := gRPCPredictOpts(c, loader.ModelPath)
|
||||||
if len(tokens) > 0 {
|
if len(tokens) > 0 {
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
|
|||||||
model.WithContext(o.Context),
|
model.WithContext(o.Context),
|
||||||
model.WithModel(c.Model),
|
model.WithModel(c.Model),
|
||||||
model.WithLoadGRPCLoadModelOpts(&proto.ModelOptions{
|
model.WithLoadGRPCLoadModelOpts(&proto.ModelOptions{
|
||||||
CUDA: c.Diffusers.CUDA,
|
CUDA: c.CUDA || c.Diffusers.CUDA,
|
||||||
SchedulerType: c.Diffusers.SchedulerType,
|
SchedulerType: c.Diffusers.SchedulerType,
|
||||||
PipelineType: c.Diffusers.PipelineType,
|
PipelineType: c.Diffusers.PipelineType,
|
||||||
CFGScale: c.Diffusers.CFGScale,
|
CFGScale: c.Diffusers.CFGScale,
|
||||||
@@ -27,6 +27,7 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
|
|||||||
CLIPModel: c.Diffusers.ClipModel,
|
CLIPModel: c.Diffusers.ClipModel,
|
||||||
CLIPSubfolder: c.Diffusers.ClipSubFolder,
|
CLIPSubfolder: c.Diffusers.ClipSubFolder,
|
||||||
CLIPSkip: int32(c.Diffusers.ClipSkip),
|
CLIPSkip: int32(c.Diffusers.ClipSkip),
|
||||||
|
ControlNet: c.Diffusers.ControlNet,
|
||||||
}),
|
}),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ func ModelInference(ctx context.Context, s string, images []string, loader *mode
|
|||||||
|
|
||||||
grpcOpts := gRPCModelOpts(c)
|
grpcOpts := gRPCModelOpts(c)
|
||||||
|
|
||||||
var inferenceModel *grpc.Client
|
var inferenceModel grpc.Backend
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
opts := modelOpts(c, o, []model.Option{
|
opts := modelOpts(c, o, []model.Option{
|
||||||
@@ -159,6 +159,9 @@ func Finetune(config config.Config, input, prediction string) string {
|
|||||||
for _, c := range config.TrimSpace {
|
for _, c := range config.TrimSpace {
|
||||||
prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
|
prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
|
||||||
}
|
}
|
||||||
return prediction
|
|
||||||
|
|
||||||
|
for _, c := range config.TrimSuffix {
|
||||||
|
prediction = strings.TrimSpace(strings.TrimSuffix(prediction, c))
|
||||||
|
}
|
||||||
|
return prediction
|
||||||
}
|
}
|
||||||
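The added `TrimSuffix` pass in `Finetune` strips configured suffixes (for example a stop token such as `</s>`) from the end of the prediction, after the existing cutstrings and trim-space handling. A small hedged sketch of the same idea in isolation, with an example suffix list:

```go
package main

import (
	"fmt"
	"strings"
)

// cleanPrediction applies a trim-suffix plus trim-space pass similar in spirit
// to the post-processing above; the suffix list is an example only.
func cleanPrediction(prediction string, trimSuffixes []string) string {
	for _, s := range trimSuffixes {
		prediction = strings.TrimSpace(strings.TrimSuffix(prediction, s))
	}
	return prediction
}

func main() {
	out := cleanPrediction(" Hello there! </s>", []string{"</s>"})
	fmt.Printf("%q\n", out) // "Hello there!"
}
```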
|
|||||||
@@ -16,6 +16,10 @@ func modelOpts(c config.Config, o *options.Option, opts []model.Option) []model.
|
|||||||
opts = append(opts, model.WithSingleActiveBackend())
|
opts = append(opts, model.WithSingleActiveBackend())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if o.ParallelBackendRequests {
|
||||||
|
opts = append(opts, model.EnableParallelRequests)
|
||||||
|
}
|
||||||
|
|
||||||
if c.GRPC.Attempts != 0 {
|
if c.GRPC.Attempts != 0 {
|
||||||
opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts))
|
opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts))
|
||||||
}
|
}
|
||||||
@@ -42,6 +46,7 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
|
|||||||
Seed: int32(c.Seed),
|
Seed: int32(c.Seed),
|
||||||
NBatch: int32(b),
|
NBatch: int32(b),
|
||||||
NoMulMatQ: c.NoMulMatQ,
|
NoMulMatQ: c.NoMulMatQ,
|
||||||
|
CUDA: c.CUDA, // diffusers, transformers
|
||||||
DraftModel: c.DraftModel,
|
DraftModel: c.DraftModel,
|
||||||
AudioPath: c.VallE.AudioPath,
|
AudioPath: c.VallE.AudioPath,
|
||||||
Quantization: c.Quantization,
|
Quantization: c.Quantization,
|
||||||
@@ -58,6 +63,8 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
|
|||||||
F16Memory: c.F16,
|
F16Memory: c.F16,
|
||||||
MLock: c.MMlock,
|
MLock: c.MMlock,
|
||||||
RopeFreqBase: c.RopeFreqBase,
|
RopeFreqBase: c.RopeFreqBase,
|
||||||
|
RopeScaling: c.RopeScaling,
|
||||||
|
Type: c.ModelType,
|
||||||
RopeFreqScale: c.RopeFreqScale,
|
RopeFreqScale: c.RopeFreqScale,
|
||||||
NUMA: c.NUMA,
|
NUMA: c.NUMA,
|
||||||
Embeddings: c.Embeddings,
|
Embeddings: c.Embeddings,
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
|
||||||
api_config "github.com/go-skynet/LocalAI/api/config"
|
api_config "github.com/go-skynet/LocalAI/api/config"
|
||||||
|
config "github.com/go-skynet/LocalAI/api/config"
|
||||||
"github.com/go-skynet/LocalAI/api/options"
|
"github.com/go-skynet/LocalAI/api/options"
|
||||||
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||||
@@ -29,16 +30,20 @@ func generateUniqueFileName(dir, baseName, ext string) string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *options.Option) (string, *proto.Result, error) {
|
func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *options.Option, c config.Config) (string, *proto.Result, error) {
|
||||||
bb := backend
|
bb := backend
|
||||||
if bb == "" {
|
if bb == "" {
|
||||||
bb = model.PiperBackend
|
bb = model.PiperBackend
|
||||||
}
|
}
|
||||||
|
|
||||||
|
grpcOpts := gRPCModelOpts(c)
|
||||||
|
|
||||||
opts := modelOpts(api_config.Config{}, o, []model.Option{
|
opts := modelOpts(api_config.Config{}, o, []model.Option{
|
||||||
model.WithBackendString(bb),
|
model.WithBackendString(bb),
|
||||||
model.WithModel(modelFile),
|
model.WithModel(modelFile),
|
||||||
model.WithContext(o.Context),
|
model.WithContext(o.Context),
|
||||||
model.WithAssetDir(o.AssetsDestination),
|
model.WithAssetDir(o.AssetsDestination),
|
||||||
|
model.WithLoadGRPCLoadModelOpts(grpcOpts),
|
||||||
})
|
})
|
||||||
piperModel, err := o.Loader.BackendLoader(opts...)
|
piperModel, err := o.Loader.BackendLoader(opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -59,9 +64,13 @@ func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *opt
|
|||||||
// If the model file is not empty, we pass it joined with the model path
|
// If the model file is not empty, we pass it joined with the model path
|
||||||
modelPath := ""
|
modelPath := ""
|
||||||
if modelFile != "" {
|
if modelFile != "" {
|
||||||
modelPath = filepath.Join(o.Loader.ModelPath, modelFile)
|
if bb != model.TransformersMusicGen {
|
||||||
if err := utils.VerifyPath(modelPath, o.Loader.ModelPath); err != nil {
|
modelPath = filepath.Join(o.Loader.ModelPath, modelFile)
|
||||||
return "", nil, err
|
if err := utils.VerifyPath(modelPath, o.Loader.ModelPath); err != nil {
|
||||||
|
return "", nil, err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
modelPath = modelFile
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package api_config
|
package api_config
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/fs"
|
"io/fs"
|
||||||
"os"
|
"os"
|
||||||
@@ -8,6 +9,9 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
"github.com/go-skynet/LocalAI/pkg/downloader"
|
||||||
|
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||||
|
"github.com/rs/zerolog/log"
|
||||||
"gopkg.in/yaml.v3"
|
"gopkg.in/yaml.v3"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -38,14 +42,28 @@ type Config struct {
|
|||||||
|
|
||||||
// Diffusers
|
// Diffusers
|
||||||
Diffusers Diffusers `yaml:"diffusers"`
|
Diffusers Diffusers `yaml:"diffusers"`
|
||||||
|
Step int `yaml:"step"`
|
||||||
Step int `yaml:"step"`
|
|
||||||
|
|
||||||
// GRPC Options
|
// GRPC Options
|
||||||
GRPC GRPC `yaml:"grpc"`
|
GRPC GRPC `yaml:"grpc"`
|
||||||
|
|
||||||
// Vall-e-x
|
// Vall-e-x
|
||||||
VallE VallE `yaml:"vall-e"`
|
VallE VallE `yaml:"vall-e"`
|
||||||
|
|
||||||
|
// CUDA
|
||||||
|
// Explicitly enable CUDA or not (some backends might need it)
|
||||||
|
CUDA bool `yaml:"cuda"`
|
||||||
|
|
||||||
|
DownloadFiles []File `yaml:"download_files"`
|
||||||
|
|
||||||
|
Description string `yaml:"description"`
|
||||||
|
Usage string `yaml:"usage"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type File struct {
|
||||||
|
Filename string `yaml:"filename" json:"filename"`
|
||||||
|
SHA256 string `yaml:"sha256" json:"sha256"`
|
||||||
|
URI string `yaml:"uri" json:"uri"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type VallE struct {
|
type VallE struct {
|
||||||
@@ -65,15 +83,16 @@ type GRPC struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type Diffusers struct {
|
type Diffusers struct {
|
||||||
|
CUDA bool `yaml:"cuda"`
|
||||||
PipelineType string `yaml:"pipeline_type"`
|
PipelineType string `yaml:"pipeline_type"`
|
||||||
SchedulerType string `yaml:"scheduler_type"`
|
SchedulerType string `yaml:"scheduler_type"`
|
||||||
CUDA bool `yaml:"cuda"`
|
|
||||||
EnableParameters string `yaml:"enable_parameters"` // A list of comma separated parameters to specify
|
EnableParameters string `yaml:"enable_parameters"` // A list of comma separated parameters to specify
|
||||||
CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale
|
CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale
|
||||||
IMG2IMG bool `yaml:"img2img"` // Image to Image Diffuser
|
IMG2IMG bool `yaml:"img2img"` // Image to Image Diffuser
|
||||||
ClipSkip int `yaml:"clip_skip"` // Skip every N frames
|
ClipSkip int `yaml:"clip_skip"` // Skip every N frames
|
||||||
ClipModel string `yaml:"clip_model"` // Clip model to use
|
ClipModel string `yaml:"clip_model"` // Clip model to use
|
||||||
ClipSubFolder string `yaml:"clip_subfolder"` // Subfolder to use for clip model
|
ClipSubFolder string `yaml:"clip_subfolder"` // Subfolder to use for clip model
|
||||||
|
ControlNet string `yaml:"control_net"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type LLMConfig struct {
|
type LLMConfig struct {
|
||||||
@@ -96,18 +115,22 @@ type LLMConfig struct {
|
|||||||
StopWords []string `yaml:"stopwords"`
|
StopWords []string `yaml:"stopwords"`
|
||||||
Cutstrings []string `yaml:"cutstrings"`
|
Cutstrings []string `yaml:"cutstrings"`
|
||||||
TrimSpace []string `yaml:"trimspace"`
|
TrimSpace []string `yaml:"trimspace"`
|
||||||
ContextSize int `yaml:"context_size"`
|
TrimSuffix []string `yaml:"trimsuffix"`
|
||||||
NUMA bool `yaml:"numa"`
|
|
||||||
LoraAdapter string `yaml:"lora_adapter"`
|
ContextSize int `yaml:"context_size"`
|
||||||
LoraBase string `yaml:"lora_base"`
|
NUMA bool `yaml:"numa"`
|
||||||
LoraScale float32 `yaml:"lora_scale"`
|
LoraAdapter string `yaml:"lora_adapter"`
|
||||||
NoMulMatQ bool `yaml:"no_mulmatq"`
|
LoraBase string `yaml:"lora_base"`
|
||||||
DraftModel string `yaml:"draft_model"`
|
LoraScale float32 `yaml:"lora_scale"`
|
||||||
NDraft int32 `yaml:"n_draft"`
|
NoMulMatQ bool `yaml:"no_mulmatq"`
|
||||||
Quantization string `yaml:"quantization"`
|
DraftModel string `yaml:"draft_model"`
|
||||||
MMProj string `yaml:"mmproj"`
|
NDraft int32 `yaml:"n_draft"`
|
||||||
|
Quantization string `yaml:"quantization"`
|
||||||
|
MMProj string `yaml:"mmproj"`
|
||||||
|
|
||||||
|
RopeScaling string `yaml:"rope_scaling"`
|
||||||
|
ModelType string `yaml:"type"`
|
||||||
|
|
||||||
RopeScaling string `yaml:"rope_scaling"`
|
|
||||||
YarnExtFactor float32 `yaml:"yarn_ext_factor"`
|
YarnExtFactor float32 `yaml:"yarn_ext_factor"`
|
||||||
YarnAttnFactor float32 `yaml:"yarn_attn_factor"`
|
YarnAttnFactor float32 `yaml:"yarn_attn_factor"`
|
||||||
YarnBetaFast float32 `yaml:"yarn_beta_fast"`
|
YarnBetaFast float32 `yaml:"yarn_beta_fast"`
|
||||||
@@ -160,6 +183,60 @@ func (c *Config) FunctionToCall() string {
|
|||||||
return c.functionCallNameString
|
return c.functionCallNameString
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Load a config file for a model
|
||||||
|
func Load(modelName, modelPath string, cm *ConfigLoader, debug bool, threads, ctx int, f16 bool) (*Config, error) {
|
||||||
|
// Load a config file if present after the model name
|
||||||
|
modelConfig := filepath.Join(modelPath, modelName+".yaml")
|
||||||
|
|
||||||
|
var cfg *Config
|
||||||
|
|
||||||
|
defaults := func() {
|
||||||
|
cfg = DefaultConfig(modelName)
|
||||||
|
cfg.ContextSize = ctx
|
||||||
|
cfg.Threads = threads
|
||||||
|
cfg.F16 = f16
|
||||||
|
cfg.Debug = debug
|
||||||
|
}
|
||||||
|
|
||||||
|
cfgExisting, exists := cm.GetConfig(modelName)
|
||||||
|
if !exists {
|
||||||
|
if _, err := os.Stat(modelConfig); err == nil {
|
||||||
|
if err := cm.LoadConfig(modelConfig); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
|
||||||
|
}
|
||||||
|
cfgExisting, exists = cm.GetConfig(modelName)
|
||||||
|
if exists {
|
||||||
|
cfg = &cfgExisting
|
||||||
|
} else {
|
||||||
|
defaults()
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
defaults()
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
cfg = &cfgExisting
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set the parameters for the language model prediction
|
||||||
|
//updateConfig(cfg, input)
|
||||||
|
|
||||||
|
// Don't allow 0 as setting
|
||||||
|
if cfg.Threads == 0 {
|
||||||
|
if threads != 0 {
|
||||||
|
cfg.Threads = threads
|
||||||
|
} else {
|
||||||
|
cfg.Threads = 4
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enforce debug flag if passed from CLI
|
||||||
|
if debug {
|
||||||
|
cfg.Debug = true
|
||||||
|
}
|
||||||
|
|
||||||
|
return cfg, nil
|
||||||
|
}
|
||||||
|
|
||||||
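The new `Load` helper resolves a per-model YAML file when one exists in the model path, and otherwise builds a default configuration from the CLI-style fallbacks (threads, context size, f16, debug). A brief usage sketch under the package API shown in this diff; the model name, path, and fallback values are examples only.

```go
package main

import (
	"fmt"

	config "github.com/go-skynet/LocalAI/api/config"
)

func main() {
	cm := config.NewConfigLoader()

	// Example values only: model name, model path, and the fallbacks
	// (debug, threads, context size, f16) that Load uses when no
	// phi-2.yaml is present in the model path.
	cfg, err := config.Load("phi-2", "/models", cm, false, 4, 512, false)
	if err != nil {
		fmt.Println("load failed:", err)
		return
	}
	fmt.Println("backend:", cfg.Backend, "threads:", cfg.Threads)
}
```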
func defaultPredictOptions(modelFile string) PredictionOptions {
|
func defaultPredictOptions(modelFile string) PredictionOptions {
|
||||||
return PredictionOptions{
|
return PredictionOptions{
|
||||||
TopP: 0.7,
|
TopP: 0.7,
|
||||||
@@ -260,6 +337,67 @@ func (cm *ConfigLoader) ListConfigs() []string {
|
|||||||
return res
|
return res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Preload prepares models that are not local, downloading them from URLs or Hugging Face repositories
|
||||||
|
func (cm *ConfigLoader) Preload(modelPath string) error {
|
||||||
|
cm.Lock()
|
||||||
|
defer cm.Unlock()
|
||||||
|
|
||||||
|
status := func(fileName, current, total string, percent float64) {
|
||||||
|
utils.DisplayDownloadFunction(fileName, current, total, percent)
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Info().Msgf("Preloading models from %s", modelPath)
|
||||||
|
|
||||||
|
for i, config := range cm.configs {
|
||||||
|
|
||||||
|
// Download files and verify their SHA
|
||||||
|
for _, file := range config.DownloadFiles {
|
||||||
|
log.Debug().Msgf("Checking %q exists and matches SHA", file.Filename)
|
||||||
|
|
||||||
|
if err := utils.VerifyPath(file.Filename, modelPath); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// Create file path
|
||||||
|
filePath := filepath.Join(modelPath, file.Filename)
|
||||||
|
|
||||||
|
if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
modelURL := config.PredictionOptions.Model
|
||||||
|
modelURL = downloader.ConvertURL(modelURL)
|
||||||
|
|
||||||
|
if downloader.LooksLikeURL(modelURL) {
|
||||||
|
// md5 of model name
|
||||||
|
md5Name := utils.MD5(modelURL)
|
||||||
|
|
||||||
|
// check if file exists
|
||||||
|
if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
|
||||||
|
err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cc := cm.configs[i]
|
||||||
|
c := &cc
|
||||||
|
c.PredictionOptions.Model = md5Name
|
||||||
|
cm.configs[i] = *c
|
||||||
|
}
|
||||||
|
if cm.configs[i].Name != "" {
|
||||||
|
log.Info().Msgf("Model name: %s", cm.configs[i].Name)
|
||||||
|
}
|
||||||
|
if cm.configs[i].Description != "" {
|
||||||
|
log.Info().Msgf("Model description: %s", cm.configs[i].Description)
|
||||||
|
}
|
||||||
|
if cm.configs[i].Usage != "" {
|
||||||
|
log.Info().Msgf("Model usage: \n%s", cm.configs[i].Usage)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
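`Preload` walks every loaded config, fetches any `download_files` entries, and verifies them against the configured SHA256 before resolving remote model URIs to local files. The checksum step can be illustrated with a small stand-alone sketch; the path and digest are placeholders and this is not the repository's `downloader` package.

```go
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io"
	"os"
)

// verifySHA256 hashes a local file and compares it to an expected hex digest,
// which is roughly what a download-and-verify step needs to do.
func verifySHA256(path, expected string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		return err
	}
	got := hex.EncodeToString(h.Sum(nil))
	if got != expected {
		return fmt.Errorf("checksum mismatch: got %s, want %s", got, expected)
	}
	return nil
}

func main() {
	// Placeholder path and digest for illustration only.
	if err := verifySHA256("model.bin", "expected-hex-digest"); err != nil {
		fmt.Println("verification failed:", err)
	}
}
```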
func (cm *ConfigLoader) LoadConfigs(path string) error {
|
func (cm *ConfigLoader) LoadConfigs(path string) error {
|
||||||
cm.Lock()
|
cm.Lock()
|
||||||
defer cm.Unlock()
|
defer cm.Unlock()
|
||||||
@@ -277,7 +415,7 @@ func (cm *ConfigLoader) LoadConfigs(path string) error {
|
|||||||
}
|
}
|
||||||
for _, file := range files {
|
for _, file := range files {
|
||||||
// Skip templates, YAML and .keep files
|
// Skip templates, YAML and .keep files
|
||||||
if !strings.Contains(file.Name(), ".yaml") {
|
if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
c, err := ReadConfig(filepath.Join(path, file.Name()))
|
c, err := ReadConfig(filepath.Join(path, file.Name()))
|
||||||
|
|||||||
api/ctx/fiber.go (new file, 43 lines)
@@ -0,0 +1,43 @@
|
|||||||
|
package fiberContext
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/go-skynet/LocalAI/pkg/model"
|
||||||
|
"github.com/gofiber/fiber/v2"
|
||||||
|
"github.com/rs/zerolog/log"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ModelFromContext returns the model from the context
|
||||||
|
// If no model is specified, it will take the first available
|
||||||
|
// Takes a model string as input which should be the one received from the user request.
|
||||||
|
// It returns the model name resolved from the context and an error if any.
|
||||||
|
func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) {
|
||||||
|
if ctx.Params("model") != "" {
|
||||||
|
modelInput = ctx.Params("model")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set model from bearer token, if available
|
||||||
|
bearer := strings.TrimLeft(ctx.Get("authorization"), "Bearer ")
|
||||||
|
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
|
||||||
|
|
||||||
|
// If no model was specified, take the first available
|
||||||
|
if modelInput == "" && !bearerExists && firstModel {
|
||||||
|
models, _ := loader.ListModels()
|
||||||
|
if len(models) > 0 {
|
||||||
|
modelInput = models[0]
|
||||||
|
log.Debug().Msgf("No model specified, using: %s", modelInput)
|
||||||
|
} else {
|
||||||
|
log.Debug().Msgf("No model specified, returning error")
|
||||||
|
return "", fmt.Errorf("no model specified")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If a model is found in bearer token takes precedence
|
||||||
|
if bearerExists {
|
||||||
|
log.Debug().Msgf("Using model from bearer token: %s", bearer)
|
||||||
|
modelInput = bearer
|
||||||
|
}
|
||||||
|
return modelInput, nil
|
||||||
|
}
|
||||||
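A typical caller of the new `ModelFromContext` helper is a Fiber handler that resolves the model (from the route parameter, bearer token, or first available model) before loading a config. The sketch below shows that shape; the handler name, route, and loader construction are illustrative assumptions, not code from the repository.

```go
package main

import (
	fiberContext "github.com/go-skynet/LocalAI/api/ctx"
	model "github.com/go-skynet/LocalAI/pkg/model"
	"github.com/gofiber/fiber/v2"
)

// exampleHandler resolves the model for a request via ModelFromContext and
// echoes the chosen name back. Wiring here is illustrative only.
func exampleHandler(loader *model.ModelLoader) fiber.Handler {
	return func(c *fiber.Ctx) error {
		modelName, err := fiberContext.ModelFromContext(c, loader, c.Query("model"), true)
		if err != nil {
			return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
		}
		return c.JSON(fiber.Map{"model": modelName})
	}
}

func main() {
	app := fiber.New()
	// Loader constructor call assumed for illustration; a real server reuses
	// the loader already held in its options.
	loader := model.NewModelLoader("/models")
	app.Get("/which-model", exampleHandler(loader))
	app.Listen(":8080")
}
```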
@@ -123,13 +123,12 @@ func BackendMonitorEndpoint(bm BackendMonitor) func(c *fiber.Ctx) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
client := bm.options.Loader.CheckIsLoaded(backendId)
|
model := bm.options.Loader.CheckIsLoaded(backendId)
|
||||||
|
if model == "" {
|
||||||
if client == nil {
|
|
||||||
return fmt.Errorf("backend %s is not currently loaded", backendId)
|
return fmt.Errorf("backend %s is not currently loaded", backendId)
|
||||||
}
|
}
|
||||||
|
|
||||||
status, rpcErr := client.Status(context.TODO())
|
status, rpcErr := model.GRPC(false, nil).Status(context.TODO())
|
||||||
if rpcErr != nil {
|
if rpcErr != nil {
|
||||||
log.Warn().Msgf("backend %s experienced an error retrieving status info: %s", backendId, rpcErr.Error())
|
log.Warn().Msgf("backend %s experienced an error retrieving status info: %s", backendId, rpcErr.Error())
|
||||||
val, slbErr := bm.SampleLocalBackendProcess(backendId)
|
val, slbErr := bm.SampleLocalBackendProcess(backendId)
|
||||||
|
|||||||
@@ -130,6 +130,12 @@ func (g *galleryApplier) Start(c context.Context, cm *config.ConfigLoader) {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
err = cm.Preload(g.modelPath)
|
||||||
|
if err != nil {
|
||||||
|
updateError(err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
g.updateStatus(op.id, &galleryOpStatus{Processed: true, Message: "completed", Progress: 100})
|
g.updateStatus(op.id, &galleryOpStatus{Processed: true, Message: "completed", Progress: 100})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,8 @@ package localai
|
|||||||
import (
|
import (
|
||||||
"github.com/go-skynet/LocalAI/api/backend"
|
"github.com/go-skynet/LocalAI/api/backend"
|
||||||
config "github.com/go-skynet/LocalAI/api/config"
|
config "github.com/go-skynet/LocalAI/api/config"
|
||||||
|
fiberContext "github.com/go-skynet/LocalAI/api/ctx"
|
||||||
|
"github.com/rs/zerolog/log"
|
||||||
|
|
||||||
"github.com/go-skynet/LocalAI/api/options"
|
"github.com/go-skynet/LocalAI/api/options"
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
@@ -18,12 +20,31 @@ func TTSEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
|
|
||||||
input := new(TTSRequest)
|
input := new(TTSRequest)
|
||||||
|
|
||||||
// Get input data from the request body
|
// Get input data from the request body
|
||||||
if err := c.BodyParser(input); err != nil {
|
if err := c.BodyParser(input); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
filePath, _, err := backend.ModelTTS(input.Backend, input.Input, input.Model, o.Loader, o)
|
modelFile, err := fiberContext.ModelFromContext(c, o.Loader, input.Model, false)
|
||||||
|
if err != nil {
|
||||||
|
modelFile = input.Model
|
||||||
|
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||||
|
}
|
||||||
|
cfg, err := config.Load(modelFile, o.Loader.ModelPath, cm, false, 0, 0, false)
|
||||||
|
if err != nil {
|
||||||
|
modelFile = input.Model
|
||||||
|
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||||
|
} else {
|
||||||
|
modelFile = cfg.Model
|
||||||
|
}
|
||||||
|
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||||
|
|
||||||
|
if input.Backend != "" {
|
||||||
|
cfg.Backend = input.Backend
|
||||||
|
}
|
||||||
|
|
||||||
|
filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, o.Loader, o, *cfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -58,12 +58,12 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
processFunctions := false
|
processFunctions := false
|
||||||
funcs := grammar.Functions{}
|
funcs := grammar.Functions{}
|
||||||
modelFile, input, err := readInput(c, o, true)
|
modelFile, input, err := readRequest(c, o, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
|
config, input, err := mergeRequestWithConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
@@ -81,7 +81,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
noActionDescription = config.FunctionsConfig.NoActionDescriptionName
|
noActionDescription = config.FunctionsConfig.NoActionDescriptionName
|
||||||
}
|
}
|
||||||
|
|
||||||
if input.ResponseFormat == "json_object" {
|
if input.ResponseFormat.Type == "json_object" {
|
||||||
input.Grammar = grammar.JSONBNF
|
input.Grammar = grammar.JSONBNF
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -219,7 +219,12 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
c.Set("Transfer-Encoding", "chunked")
|
c.Set("Transfer-Encoding", "chunked")
|
||||||
}
|
}
|
||||||
|
|
||||||
templateFile := config.Model
|
templateFile := ""
|
||||||
|
|
||||||
|
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||||
|
if o.Loader.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
|
||||||
|
templateFile = config.Model
|
||||||
|
}
|
||||||
|
|
||||||
if config.TemplateConfig.Chat != "" && !processFunctions {
|
if config.TemplateConfig.Chat != "" && !processFunctions {
|
||||||
templateFile = config.TemplateConfig.Chat
|
templateFile = config.TemplateConfig.Chat
|
||||||
@@ -229,18 +234,19 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
templateFile = config.TemplateConfig.Functions
|
templateFile = config.TemplateConfig.Functions
|
||||||
}
|
}
|
||||||
|
|
||||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
if templateFile != "" {
|
||||||
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
|
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
|
||||||
SystemPrompt: config.SystemPrompt,
|
SystemPrompt: config.SystemPrompt,
|
||||||
SuppressSystemPrompt: suppressConfigSystemPrompt,
|
SuppressSystemPrompt: suppressConfigSystemPrompt,
|
||||||
Input: predInput,
|
Input: predInput,
|
||||||
Functions: funcs,
|
Functions: funcs,
|
||||||
})
|
})
|
||||||
if err == nil {
|
if err == nil {
|
||||||
predInput = templatedInput
|
predInput = templatedInput
|
||||||
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
||||||
} else {
|
} else {
|
||||||
log.Debug().Msgf("Template failed loading: %s", err.Error())
|
log.Debug().Msgf("Template failed loading: %s", err.Error())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("Prompt (after templating): %s", predInput)
|
log.Debug().Msgf("Prompt (after templating): %s", predInput)
|
||||||
|
|||||||
@@ -53,19 +53,19 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
|||||||
}
|
}
|
||||||
|
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
modelFile, input, err := readInput(c, o, true)
|
modelFile, input, err := readRequest(c, o, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("`input`: %+v", input)
|
log.Debug().Msgf("`input`: %+v", input)
|
||||||
|
|
||||||
config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
|
config, input, err := mergeRequestWithConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if input.ResponseFormat == "json_object" {
|
if input.ResponseFormat.Type == "json_object" {
|
||||||
input.Grammar = grammar.JSONBNF
|
input.Grammar = grammar.JSONBNF
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -81,7 +81,12 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
|||||||
c.Set("Transfer-Encoding", "chunked")
|
c.Set("Transfer-Encoding", "chunked")
|
||||||
}
|
}
|
||||||
|
|
||||||
templateFile := config.Model
|
templateFile := ""
|
||||||
|
|
||||||
|
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||||
|
if o.Loader.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
|
||||||
|
templateFile = config.Model
|
||||||
|
}
|
||||||
|
|
||||||
if config.TemplateConfig.Completion != "" {
|
if config.TemplateConfig.Completion != "" {
|
||||||
templateFile = config.TemplateConfig.Completion
|
templateFile = config.TemplateConfig.Completion
|
||||||
@@ -94,13 +99,14 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
|||||||
|
|
||||||
predInput := config.PromptStrings[0]
|
predInput := config.PromptStrings[0]
|
||||||
|
|
||||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
if templateFile != "" {
|
||||||
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
|
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
|
||||||
Input: predInput,
|
Input: predInput,
|
||||||
})
|
})
|
||||||
if err == nil {
|
if err == nil {
|
||||||
predInput = templatedInput
|
predInput = templatedInput
|
||||||
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
responses := make(chan schema.OpenAIResponse)
|
responses := make(chan schema.OpenAIResponse)
|
||||||
@@ -145,14 +151,16 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
|||||||
totalTokenUsage := backend.TokenUsage{}
|
totalTokenUsage := backend.TokenUsage{}
|
||||||
|
|
||||||
for k, i := range config.PromptStrings {
|
for k, i := range config.PromptStrings {
|
||||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
if templateFile != "" {
|
||||||
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
|
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||||
SystemPrompt: config.SystemPrompt,
|
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
|
||||||
Input: i,
|
SystemPrompt: config.SystemPrompt,
|
||||||
})
|
Input: i,
|
||||||
if err == nil {
|
})
|
||||||
i = templatedInput
|
if err == nil {
|
||||||
log.Debug().Msgf("Template found, input modified to: %s", i)
|
i = templatedInput
|
||||||
|
log.Debug().Msgf("Template found, input modified to: %s", i)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
r, tokenUsage, err := ComputeChoices(
|
r, tokenUsage, err := ComputeChoices(
|
||||||
|
|||||||
@@ -18,19 +18,24 @@ import (
|
|||||||
|
|
||||||
func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
|
func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
modelFile, input, err := readInput(c, o, true)
|
modelFile, input, err := readRequest(c, o, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
|
config, input, err := mergeRequestWithConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||||
|
|
||||||
templateFile := config.Model
|
templateFile := ""
|
||||||
|
|
||||||
|
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||||
|
if o.Loader.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
|
||||||
|
templateFile = config.Model
|
||||||
|
}
|
||||||
|
|
||||||
if config.TemplateConfig.Edit != "" {
|
if config.TemplateConfig.Edit != "" {
|
||||||
templateFile = config.TemplateConfig.Edit
|
templateFile = config.TemplateConfig.Edit
|
||||||
@@ -40,15 +45,16 @@ func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
|||||||
totalTokenUsage := backend.TokenUsage{}
|
totalTokenUsage := backend.TokenUsage{}
|
||||||
|
|
||||||
for _, i := range config.InputStrings {
|
for _, i := range config.InputStrings {
|
||||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
if templateFile != "" {
|
||||||
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{
|
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{
|
||||||
Input: i,
|
Input: i,
|
||||||
Instruction: input.Instruction,
|
Instruction: input.Instruction,
|
||||||
SystemPrompt: config.SystemPrompt,
|
SystemPrompt: config.SystemPrompt,
|
||||||
})
|
})
|
||||||
if err == nil {
|
if err == nil {
|
||||||
i = templatedInput
|
i = templatedInput
|
||||||
log.Debug().Msgf("Template found, input modified to: %s", i)
|
log.Debug().Msgf("Template found, input modified to: %s", i)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
r, tokenUsage, err := ComputeChoices(input, i, config, o, o.Loader, func(s string, c *[]schema.Choice) {
|
r, tokenUsage, err := ComputeChoices(input, i, config, o, o.Loader, func(s string, c *[]schema.Choice) {
|
||||||
|
|||||||
@@ -18,12 +18,12 @@ import (
|
|||||||
// https://platform.openai.com/docs/api-reference/embeddings
|
// https://platform.openai.com/docs/api-reference/embeddings
|
||||||
func EmbeddingsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
|
func EmbeddingsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
model, input, err := readInput(c, o, true)
|
model, input, err := readRequest(c, o, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
config, input, err := readConfig(model, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
|
config, input, err := mergeRequestWithConfig(model, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,6 +5,8 @@ import (
|
|||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
@@ -22,6 +24,26 @@ import (
|
|||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func downloadFile(url string) (string, error) {
|
||||||
|
// Get the data
|
||||||
|
resp, err := http.Get(url)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
// Create the file
|
||||||
|
out, err := os.CreateTemp("", "image")
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
defer out.Close()
|
||||||
|
|
||||||
|
// Write the body to file
|
||||||
|
_, err = io.Copy(out, resp.Body)
|
||||||
|
return out.Name(), err
|
||||||
|
}
|
||||||
|
|
||||||
// https://platform.openai.com/docs/api-reference/images/create
|
// https://platform.openai.com/docs/api-reference/images/create
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -39,7 +61,7 @@ import (
|
|||||||
*/
|
*/
|
||||||
func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
|
func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
m, input, err := readInput(c, o, false)
|
m, input, err := readRequest(c, o, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
@@ -49,19 +71,38 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
|
|||||||
}
|
}
|
||||||
log.Debug().Msgf("Loading model: %+v", m)
|
log.Debug().Msgf("Loading model: %+v", m)
|
||||||
|
|
||||||
config, input, err := readConfig(m, input, cm, o.Loader, o.Debug, 0, 0, false)
|
config, input, err := mergeRequestWithConfig(m, input, cm, o.Loader, o.Debug, 0, 0, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
src := ""
|
src := ""
|
||||||
if input.File != "" {
|
if input.File != "" {
|
||||||
//base 64 decode the file and write it somewhere
|
|
||||||
// that we will cleanup
|
fileData := []byte{}
|
||||||
decoded, err := base64.StdEncoding.DecodeString(input.File)
|
// check if input.File is an URL, if so download it and save it
|
||||||
if err != nil {
|
// to a temporary file
|
||||||
return err
|
if strings.HasPrefix(input.File, "http://") || strings.HasPrefix(input.File, "https://") {
|
||||||
|
out, err := downloadFile(input.File)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed downloading file:%w", err)
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(out)
|
||||||
|
|
||||||
|
fileData, err = os.ReadFile(out)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed reading file:%w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// base 64 decode the file and write it somewhere
|
||||||
|
// that we will cleanup
|
||||||
|
fileData, err = base64.StdEncoding.DecodeString(input.File)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a temporary file
|
// Create a temporary file
|
||||||
outputFile, err := os.CreateTemp(o.ImageDir, "b64")
|
outputFile, err := os.CreateTemp(o.ImageDir, "b64")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -69,7 +110,7 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
|
|||||||
}
|
}
|
||||||
// write the base64 result
|
// write the base64 result
|
||||||
writer := bufio.NewWriter(outputFile)
|
writer := bufio.NewWriter(outputFile)
|
||||||
_, err = writer.Write(decoded)
|
_, err = writer.Write(fileData)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
outputFile.Close()
|
outputFile.Close()
|
||||||
return err
|
return err
|
||||||
@@ -81,8 +122,12 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
|
|||||||
|
|
||||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||||
|
|
||||||
// XXX: Only stablediffusion is supported for now
|
switch config.Backend {
|
||||||
if config.Backend == "" {
|
case "stablediffusion":
|
||||||
|
config.Backend = model.StableDiffusionBackend
|
||||||
|
case "tinydream":
|
||||||
|
config.Backend = model.TinyDreamBackend
|
||||||
|
case "":
|
||||||
config.Backend = model.StableDiffusionBackend
|
config.Backend = model.StableDiffusionBackend
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -100,7 +145,7 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
|
|||||||
}
|
}
|
||||||
|
|
||||||
b64JSON := false
|
b64JSON := false
|
||||||
if input.ResponseFormat == "b64_json" {
|
if input.ResponseFormat.Type == "b64_json" {
|
||||||
b64JSON = true
|
b64JSON = true
|
||||||
}
|
}
|
||||||
// src and clip_skip
|
// src and clip_skip
|
||||||
|
|||||||
@@ -7,11 +7,10 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
config "github.com/go-skynet/LocalAI/api/config"
|
config "github.com/go-skynet/LocalAI/api/config"
|
||||||
|
fiberContext "github.com/go-skynet/LocalAI/api/ctx"
|
||||||
options "github.com/go-skynet/LocalAI/api/options"
|
options "github.com/go-skynet/LocalAI/api/options"
|
||||||
"github.com/go-skynet/LocalAI/api/schema"
|
"github.com/go-skynet/LocalAI/api/schema"
|
||||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||||
@@ -19,8 +18,7 @@ import (
|
|||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *schema.OpenAIRequest, error) {
|
func readRequest(c *fiber.Ctx, o *options.Option, firstModel bool) (string, *schema.OpenAIRequest, error) {
|
||||||
loader := o.Loader
|
|
||||||
input := new(schema.OpenAIRequest)
|
input := new(schema.OpenAIRequest)
|
||||||
ctx, cancel := context.WithCancel(o.Context)
|
ctx, cancel := context.WithCancel(o.Context)
|
||||||
input.Context = ctx
|
input.Context = ctx
|
||||||
@@ -30,38 +28,13 @@ func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *sche
|
|||||||
return "", nil, fmt.Errorf("failed parsing request body: %w", err)
|
return "", nil, fmt.Errorf("failed parsing request body: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
modelFile := input.Model
|
|
||||||
|
|
||||||
if c.Params("model") != "" {
|
|
||||||
modelFile = c.Params("model")
|
|
||||||
}
|
|
||||||
|
|
||||||
received, _ := json.Marshal(input)
|
received, _ := json.Marshal(input)
|
||||||
|
|
||||||
log.Debug().Msgf("Request received: %s", string(received))
|
log.Debug().Msgf("Request received: %s", string(received))
|
||||||
|
|
||||||
// Set model from bearer token, if available
|
modelFile, err := fiberContext.ModelFromContext(c, o.Loader, input.Model, firstModel)
|
||||||
bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
|
|
||||||
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
|
|
||||||
|
|
||||||
// If no model was specified, take the first available
|
return modelFile, input, err
|
||||||
if modelFile == "" && !bearerExists && randomModel {
|
|
||||||
models, _ := loader.ListModels()
|
|
||||||
if len(models) > 0 {
|
|
||||||
modelFile = models[0]
|
|
||||||
log.Debug().Msgf("No model specified, using: %s", modelFile)
|
|
||||||
} else {
|
|
||||||
log.Debug().Msgf("No model specified, returning error")
|
|
||||||
return "", nil, fmt.Errorf("no model specified")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If a model is found in bearer token takes precedence
|
|
||||||
if bearerExists {
|
|
||||||
log.Debug().Msgf("Using model from bearer token: %s", bearer)
|
|
||||||
modelFile = bearer
|
|
||||||
}
|
|
||||||
return modelFile, input, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// this function check if the string is an URL, if it's an URL downloads the image in memory
|
// this function check if the string is an URL, if it's an URL downloads the image in memory
|
||||||
@@ -95,7 +68,7 @@ func getBase64Image(s string) (string, error) {
|
|||||||
return "", fmt.Errorf("not valid string")
|
return "", fmt.Errorf("not valid string")
|
||||||
}
|
}
|
||||||
|
|
||||||
func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
|
func updateRequestConfig(config *config.Config, input *schema.OpenAIRequest) {
|
||||||
if input.Echo {
|
if input.Echo {
|
||||||
config.Echo = input.Echo
|
config.Echo = input.Echo
|
||||||
}
|
}
|
||||||
@@ -282,55 +255,11 @@ func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func readConfig(modelFile string, input *schema.OpenAIRequest, cm *config.ConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.Config, *schema.OpenAIRequest, error) {
|
func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.ConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.Config, *schema.OpenAIRequest, error) {
|
||||||
// Load a config file if present after the model name
|
cfg, err := config.Load(modelFile, loader.ModelPath, cm, debug, threads, ctx, f16)
|
||||||
modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
|
|
||||||
|
|
||||||
var cfg *config.Config
|
|
||||||
|
|
||||||
defaults := func() {
|
|
||||||
cfg = config.DefaultConfig(modelFile)
|
|
||||||
cfg.ContextSize = ctx
|
|
||||||
cfg.Threads = threads
|
|
||||||
cfg.F16 = f16
|
|
||||||
cfg.Debug = debug
|
|
||||||
}
|
|
||||||
|
|
||||||
cfgExisting, exists := cm.GetConfig(modelFile)
|
|
||||||
if !exists {
|
|
||||||
if _, err := os.Stat(modelConfig); err == nil {
|
|
||||||
if err := cm.LoadConfig(modelConfig); err != nil {
|
|
||||||
return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
|
|
||||||
}
|
|
||||||
cfgExisting, exists = cm.GetConfig(modelFile)
|
|
||||||
if exists {
|
|
||||||
cfg = &cfgExisting
|
|
||||||
} else {
|
|
||||||
defaults()
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
defaults()
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
cfg = &cfgExisting
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set the parameters for the language model prediction
|
// Set the parameters for the language model prediction
|
||||||
updateConfig(cfg, input)
|
updateRequestConfig(cfg, input)
|
||||||
|
|
||||||
// Don't allow 0 as setting
|
return cfg, input, err
|
||||||
if cfg.Threads == 0 {
|
|
||||||
if threads != 0 {
|
|
||||||
cfg.Threads = threads
|
|
||||||
} else {
|
|
||||||
cfg.Threads = 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Enforce debug flag if passed from CLI
|
|
||||||
if debug {
|
|
||||||
cfg.Debug = true
|
|
||||||
}
|
|
||||||
|
|
||||||
return cfg, input, nil
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,12 +19,12 @@ import (
|
|||||||
// https://platform.openai.com/docs/api-reference/audio/create
|
// https://platform.openai.com/docs/api-reference/audio/create
|
||||||
func TranscriptEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
|
func TranscriptEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
m, input, err := readInput(c, o, false)
|
m, input, err := readRequest(c, o, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
config, input, err := readConfig(m, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
|
config, input, err := mergeRequestWithConfig(m, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,10 +4,11 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"embed"
|
"embed"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/go-skynet/LocalAI/metrics"
|
||||||
"github.com/go-skynet/LocalAI/pkg/gallery"
|
"github.com/go-skynet/LocalAI/pkg/gallery"
|
||||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||||
"github.com/go-skynet/LocalAI/metrics"
|
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -27,6 +28,8 @@ type Option struct {
|
|||||||
ApiKeys []string
|
ApiKeys []string
|
||||||
Metrics *metrics.Metrics
|
Metrics *metrics.Metrics
|
||||||
|
|
||||||
|
ModelLibraryURL string
|
||||||
|
|
||||||
Galleries []gallery.Gallery
|
Galleries []gallery.Gallery
|
||||||
|
|
||||||
BackendAssets embed.FS
|
BackendAssets embed.FS
|
||||||
@@ -36,7 +39,16 @@ type Option struct {
|
|||||||
|
|
||||||
AutoloadGalleries bool
|
AutoloadGalleries bool
|
||||||
|
|
||||||
SingleBackend bool
|
SingleBackend bool
|
||||||
|
ParallelBackendRequests bool
|
||||||
|
|
||||||
|
WatchDogIdle bool
|
||||||
|
WatchDogBusy bool
|
||||||
|
WatchDog bool
|
||||||
|
|
||||||
|
ModelsURL []string
|
||||||
|
|
||||||
|
WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
type AppOption func(*Option)
|
type AppOption func(*Option)
|
||||||
@@ -56,16 +68,58 @@ func NewOptions(o ...AppOption) *Option {
|
|||||||
return opt
|
return opt
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func WithModelsURL(urls ...string) AppOption {
|
||||||
|
return func(o *Option) {
|
||||||
|
o.ModelsURL = urls
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func WithCors(b bool) AppOption {
|
func WithCors(b bool) AppOption {
|
||||||
return func(o *Option) {
|
return func(o *Option) {
|
||||||
o.CORS = b
|
o.CORS = b
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func WithModelLibraryURL(url string) AppOption {
|
||||||
|
return func(o *Option) {
|
||||||
|
o.ModelLibraryURL = url
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var EnableWatchDog = func(o *Option) {
|
||||||
|
o.WatchDog = true
|
||||||
|
}
|
||||||
|
|
||||||
|
var EnableWatchDogIdleCheck = func(o *Option) {
|
||||||
|
o.WatchDog = true
|
||||||
|
o.WatchDogIdle = true
|
||||||
|
}
|
||||||
|
|
||||||
|
var EnableWatchDogBusyCheck = func(o *Option) {
|
||||||
|
o.WatchDog = true
|
||||||
|
o.WatchDogBusy = true
|
||||||
|
}
|
||||||
|
|
||||||
|
func SetWatchDogBusyTimeout(t time.Duration) AppOption {
|
||||||
|
return func(o *Option) {
|
||||||
|
o.WatchDogBusyTimeout = t
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func SetWatchDogIdleTimeout(t time.Duration) AppOption {
|
||||||
|
return func(o *Option) {
|
||||||
|
o.WatchDogIdleTimeout = t
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var EnableSingleBackend = func(o *Option) {
|
var EnableSingleBackend = func(o *Option) {
|
||||||
o.SingleBackend = true
|
o.SingleBackend = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var EnableParallelBackendRequests = func(o *Option) {
|
||||||
|
o.ParallelBackendRequests = true
|
||||||
|
}
|
||||||
|
|
||||||
var EnableGalleriesAutoload = func(o *Option) {
|
var EnableGalleriesAutoload = func(o *Option) {
|
||||||
o.AutoloadGalleries = true
|
o.AutoloadGalleries = true
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -83,6 +83,12 @@ type OpenAIModel struct {
|
|||||||
Object string `json:"object"`
|
Object string `json:"object"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type ChatCompletionResponseFormatType string
|
||||||
|
|
||||||
|
type ChatCompletionResponseFormat struct {
|
||||||
|
Type ChatCompletionResponseFormatType `json:"type,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
type OpenAIRequest struct {
|
type OpenAIRequest struct {
|
||||||
config.PredictionOptions
|
config.PredictionOptions
|
||||||
|
|
||||||
@@ -92,7 +98,7 @@ type OpenAIRequest struct {
|
|||||||
// whisper
|
// whisper
|
||||||
File string `json:"file" validate:"required"`
|
File string `json:"file" validate:"required"`
|
||||||
//whisper/image
|
//whisper/image
|
||||||
ResponseFormat string `json:"response_format"`
|
ResponseFormat ChatCompletionResponseFormat `json:"response_format"`
|
||||||
// image
|
// image
|
||||||
Size string `json:"size"`
|
Size string `json:"size"`
|
||||||
// Prompt is read only by completion/image API calls
|
// Prompt is read only by completion/image API calls
|
||||||
|
|||||||
@@ -110,8 +110,8 @@ message ModelOptions {
|
|||||||
string CLIPModel = 31;
|
string CLIPModel = 31;
|
||||||
string CLIPSubfolder = 32;
|
string CLIPSubfolder = 32;
|
||||||
int32 CLIPSkip = 33;
|
int32 CLIPSkip = 33;
|
||||||
|
string ControlNet = 48;
|
||||||
|
|
||||||
// RWKV
|
|
||||||
string Tokenizer = 34;
|
string Tokenizer = 34;
|
||||||
|
|
||||||
// LLM (llama.cpp)
|
// LLM (llama.cpp)
|
||||||
@@ -134,6 +134,8 @@ message ModelOptions {
|
|||||||
float YarnAttnFactor = 45;
|
float YarnAttnFactor = 45;
|
||||||
float YarnBetaFast = 46;
|
float YarnBetaFast = 46;
|
||||||
float YarnBetaSlow = 47;
|
float YarnBetaSlow = 47;
|
||||||
|
|
||||||
|
string Type = 49;
|
||||||
}
|
}
|
||||||
|
|
||||||
message Result {
|
message Result {
|
||||||
457
backend/backend_grpc.pb.go
Normal file
457
backend/backend_grpc.pb.go
Normal file
@@ -0,0 +1,457 @@
|
|||||||
|
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
|
||||||
|
// versions:
|
||||||
|
// - protoc-gen-go-grpc v1.2.0
|
||||||
|
// - protoc v4.23.4
|
||||||
|
// source: backend/backend.proto
|
||||||
|
|
||||||
|
package proto
|
||||||
|
|
||||||
|
import (
|
||||||
|
context "context"
|
||||||
|
grpc "google.golang.org/grpc"
|
||||||
|
codes "google.golang.org/grpc/codes"
|
||||||
|
status "google.golang.org/grpc/status"
|
||||||
|
)
|
||||||
|
|
||||||
|
// This is a compile-time assertion to ensure that this generated file
|
||||||
|
// is compatible with the grpc package it is being compiled against.
|
||||||
|
// Requires gRPC-Go v1.32.0 or later.
|
||||||
|
const _ = grpc.SupportPackageIsVersion7
|
||||||
|
|
||||||
|
// BackendClient is the client API for Backend service.
|
||||||
|
//
|
||||||
|
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
|
||||||
|
type BackendClient interface {
|
||||||
|
Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error)
|
||||||
|
Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error)
|
||||||
|
LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error)
|
||||||
|
PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error)
|
||||||
|
Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error)
|
||||||
|
GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error)
|
||||||
|
AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error)
|
||||||
|
TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error)
|
||||||
|
TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error)
|
||||||
|
Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
type backendClient struct {
|
||||||
|
cc grpc.ClientConnInterface
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewBackendClient(cc grpc.ClientConnInterface) BackendClient {
|
||||||
|
return &backendClient{cc}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *backendClient) Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) {
|
||||||
|
out := new(Reply)
|
||||||
|
err := c.cc.Invoke(ctx, "/backend.Backend/Health", in, out, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *backendClient) Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) {
|
||||||
|
out := new(Reply)
|
||||||
|
err := c.cc.Invoke(ctx, "/backend.Backend/Predict", in, out, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *backendClient) LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) {
|
||||||
|
out := new(Result)
|
||||||
|
err := c.cc.Invoke(ctx, "/backend.Backend/LoadModel", in, out, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *backendClient) PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) {
|
||||||
|
stream, err := c.cc.NewStream(ctx, &Backend_ServiceDesc.Streams[0], "/backend.Backend/PredictStream", opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
x := &backendPredictStreamClient{stream}
|
||||||
|
if err := x.ClientStream.SendMsg(in); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if err := x.ClientStream.CloseSend(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return x, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type Backend_PredictStreamClient interface {
|
||||||
|
Recv() (*Reply, error)
|
||||||
|
grpc.ClientStream
|
||||||
|
}
|
||||||
|
|
||||||
|
type backendPredictStreamClient struct {
|
||||||
|
grpc.ClientStream
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *backendPredictStreamClient) Recv() (*Reply, error) {
|
||||||
|
m := new(Reply)
|
||||||
|
if err := x.ClientStream.RecvMsg(m); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return m, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *backendClient) Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) {
|
||||||
|
out := new(EmbeddingResult)
|
||||||
|
err := c.cc.Invoke(ctx, "/backend.Backend/Embedding", in, out, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *backendClient) GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) {
|
||||||
|
out := new(Result)
|
||||||
|
err := c.cc.Invoke(ctx, "/backend.Backend/GenerateImage", in, out, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *backendClient) AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) {
|
||||||
|
out := new(TranscriptResult)
|
||||||
|
err := c.cc.Invoke(ctx, "/backend.Backend/AudioTranscription", in, out, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *backendClient) TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) {
|
||||||
|
out := new(Result)
|
||||||
|
err := c.cc.Invoke(ctx, "/backend.Backend/TTS", in, out, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *backendClient) TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) {
|
||||||
|
out := new(TokenizationResponse)
|
||||||
|
err := c.cc.Invoke(ctx, "/backend.Backend/TokenizeString", in, out, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *backendClient) Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) {
|
||||||
|
out := new(StatusResponse)
|
||||||
|
err := c.cc.Invoke(ctx, "/backend.Backend/Status", in, out, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// BackendServer is the server API for Backend service.
|
||||||
|
// All implementations must embed UnimplementedBackendServer
|
||||||
|
// for forward compatibility
|
||||||
|
type BackendServer interface {
|
||||||
|
Health(context.Context, *HealthMessage) (*Reply, error)
|
||||||
|
Predict(context.Context, *PredictOptions) (*Reply, error)
|
||||||
|
LoadModel(context.Context, *ModelOptions) (*Result, error)
|
||||||
|
PredictStream(*PredictOptions, Backend_PredictStreamServer) error
|
||||||
|
Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error)
|
||||||
|
GenerateImage(context.Context, *GenerateImageRequest) (*Result, error)
|
||||||
|
AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error)
|
||||||
|
TTS(context.Context, *TTSRequest) (*Result, error)
|
||||||
|
TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error)
|
||||||
|
Status(context.Context, *HealthMessage) (*StatusResponse, error)
|
||||||
|
mustEmbedUnimplementedBackendServer()
|
||||||
|
}
|
||||||
|
|
||||||
|
// UnimplementedBackendServer must be embedded to have forward compatible implementations.
|
||||||
|
type UnimplementedBackendServer struct {
|
||||||
|
}
|
||||||
|
|
||||||
|
func (UnimplementedBackendServer) Health(context.Context, *HealthMessage) (*Reply, error) {
|
||||||
|
return nil, status.Errorf(codes.Unimplemented, "method Health not implemented")
|
||||||
|
}
|
||||||
|
func (UnimplementedBackendServer) Predict(context.Context, *PredictOptions) (*Reply, error) {
|
||||||
|
return nil, status.Errorf(codes.Unimplemented, "method Predict not implemented")
|
||||||
|
}
|
||||||
|
func (UnimplementedBackendServer) LoadModel(context.Context, *ModelOptions) (*Result, error) {
|
||||||
|
return nil, status.Errorf(codes.Unimplemented, "method LoadModel not implemented")
|
||||||
|
}
|
||||||
|
func (UnimplementedBackendServer) PredictStream(*PredictOptions, Backend_PredictStreamServer) error {
|
||||||
|
return status.Errorf(codes.Unimplemented, "method PredictStream not implemented")
|
||||||
|
}
|
||||||
|
func (UnimplementedBackendServer) Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error) {
|
||||||
|
return nil, status.Errorf(codes.Unimplemented, "method Embedding not implemented")
|
||||||
|
}
|
||||||
|
func (UnimplementedBackendServer) GenerateImage(context.Context, *GenerateImageRequest) (*Result, error) {
|
||||||
|
return nil, status.Errorf(codes.Unimplemented, "method GenerateImage not implemented")
|
||||||
|
}
|
||||||
|
func (UnimplementedBackendServer) AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error) {
|
||||||
|
return nil, status.Errorf(codes.Unimplemented, "method AudioTranscription not implemented")
|
||||||
|
}
|
||||||
|
func (UnimplementedBackendServer) TTS(context.Context, *TTSRequest) (*Result, error) {
|
||||||
|
return nil, status.Errorf(codes.Unimplemented, "method TTS not implemented")
|
||||||
|
}
|
||||||
|
func (UnimplementedBackendServer) TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error) {
|
||||||
|
return nil, status.Errorf(codes.Unimplemented, "method TokenizeString not implemented")
|
||||||
|
}
|
||||||
|
func (UnimplementedBackendServer) Status(context.Context, *HealthMessage) (*StatusResponse, error) {
|
||||||
|
return nil, status.Errorf(codes.Unimplemented, "method Status not implemented")
|
||||||
|
}
|
||||||
|
func (UnimplementedBackendServer) mustEmbedUnimplementedBackendServer() {}
|
||||||
|
|
||||||
|
// UnsafeBackendServer may be embedded to opt out of forward compatibility for this service.
|
||||||
|
// Use of this interface is not recommended, as added methods to BackendServer will
|
||||||
|
// result in compilation errors.
|
||||||
|
type UnsafeBackendServer interface {
|
||||||
|
mustEmbedUnimplementedBackendServer()
|
||||||
|
}
|
||||||
|
|
||||||
|
func RegisterBackendServer(s grpc.ServiceRegistrar, srv BackendServer) {
|
||||||
|
s.RegisterService(&Backend_ServiceDesc, srv)
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Backend_Health_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||||
|
in := new(HealthMessage)
|
||||||
|
if err := dec(in); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if interceptor == nil {
|
||||||
|
return srv.(BackendServer).Health(ctx, in)
|
||||||
|
}
|
||||||
|
info := &grpc.UnaryServerInfo{
|
||||||
|
Server: srv,
|
||||||
|
FullMethod: "/backend.Backend/Health",
|
||||||
|
}
|
||||||
|
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||||
|
return srv.(BackendServer).Health(ctx, req.(*HealthMessage))
|
||||||
|
}
|
||||||
|
return interceptor(ctx, in, info, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Backend_Predict_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||||
|
in := new(PredictOptions)
|
||||||
|
if err := dec(in); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if interceptor == nil {
|
||||||
|
return srv.(BackendServer).Predict(ctx, in)
|
||||||
|
}
|
||||||
|
info := &grpc.UnaryServerInfo{
|
||||||
|
Server: srv,
|
||||||
|
FullMethod: "/backend.Backend/Predict",
|
||||||
|
}
|
||||||
|
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||||
|
return srv.(BackendServer).Predict(ctx, req.(*PredictOptions))
|
||||||
|
}
|
||||||
|
return interceptor(ctx, in, info, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Backend_LoadModel_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||||
|
in := new(ModelOptions)
|
||||||
|
if err := dec(in); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if interceptor == nil {
|
||||||
|
return srv.(BackendServer).LoadModel(ctx, in)
|
||||||
|
}
|
||||||
|
info := &grpc.UnaryServerInfo{
|
||||||
|
Server: srv,
|
||||||
|
FullMethod: "/backend.Backend/LoadModel",
|
||||||
|
}
|
||||||
|
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||||
|
return srv.(BackendServer).LoadModel(ctx, req.(*ModelOptions))
|
||||||
|
}
|
||||||
|
return interceptor(ctx, in, info, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Backend_PredictStream_Handler(srv interface{}, stream grpc.ServerStream) error {
|
||||||
|
m := new(PredictOptions)
|
||||||
|
if err := stream.RecvMsg(m); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return srv.(BackendServer).PredictStream(m, &backendPredictStreamServer{stream})
|
||||||
|
}
|
||||||
|
|
||||||
|
type Backend_PredictStreamServer interface {
|
||||||
|
Send(*Reply) error
|
||||||
|
grpc.ServerStream
|
||||||
|
}
|
||||||
|
|
||||||
|
type backendPredictStreamServer struct {
|
||||||
|
grpc.ServerStream
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *backendPredictStreamServer) Send(m *Reply) error {
|
||||||
|
return x.ServerStream.SendMsg(m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Backend_Embedding_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||||
|
in := new(PredictOptions)
|
||||||
|
if err := dec(in); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if interceptor == nil {
|
||||||
|
return srv.(BackendServer).Embedding(ctx, in)
|
||||||
|
}
|
||||||
|
info := &grpc.UnaryServerInfo{
|
||||||
|
Server: srv,
|
||||||
|
FullMethod: "/backend.Backend/Embedding",
|
||||||
|
}
|
||||||
|
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||||
|
return srv.(BackendServer).Embedding(ctx, req.(*PredictOptions))
|
||||||
|
}
|
||||||
|
return interceptor(ctx, in, info, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Backend_GenerateImage_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||||
|
in := new(GenerateImageRequest)
|
||||||
|
if err := dec(in); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if interceptor == nil {
|
||||||
|
return srv.(BackendServer).GenerateImage(ctx, in)
|
||||||
|
}
|
||||||
|
info := &grpc.UnaryServerInfo{
|
||||||
|
Server: srv,
|
||||||
|
FullMethod: "/backend.Backend/GenerateImage",
|
||||||
|
}
|
||||||
|
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||||
|
return srv.(BackendServer).GenerateImage(ctx, req.(*GenerateImageRequest))
|
||||||
|
}
|
||||||
|
return interceptor(ctx, in, info, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Backend_AudioTranscription_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||||
|
in := new(TranscriptRequest)
|
||||||
|
if err := dec(in); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if interceptor == nil {
|
||||||
|
return srv.(BackendServer).AudioTranscription(ctx, in)
|
||||||
|
}
|
||||||
|
info := &grpc.UnaryServerInfo{
|
||||||
|
Server: srv,
|
||||||
|
FullMethod: "/backend.Backend/AudioTranscription",
|
||||||
|
}
|
||||||
|
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||||
|
return srv.(BackendServer).AudioTranscription(ctx, req.(*TranscriptRequest))
|
||||||
|
}
|
||||||
|
return interceptor(ctx, in, info, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Backend_TTS_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||||
|
in := new(TTSRequest)
|
||||||
|
if err := dec(in); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if interceptor == nil {
|
||||||
|
return srv.(BackendServer).TTS(ctx, in)
|
||||||
|
}
|
||||||
|
info := &grpc.UnaryServerInfo{
|
||||||
|
Server: srv,
|
||||||
|
FullMethod: "/backend.Backend/TTS",
|
||||||
|
}
|
||||||
|
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||||
|
return srv.(BackendServer).TTS(ctx, req.(*TTSRequest))
|
||||||
|
}
|
||||||
|
return interceptor(ctx, in, info, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Backend_TokenizeString_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||||
|
in := new(PredictOptions)
|
||||||
|
if err := dec(in); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if interceptor == nil {
|
||||||
|
return srv.(BackendServer).TokenizeString(ctx, in)
|
||||||
|
}
|
||||||
|
info := &grpc.UnaryServerInfo{
|
||||||
|
Server: srv,
|
||||||
|
FullMethod: "/backend.Backend/TokenizeString",
|
||||||
|
}
|
||||||
|
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||||
|
return srv.(BackendServer).TokenizeString(ctx, req.(*PredictOptions))
|
||||||
|
}
|
||||||
|
return interceptor(ctx, in, info, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Backend_Status_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||||
|
in := new(HealthMessage)
|
||||||
|
if err := dec(in); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if interceptor == nil {
|
||||||
|
return srv.(BackendServer).Status(ctx, in)
|
||||||
|
}
|
||||||
|
info := &grpc.UnaryServerInfo{
|
||||||
|
Server: srv,
|
||||||
|
FullMethod: "/backend.Backend/Status",
|
||||||
|
}
|
||||||
|
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||||
|
return srv.(BackendServer).Status(ctx, req.(*HealthMessage))
|
||||||
|
}
|
||||||
|
return interceptor(ctx, in, info, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Backend_ServiceDesc is the grpc.ServiceDesc for Backend service.
|
||||||
|
// It's only intended for direct use with grpc.RegisterService,
|
||||||
|
// and not to be introspected or modified (even as a copy)
|
||||||
|
var Backend_ServiceDesc = grpc.ServiceDesc{
|
||||||
|
ServiceName: "backend.Backend",
|
||||||
|
HandlerType: (*BackendServer)(nil),
|
||||||
|
Methods: []grpc.MethodDesc{
|
||||||
|
{
|
||||||
|
MethodName: "Health",
|
||||||
|
Handler: _Backend_Health_Handler,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
MethodName: "Predict",
|
||||||
|
Handler: _Backend_Predict_Handler,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
MethodName: "LoadModel",
|
||||||
|
Handler: _Backend_LoadModel_Handler,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
MethodName: "Embedding",
|
||||||
|
Handler: _Backend_Embedding_Handler,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
MethodName: "GenerateImage",
|
||||||
|
Handler: _Backend_GenerateImage_Handler,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
MethodName: "AudioTranscription",
|
||||||
|
Handler: _Backend_AudioTranscription_Handler,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
MethodName: "TTS",
|
||||||
|
Handler: _Backend_TTS_Handler,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
MethodName: "TokenizeString",
|
||||||
|
Handler: _Backend_TokenizeString_Handler,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
MethodName: "Status",
|
||||||
|
Handler: _Backend_Status_Handler,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Streams: []grpc.StreamDesc{
|
||||||
|
{
|
||||||
|
StreamName: "PredictStream",
|
||||||
|
Handler: _Backend_PredictStream_Handler,
|
||||||
|
ServerStreams: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Metadata: "backend/backend.proto",
|
||||||
|
}
|
||||||
backend/cpp/grpc/Makefile (new file, 66 lines)
@@ -0,0 +1,66 @@
# Basic platform detection
HOST_SYSTEM = $(shell uname | cut -f 1 -d_)
SYSTEM ?= $(HOST_SYSTEM)

TAG_LIB_GRPC?=v1.59.0
GIT_REPO_LIB_GRPC?=https://github.com/grpc/grpc.git
GIT_CLONE_DEPTH?=1
NUM_BUILD_THREADS?=$(shell nproc --ignore=1)

INSTALLED_PACKAGES=installed_packages
GRPC_REPO=grpc_repo
GRPC_BUILD=grpc_build

export CMAKE_ARGS?=
CMAKE_ARGS+=-DCMAKE_BUILD_TYPE=Release
CMAKE_ARGS+=-DgRPC_INSTALL=ON
CMAKE_ARGS+=-DEXECUTABLE_OUTPUT_PATH=../$(INSTALLED_PACKAGES)/grpc/bin
CMAKE_ARGS+=-DLIBRARY_OUTPUT_PATH=../$(INSTALLED_PACKAGES)/grpc/lib
CMAKE_ARGS+=-DgRPC_BUILD_TESTS=OFF
CMAKE_ARGS+=-DgRPC_BUILD_CSHARP_EXT=OFF
CMAKE_ARGS+=-DgRPC_BUILD_GRPC_CPP_PLUGIN=ON
CMAKE_ARGS+=-DgRPC_BUILD_GRPC_CSHARP_PLUGIN=OFF
CMAKE_ARGS+=-DgRPC_BUILD_GRPC_NODE_PLUGIN=OFF
CMAKE_ARGS+=-DgRPC_BUILD_GRPC_OBJECTIVE_C_PLUGIN=OFF
CMAKE_ARGS+=-DgRPC_BUILD_GRPC_PHP_PLUGIN=OFF
CMAKE_ARGS+=-DgRPC_BUILD_GRPC_PYTHON_PLUGIN=ON
CMAKE_ARGS+=-DgRPC_BUILD_GRPC_RUBY_PLUGIN=OFF
CMAKE_ARGS+=-Dprotobuf_WITH_ZLIB=ON
CMAKE_ARGS+=-DRE2_BUILD_TESTING=OFF
CMAKE_ARGS+=-DCMAKE_INSTALL_PREFIX=../$(INSTALLED_PACKAGES)

# windows need to set OPENSSL_NO_ASM. Results in slower crypto performance but doesn't build otherwise.
# May be resolvable, but for now its set. More info: https://stackoverflow.com/a/75240504/480673
ifeq ($(SYSTEM),MSYS)
CMAKE_ARGS+=-DOPENSSL_NO_ASM=ON
endif
ifeq ($(SYSTEM),MINGW64)
CMAKE_ARGS+=-DOPENSSL_NO_ASM=ON
endif
ifeq ($(SYSTEM),MINGW32)
CMAKE_ARGS+=-DOPENSSL_NO_ASM=ON
endif
ifeq ($(SYSTEM),CYGWIN)
CMAKE_ARGS+=-DOPENSSL_NO_ASM=ON
endif

$(INSTALLED_PACKAGES): grpc_build

$(GRPC_REPO):
	git clone --depth $(GIT_CLONE_DEPTH) -b $(TAG_LIB_GRPC) $(GIT_REPO_LIB_GRPC) $(GRPC_REPO)/grpc
	cd $(GRPC_REPO)/grpc && git submodule update --init --recursive --depth $(GIT_CLONE_DEPTH)

$(GRPC_BUILD): $(GRPC_REPO)
	mkdir -p $(GRPC_BUILD)
	cd $(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . -- -j ${NUM_BUILD_THREADS} && cmake --build . --target install -- -j ${NUM_BUILD_THREADS}

build: $(INSTALLED_PACKAGES)

rebuild:
	rm -rf grpc_build
	$(MAKE) grpc_build

clean:
	rm -rf grpc_build
	rm -rf grpc_repo
	rm -rf installed_packages
(deleted file, 81 lines)
@@ -1,81 +0,0 @@
#!/bin/bash

# Builds locally from sources the packages needed by the llama cpp backend.

# Makes sure a few base packages exist.
# sudo apt-get --no-upgrade -y install g++ gcc binutils cmake git build-essential autoconf libtool pkg-config

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
echo "Script directory: $SCRIPT_DIR"

CPP_INSTALLED_PACKAGES_DIR=$1
if [ -z ${CPP_INSTALLED_PACKAGES_DIR} ]; then
    echo "CPP_INSTALLED_PACKAGES_DIR env variable not set. Don't know where to install: failed.";
    echo
    exit -1
fi

if [ -d "${CPP_INSTALLED_PACKAGES_DIR}" ]; then
    echo "gRPC installation directory already exists. Nothing to do."
    exit 0
fi

# The depth when cloning a git repo. 1 speeds up the clone when the repo history is not needed.
GIT_CLONE_DEPTH=1

NUM_BUILD_THREADS=$(nproc --ignore=1)

# Google gRPC --------------------------------------------------------------------------------------
TAG_LIB_GRPC="v1.59.0"
GIT_REPO_LIB_GRPC="https://github.com/grpc/grpc.git"
GRPC_REPO_DIR="${SCRIPT_DIR}/../grpc_repo"
GRPC_BUILD_DIR="${SCRIPT_DIR}/../grpc_build"
SRC_DIR_LIB_GRPC="${GRPC_REPO_DIR}/grpc"

echo "SRC_DIR_LIB_GRPC: ${SRC_DIR_LIB_GRPC}"
echo "GRPC_REPO_DIR: ${GRPC_REPO_DIR}"
echo "GRPC_BUILD_DIR: ${GRPC_BUILD_DIR}"

mkdir -pv ${GRPC_REPO_DIR}

rm -rf ${GRPC_BUILD_DIR}
mkdir -pv ${GRPC_BUILD_DIR}

mkdir -pv ${CPP_INSTALLED_PACKAGES_DIR}

if [ -d "${SRC_DIR_LIB_GRPC}" ]; then
    echo "gRPC source already exists locally. Not cloned again."
else
    ( cd ${GRPC_REPO_DIR} && \
      git clone --depth ${GIT_CLONE_DEPTH} -b ${TAG_LIB_GRPC} ${GIT_REPO_LIB_GRPC} && \
      cd ${SRC_DIR_LIB_GRPC} && \
      git submodule update --init --recursive --depth ${GIT_CLONE_DEPTH}
    )
fi

( cd ${GRPC_BUILD_DIR} && \
  cmake -G "Unix Makefiles" \
        -DCMAKE_BUILD_TYPE=Release \
        -DgRPC_INSTALL=ON \
        -DEXECUTABLE_OUTPUT_PATH=${CPP_INSTALLED_PACKAGES_DIR}/grpc/bin \
        -DLIBRARY_OUTPUT_PATH=${CPP_INSTALLED_PACKAGES_DIR}/grpc/lib \
        -DgRPC_BUILD_TESTS=OFF \
        -DgRPC_BUILD_CSHARP_EXT=OFF \
        -DgRPC_BUILD_GRPC_CPP_PLUGIN=ON \
        -DgRPC_BUILD_GRPC_CSHARP_PLUGIN=OFF \
        -DgRPC_BUILD_GRPC_NODE_PLUGIN=OFF \
        -DgRPC_BUILD_GRPC_OBJECTIVE_C_PLUGIN=OFF \
        -DgRPC_BUILD_GRPC_PHP_PLUGIN=OFF \
        -DgRPC_BUILD_GRPC_PYTHON_PLUGIN=ON \
        -DgRPC_BUILD_GRPC_RUBY_PLUGIN=OFF \
        -Dprotobuf_WITH_ZLIB=ON \
        -DRE2_BUILD_TESTING=OFF \
        -DCMAKE_INSTALL_PREFIX=${CPP_INSTALLED_PACKAGES_DIR}/ \
        ${SRC_DIR_LIB_GRPC} && \
  cmake --build . -- -j ${NUM_BUILD_THREADS} && \
  cmake --build . --target install -- -j ${NUM_BUILD_THREADS}
)

rm -rf ${GRPC_BUILD_DIR}
rm -rf ${GRPC_REPO_DIR}
@@ -17,9 +17,17 @@ cmake_minimum_required(VERSION 3.15)
 set(TARGET grpc-server)
 set(_PROTOBUF_LIBPROTOBUF libprotobuf)
 set(_REFLECTION grpc++_reflection)
 
 if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
-    link_directories("/opt/homebrew/lib")
-    include_directories("/opt/homebrew/include")
+    # Set correct Homebrew install folder for Apple Silicon and Intel Macs
+    if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64")
+        set(HOMEBREW_DEFAULT_PREFIX "/opt/homebrew")
+    else()
+        set(HOMEBREW_DEFAULT_PREFIX "/usr/local")
+    endif()
+
+    link_directories("${HOMEBREW_DEFAULT_PREFIX}/lib")
+    include_directories("${HOMEBREW_DEFAULT_PREFIX}/include")
 endif()
 
 find_package(absl CONFIG REQUIRED)
@@ -36,7 +44,7 @@ include_directories(${Protobuf_INCLUDE_DIRS})
 message(STATUS "Using protobuf version ${Protobuf_VERSION} | Protobuf_INCLUDE_DIRS: ${Protobuf_INCLUDE_DIRS} | CMAKE_CURRENT_BINARY_DIR: ${CMAKE_CURRENT_BINARY_DIR}")
 
 # Proto file
-get_filename_component(hw_proto "../../../../../../pkg/grpc/proto/backend.proto" ABSOLUTE)
+get_filename_component(hw_proto "../../../../../../backend/backend.proto" ABSOLUTE)
 get_filename_component(hw_proto_path "${hw_proto}" PATH)
 
 # Generated sources
@@ -62,7 +70,7 @@ add_library(hw_grpc_proto
 ${hw_proto_srcs}
 ${hw_proto_hdrs} )
 
-add_executable(${TARGET} grpc-server.cpp json.hpp )
+add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp)
 target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
   absl::flags_parse
   gRPC::${_REFLECTION}
@@ -1,8 +1,9 @@
-LLAMA_VERSION?=d9b33fe95bd257b36c84ee5769cc048230067d6f
+LLAMA_VERSION?=
 
 CMAKE_ARGS?=
 BUILD_TYPE?=
+ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
 
 # If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
 ifeq ($(BUILD_TYPE),cublas)
@@ -19,8 +20,19 @@ else ifeq ($(BUILD_TYPE),hipblas)
 	CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
 endif
 
+ifeq ($(BUILD_TYPE),sycl_f16)
+	CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
+endif
+
+ifeq ($(BUILD_TYPE),sycl_f32)
+	CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+endif
+
 llama.cpp:
 	git clone --recurse-submodules https://github.com/ggerganov/llama.cpp llama.cpp
+	if [ -z "$(LLAMA_VERSION)" ]; then \
+		exit 1; \
+	fi
 	cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1
 
 llama.cpp/examples/grpc-server:
@@ -28,6 +40,7 @@ llama.cpp/examples/grpc-server:
 	cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
 	cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
 	cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
+	cp -rfv $(abspath ./)/utils.hpp llama.cpp/examples/grpc-server/
 	echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
 	## XXX: In some versions of CMake clip wasn't being built before llama.
 	## This is an hack for now, but it should be fixed in the future.
@@ -46,5 +59,10 @@ clean:
 	rm -rf grpc-server
 
 grpc-server: llama.cpp llama.cpp/examples/grpc-server
+ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+	bash -c "source $(ONEAPI_VARS); \
+	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release"
+else
 	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release
+endif
 	cp llama.cpp/build/bin/grpc-server .
(File diff suppressed because it is too large.)

backend/cpp/llama/utils.hpp (new file, 510 lines)
@@ -0,0 +1,510 @@
|
|||||||
|
// https://github.com/ggerganov/llama.cpp/blob/master/examples/server/utils.hpp
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <set>
|
||||||
|
#include <mutex>
|
||||||
|
#include <condition_variable>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#include "json.hpp"
|
||||||
|
|
||||||
|
#include "../llava/clip.h"
|
||||||
|
|
||||||
|
using json = nlohmann::json;
|
||||||
|
|
||||||
|
extern bool server_verbose;
|
||||||
|
|
||||||
|
#ifndef SERVER_VERBOSE
|
||||||
|
#define SERVER_VERBOSE 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if SERVER_VERBOSE != 1
|
||||||
|
#define LOG_VERBOSE(MSG, ...)
|
||||||
|
#else
|
||||||
|
#define LOG_VERBOSE(MSG, ...) \
|
||||||
|
do \
|
||||||
|
{ \
|
||||||
|
if (server_verbose) \
|
||||||
|
{ \
|
||||||
|
server_log("VERBOSE", __func__, __LINE__, MSG, __VA_ARGS__); \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define LOG_ERROR( MSG, ...) server_log("ERROR", __func__, __LINE__, MSG, __VA_ARGS__)
|
||||||
|
#define LOG_WARNING(MSG, ...) server_log("WARNING", __func__, __LINE__, MSG, __VA_ARGS__)
|
||||||
|
#define LOG_INFO( MSG, ...) server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__)
|
||||||
|
|
||||||
|
//
|
||||||
|
// parallel
|
||||||
|
//
|
||||||
|
|
||||||
|
enum server_state {
|
||||||
|
SERVER_STATE_LOADING_MODEL, // Server is starting up, model not fully loaded yet
|
||||||
|
SERVER_STATE_READY, // Server is ready and model is loaded
|
||||||
|
SERVER_STATE_ERROR // An error occurred, load_model failed
|
||||||
|
};
|
||||||
|
|
||||||
|
enum task_type {
|
||||||
|
TASK_TYPE_COMPLETION,
|
||||||
|
TASK_TYPE_CANCEL,
|
||||||
|
TASK_TYPE_NEXT_RESPONSE
|
||||||
|
};
|
||||||
|
|
||||||
|
struct task_server {
|
||||||
|
int id = -1; // to be filled by llama_server_queue
|
||||||
|
int target_id;
|
||||||
|
task_type type;
|
||||||
|
json data;
|
||||||
|
bool infill_mode = false;
|
||||||
|
bool embedding_mode = false;
|
||||||
|
int multitask_id = -1;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct task_result {
|
||||||
|
int id;
|
||||||
|
int multitask_id = -1;
|
||||||
|
bool stop;
|
||||||
|
bool error;
|
||||||
|
json result_json;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct task_multi {
|
||||||
|
int id;
|
||||||
|
std::set<int> subtasks_remaining{};
|
||||||
|
std::vector<task_result> results{};
|
||||||
|
};
|
||||||
|
|
||||||
|
// TODO: can become bool if we can't find use of more states
|
||||||
|
enum slot_state
|
||||||
|
{
|
||||||
|
IDLE,
|
||||||
|
PROCESSING,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum slot_command
|
||||||
|
{
|
||||||
|
NONE,
|
||||||
|
LOAD_PROMPT,
|
||||||
|
RELEASE,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct slot_params
|
||||||
|
{
|
||||||
|
bool stream = true;
|
||||||
|
bool cache_prompt = false; // remember the prompt to avoid reprocessing all prompt
|
||||||
|
|
||||||
|
uint32_t seed = -1; // RNG seed
|
||||||
|
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
||||||
|
int32_t n_predict = -1; // new tokens to predict
|
||||||
|
|
||||||
|
std::vector<std::string> antiprompt;
|
||||||
|
|
||||||
|
json input_prefix;
|
||||||
|
json input_suffix;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct slot_image
|
||||||
|
{
|
||||||
|
int32_t id;
|
||||||
|
|
||||||
|
bool request_encode_image = false;
|
||||||
|
float * image_embedding = nullptr;
|
||||||
|
int32_t image_tokens = 0;
|
||||||
|
|
||||||
|
clip_image_u8 * img_data;
|
||||||
|
|
||||||
|
std::string prefix_prompt; // before of this image
|
||||||
|
};
|
||||||
|
|
||||||
|
// completion token output with probabilities
|
||||||
|
struct completion_token_output
|
||||||
|
{
|
||||||
|
struct token_prob
|
||||||
|
{
|
||||||
|
llama_token tok;
|
||||||
|
float prob;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<token_prob> probs;
|
||||||
|
llama_token tok;
|
||||||
|
std::string text_to_send;
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline void server_log(const char *level, const char *function, int line,
|
||||||
|
const char *message, const nlohmann::ordered_json &extra)
|
||||||
|
{
|
||||||
|
nlohmann::ordered_json log
|
||||||
|
{
|
||||||
|
{"timestamp", time(nullptr)},
|
||||||
|
{"level", level},
|
||||||
|
{"function", function},
|
||||||
|
{"line", line},
|
||||||
|
{"message", message},
|
||||||
|
};
|
||||||
|
|
||||||
|
if (!extra.empty())
|
||||||
|
{
|
||||||
|
log.merge_patch(extra);
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::string str = log.dump(-1, ' ', false, json::error_handler_t::replace);
|
||||||
|
printf("%.*s\n", (int)str.size(), str.data());
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// server utils
|
||||||
|
//
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static T json_value(const json &body, const std::string &key, const T &default_value)
|
||||||
|
{
|
||||||
|
// Fallback null to default value
|
||||||
|
return body.contains(key) && !body.at(key).is_null()
|
||||||
|
? body.value(key, default_value)
|
||||||
|
: default_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline std::string format_chatml(std::vector<json> messages)
|
||||||
|
{
|
||||||
|
std::ostringstream chatml_msgs;
|
||||||
|
|
||||||
|
for (auto it = messages.begin(); it != messages.end(); ++it) {
|
||||||
|
chatml_msgs << "<|im_start|>"
|
||||||
|
<< json_value(*it, "role", std::string("user")) << '\n';
|
||||||
|
chatml_msgs << json_value(*it, "content", std::string(""))
|
||||||
|
<< "<|im_end|>\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
chatml_msgs << "<|im_start|>assistant" << '\n';
|
||||||
|
|
||||||
|
return chatml_msgs.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// work queue utils
|
||||||
|
//
|
||||||
|
|
||||||
|
struct llama_server_queue {
|
||||||
|
int id = 0;
|
||||||
|
std::mutex mutex_tasks;
|
||||||
|
// queues
|
||||||
|
std::vector<task_server> queue_tasks;
|
||||||
|
std::vector<task_server> queue_tasks_deferred;
|
||||||
|
std::vector<task_multi> queue_multitasks;
|
||||||
|
std::condition_variable condition_tasks;
|
||||||
|
// callback functions
|
||||||
|
std::function<void(task_server&)> callback_new_task;
|
||||||
|
std::function<void(task_multi&)> callback_finish_multitask;
|
||||||
|
std::function<void(void)> callback_all_task_finished;
|
||||||
|
|
||||||
|
// Add a new task to the end of the queue
|
||||||
|
int post(task_server task) {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
|
if (task.id == -1) {
|
||||||
|
task.id = id++;
|
||||||
|
}
|
||||||
|
queue_tasks.push_back(std::move(task));
|
||||||
|
condition_tasks.notify_one();
|
||||||
|
return task.id;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add a new task, but defer until one slot is available
|
||||||
|
void defer(task_server task) {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
|
queue_tasks_deferred.push_back(std::move(task));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the next id for creating anew task
|
||||||
|
int get_new_id() {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
|
return id++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Register function to process a new task
|
||||||
|
void on_new_task(std::function<void(task_server&)> callback) {
|
||||||
|
callback_new_task = callback;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Register function to process a multitask
|
||||||
|
void on_finish_multitask(std::function<void(task_multi&)> callback) {
|
||||||
|
callback_finish_multitask = callback;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Register the function to be called when the batch of tasks is finished
|
||||||
|
void on_all_tasks_finished(std::function<void(void)> callback) {
|
||||||
|
callback_all_task_finished = callback;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Call when the state of one slot is changed
|
||||||
|
void notify_slot_changed() {
|
||||||
|
// move deferred tasks back to main loop
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
|
for (auto & task : queue_tasks_deferred) {
|
||||||
|
queue_tasks.push_back(std::move(task));
|
||||||
|
}
|
||||||
|
queue_tasks_deferred.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start the main loop. This call is blocking
|
||||||
|
[[noreturn]]
|
||||||
|
void start_loop() {
|
||||||
|
while (true) {
|
||||||
|
// new task arrived
|
||||||
|
LOG_VERBOSE("have new task", {});
|
||||||
|
{
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
|
if (queue_tasks.empty()) {
|
||||||
|
lock.unlock();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
task_server task = queue_tasks.front();
|
||||||
|
queue_tasks.erase(queue_tasks.begin());
|
||||||
|
lock.unlock();
|
||||||
|
LOG_VERBOSE("callback_new_task", {});
|
||||||
|
callback_new_task(task);
|
||||||
|
}
|
||||||
|
LOG_VERBOSE("callback_all_task_finished", {});
|
||||||
|
// process and update all the multitasks
|
||||||
|
auto queue_iterator = queue_multitasks.begin();
|
||||||
|
while (queue_iterator != queue_multitasks.end())
|
||||||
|
{
|
||||||
|
if (queue_iterator->subtasks_remaining.empty())
|
||||||
|
{
|
||||||
|
// all subtasks done == multitask is done
|
||||||
|
task_multi current_multitask = *queue_iterator;
|
||||||
|
callback_finish_multitask(current_multitask);
|
||||||
|
// remove this multitask
|
||||||
|
queue_iterator = queue_multitasks.erase(queue_iterator);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
++queue_iterator;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// all tasks in the current loop is finished
|
||||||
|
callback_all_task_finished();
|
||||||
|
}
|
||||||
|
LOG_VERBOSE("wait for new task", {});
|
||||||
|
// wait for new task
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
|
if (queue_tasks.empty()) {
|
||||||
|
condition_tasks.wait(lock, [&]{
|
||||||
|
return !queue_tasks.empty();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// functions to manage multitasks
|
||||||
|
//
|
||||||
|
|
||||||
|
// add a multitask by specifying the id of all subtask (subtask is a task_server)
|
||||||
|
void add_multitask(int multitask_id, std::vector<int>& sub_ids)
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(mutex_tasks);
|
||||||
|
task_multi multi;
|
||||||
|
multi.id = multitask_id;
|
||||||
|
std::copy(sub_ids.begin(), sub_ids.end(), std::inserter(multi.subtasks_remaining, multi.subtasks_remaining.end()));
|
||||||
|
queue_multitasks.push_back(multi);
|
||||||
|
}
|
||||||
|
|
||||||
|
// updatethe remaining subtasks, while appending results to multitask
|
||||||
|
void update_multitask(int multitask_id, int subtask_id, task_result& result)
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(mutex_tasks);
|
||||||
|
for (auto& multitask : queue_multitasks)
|
||||||
|
{
|
||||||
|
if (multitask.id == multitask_id)
|
||||||
|
{
|
||||||
|
multitask.subtasks_remaining.erase(subtask_id);
|
||||||
|
multitask.results.push_back(result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct llama_server_response {
|
||||||
|
typedef std::function<void(int, int, task_result&)> callback_multitask_t;
|
||||||
|
callback_multitask_t callback_update_multitask;
|
||||||
|
// for keeping track of all tasks waiting for the result
|
||||||
|
std::set<int> waiting_task_ids;
|
||||||
|
// the main result queue
|
||||||
|
std::vector<task_result> queue_results;
|
||||||
|
std::mutex mutex_results;
|
||||||
|
std::condition_variable condition_results;
|
||||||
|
|
||||||
|
void add_waiting_task_id(int task_id) {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_results);
|
||||||
|
waiting_task_ids.insert(task_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
void remove_waiting_task_id(int task_id) {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_results);
|
||||||
|
waiting_task_ids.erase(task_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
// This function blocks the thread until there is a response for this task_id
|
||||||
|
task_result recv(int task_id) {
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_results);
|
||||||
|
condition_results.wait(lock, [&]{
|
||||||
|
return !queue_results.empty();
|
||||||
|
});
|
||||||
|
LOG_VERBOSE("condition_results unblock", {});
|
||||||
|
|
||||||
|
for (int i = 0; i < (int) queue_results.size(); i++)
|
||||||
|
{
|
||||||
|
if (queue_results[i].id == task_id)
|
||||||
|
{
|
||||||
|
assert(queue_results[i].multitask_id == -1);
|
||||||
|
task_result res = queue_results[i];
|
||||||
|
queue_results.erase(queue_results.begin() + i);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// should never reach here
|
||||||
|
}
|
||||||
|
|
||||||
|
// Register the function to update multitask
|
||||||
|
void on_multitask_update(callback_multitask_t callback) {
|
||||||
|
callback_update_multitask = callback;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Send a new result to a waiting task_id
|
||||||
|
void send(task_result result) {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_results);
|
||||||
|
LOG_VERBOSE("send new result", {});
|
||||||
|
for (auto& task_id : waiting_task_ids) {
|
||||||
|
// LOG_TEE("waiting task id %i \n", task_id);
|
||||||
|
// for now, tasks that have associated parent multitasks just get erased once multitask picks up the result
|
||||||
|
if (result.multitask_id == task_id)
|
||||||
|
{
|
||||||
|
LOG_VERBOSE("callback_update_multitask", {});
|
||||||
|
callback_update_multitask(task_id, result.id, result);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result.id == task_id)
|
||||||
|
{
|
||||||
|
LOG_VERBOSE("queue_results.push_back", {});
|
||||||
|
queue_results.push_back(result);
|
||||||
|
condition_results.notify_one();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
//
|
||||||
|
// base64 utils (TODO: move to common in the future)
|
||||||
|
//
|
||||||
|
|
||||||
|
static const std::string base64_chars =
|
||||||
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||||
|
"abcdefghijklmnopqrstuvwxyz"
|
||||||
|
"0123456789+/";
|
||||||
|
|
||||||
|
static inline bool is_base64(uint8_t c)
|
||||||
|
{
|
||||||
|
return (isalnum(c) || (c == '+') || (c == '/'));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline std::vector<uint8_t> base64_decode(const std::string & encoded_string)
|
||||||
|
{
|
||||||
|
int i = 0;
|
||||||
|
int j = 0;
|
||||||
|
int in_ = 0;
|
||||||
|
|
||||||
|
int in_len = encoded_string.size();
|
||||||
|
|
||||||
|
uint8_t char_array_4[4];
|
||||||
|
uint8_t char_array_3[3];
|
||||||
|
|
||||||
|
std::vector<uint8_t> ret;
|
||||||
|
|
||||||
|
while (in_len-- && (encoded_string[in_] != '=') && is_base64(encoded_string[in_]))
|
||||||
|
{
|
||||||
|
char_array_4[i++] = encoded_string[in_]; in_++;
|
||||||
|
if (i == 4)
|
||||||
|
{
|
||||||
|
for (i = 0; i <4; i++)
|
||||||
|
{
|
||||||
|
char_array_4[i] = base64_chars.find(char_array_4[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
char_array_3[0] = ((char_array_4[0] ) << 2) + ((char_array_4[1] & 0x30) >> 4);
|
||||||
|
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
|
||||||
|
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
|
||||||
|
|
||||||
|
for (i = 0; (i < 3); i++)
|
||||||
|
{
|
||||||
|
ret.push_back(char_array_3[i]);
|
||||||
|
}
|
||||||
|
i = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i)
|
||||||
|
{
|
||||||
|
for (j = i; j <4; j++)
|
||||||
|
{
|
||||||
|
char_array_4[j] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = 0; j <4; j++)
|
||||||
|
{
|
||||||
|
char_array_4[j] = base64_chars.find(char_array_4[j]);
|
||||||
|
}
|
||||||
|
|
||||||
|
char_array_3[0] = ((char_array_4[0] ) << 2) + ((char_array_4[1] & 0x30) >> 4);
|
||||||
|
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
|
||||||
|
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
|
||||||
|
|
||||||
|
for (j = 0; (j < i - 1); j++)
|
||||||
|
{
|
||||||
|
ret.push_back(char_array_3[j]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// random string / id
|
||||||
|
//
|
||||||
|
|
||||||
|
static std::string random_string()
|
||||||
|
{
|
||||||
|
static const std::string str("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
|
||||||
|
|
||||||
|
std::random_device rd;
|
||||||
|
std::mt19937 generator(rd());
|
||||||
|
|
||||||
|
std::string result(32, ' ');
|
||||||
|
|
||||||
|
for (int i = 0; i < 32; ++i) {
|
||||||
|
result[i] = str[generator() % str.size()];
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::string gen_chatcmplid()
|
||||||
|
{
|
||||||
|
std::stringstream chatcmplid;
|
||||||
|
chatcmplid << "chatcmpl-" << random_string();
|
||||||
|
return chatcmplid.str();
|
||||||
|
}
|
||||||
@@ -5,8 +5,6 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
|
|
||||||
rwkv "github.com/go-skynet/LocalAI/pkg/backend/llm/rwkv"
|
|
||||||
|
|
||||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -17,7 +15,7 @@ var (
|
|||||||
func main() {
|
func main() {
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, &rwkv.LLM{}); err != nil {
|
if err := grpc.StartServer(*addr, &Image{}); err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package image
|
package main
|
||||||
|
|
||||||
// This is a wrapper to statisfy the GRPC service interface
|
// This is a wrapper to statisfy the GRPC service interface
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||||
@@ -8,20 +8,20 @@ import (
|
|||||||
"github.com/go-skynet/LocalAI/pkg/stablediffusion"
|
"github.com/go-skynet/LocalAI/pkg/stablediffusion"
|
||||||
)
|
)
|
||||||
|
|
||||||
type StableDiffusion struct {
|
type Image struct {
|
||||||
base.SingleThread
|
base.SingleThread
|
||||||
stablediffusion *stablediffusion.StableDiffusion
|
stablediffusion *stablediffusion.StableDiffusion
|
||||||
}
|
}
|
||||||
|
|
||||||
func (sd *StableDiffusion) Load(opts *pb.ModelOptions) error {
|
func (image *Image) Load(opts *pb.ModelOptions) error {
|
||||||
var err error
|
var err error
|
||||||
// Note: the Model here is a path to a directory containing the model files
|
// Note: the Model here is a path to a directory containing the model files
|
||||||
sd.stablediffusion, err = stablediffusion.New(opts.ModelFile)
|
image.stablediffusion, err = stablediffusion.New(opts.ModelFile)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (sd *StableDiffusion) GenerateImage(opts *pb.GenerateImageRequest) error {
|
func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
|
||||||
return sd.stablediffusion.GenerateImage(
|
return image.stablediffusion.GenerateImage(
|
||||||
int(opts.Height),
|
int(opts.Height),
|
||||||
int(opts.Width),
|
int(opts.Width),
|
||||||
int(opts.Mode),
|
int(opts.Mode),
|
||||||
@@ -5,7 +5,6 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
|
|
||||||
bert "github.com/go-skynet/LocalAI/pkg/backend/llm/bert"
|
|
||||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -16,7 +15,7 @@ var (
|
|||||||
func main() {
|
func main() {
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, &bert.Embeddings{}); err != nil {
|
if err := grpc.StartServer(*addr, &Image{}); err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
backend/go/image/tinydream/tinydream.go (new file, 32 lines)
@@ -0,0 +1,32 @@
package main

// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
	"github.com/go-skynet/LocalAI/pkg/grpc/base"
	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
	"github.com/go-skynet/LocalAI/pkg/tinydream"
)

type Image struct {
	base.SingleThread
	tinydream *tinydream.TinyDream
}

func (image *Image) Load(opts *pb.ModelOptions) error {
	var err error
	// Note: the Model here is a path to a directory containing the model files
	image.tinydream, err = tinydream.New(opts.ModelFile)
	return err
}

func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
	return image.tinydream.GenerateImage(
		int(opts.Height),
		int(opts.Width),
		int(opts.Step),
		int(opts.Seed),
		opts.PositivePrompt,
		opts.NegativePrompt,
		opts.Dst)
}
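For reference, a client-side sketch of driving the wrapper above over gRPC. The request fields mirror the ones this wrapper reads (Height, Width, Step, Seed, PositivePrompt, NegativePrompt, Dst); the address, model path, and concrete values are illustrative assumptions:

```
package main

import (
	"context"
	"log"

	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

func main() {
	conn, err := grpc.Dial("localhost:50051", grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	client := pb.NewBackendClient(conn)

	// The wrapper's Load treats the model as a path to a directory containing the model files.
	if _, err := client.LoadModel(context.Background(), &pb.ModelOptions{ModelFile: "/models/tinydream"}); err != nil {
		log.Fatal(err)
	}

	// These are exactly the fields the wrapper forwards to tinydream.GenerateImage.
	req := &pb.GenerateImageRequest{
		Height:         512,
		Width:          512,
		Step:           30,
		Seed:           42,
		PositivePrompt: "a lighthouse at sunset",
		NegativePrompt: "blurry",
		Dst:            "/tmp/out.png",
	}
	if _, err := client.GenerateImage(context.Background(), req); err != nil {
		log.Fatal(err)
	}
}
```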
@@ -1,4 +1,4 @@
|
|||||||
package bert
|
package main
|
||||||
|
|
||||||
// This is a wrapper to statisfy the GRPC service interface
|
// This is a wrapper to statisfy the GRPC service interface
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||||
@@ -5,8 +5,6 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
|
|
||||||
gpt4all "github.com/go-skynet/LocalAI/pkg/backend/llm/gpt4all"
|
|
||||||
|
|
||||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -17,7 +15,7 @@ var (
|
|||||||
func main() {
|
func main() {
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, &gpt4all.LLM{}); err != nil {
|
if err := grpc.StartServer(*addr, &Embeddings{}); err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package gpt4all
|
package main
|
||||||
|
|
||||||
// This is a wrapper to statisfy the GRPC service interface
|
// This is a wrapper to statisfy the GRPC service interface
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||||
backend/go/llm/gpt4all/main.go (new file, 21 lines)
@@ -0,0 +1,21 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// Note: this is started internally by LocalAI and a server is allocated for each model
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
|
||||||
|
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package langchain
|
package main
|
||||||
|
|
||||||
// This is a wrapper to statisfy the GRPC service interface
|
// This is a wrapper to statisfy the GRPC service interface
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||||
backend/go/llm/langchain/main.go (new file, 21 lines)
@@ -0,0 +1,21 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// Note: this is started internally by LocalAI and a server is allocated for each model
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
|
||||||
|
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package llama
|
package main
|
||||||
|
|
||||||
// This is a wrapper to statisfy the GRPC service interface
|
// This is a wrapper to statisfy the GRPC service interface
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||||
@@ -3,8 +3,6 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
|
|
||||||
llama "github.com/go-skynet/LocalAI/pkg/backend/llm/llama-stable"
|
|
||||||
|
|
||||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -15,7 +13,7 @@ var (
|
|||||||
func main() {
|
func main() {
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, &llama.LLM{}); err != nil {
|
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package llama
|
package main
|
||||||
|
|
||||||
// This is a wrapper to statisfy the GRPC service interface
|
// This is a wrapper to statisfy the GRPC service interface
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||||
@@ -1,12 +1,12 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
|
// GRPC Falcon server
|
||||||
|
|
||||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
// Note: this is started internally by LocalAI and a server is allocated for each model
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
|
|
||||||
tts "github.com/go-skynet/LocalAI/pkg/backend/tts"
|
|
||||||
|
|
||||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -17,7 +17,7 @@ var (
|
|||||||
func main() {
|
func main() {
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, &tts.Piper{}); err != nil {
|
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
backend/go/llm/rwkv/main.go (new file, 21 lines)
@@ -0,0 +1,21 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// Note: this is started internally by LocalAI and a server is allocated for each model
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
|
||||||
|
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package rwkv
|
package main
|
||||||
|
|
||||||
// This is a wrapper to statisfy the GRPC service interface
|
// This is a wrapper to statisfy the GRPC service interface
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||||
backend/go/transcribe/main.go (new file, 21 lines)
@@ -0,0 +1,21 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// Note: this is started internally by LocalAI and a server is allocated for each model
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
|
||||||
|
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
if err := grpc.StartServer(*addr, &Whisper{}); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package transcribe
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package transcribe
|
package main
|
||||||
|
|
||||||
// This is a wrapper to statisfy the GRPC service interface
|
// This is a wrapper to statisfy the GRPC service interface
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||||
backend/go/tts/main.go (new file, 21 lines)
@@ -0,0 +1,21 @@
package main

// Note: this is started internally by LocalAI and a server is allocated for each model

import (
	"flag"

	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)

var (
	addr = flag.String("addr", "localhost:50051", "the address to connect to")
)

func main() {
	flag.Parse()

	if err := grpc.StartServer(*addr, &Piper{}); err != nil {
		panic(err)
	}
}
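All of the per-backend main.go files above follow the same pattern: a small executable that hands one backend implementation to `grpc.StartServer`. A rough sketch of that pattern with a hypothetical `Echo` backend is below; it assumes `base.SingleThread` supplies defaults for the interface methods that are not overridden, and the `Load`/`Predict` signatures are assumptions modelled on the wrappers in this diff:

```
package main

// Hypothetical example backend, illustrating the wrapper pattern used by the
// main.go files in this change: embed base.SingleThread and override only the
// methods the backend actually implements. Signatures are assumptions.

import (
	"flag"
	"fmt"

	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
	"github.com/go-skynet/LocalAI/pkg/grpc/base"
	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
)

type Echo struct {
	base.SingleThread
	model string
}

func (e *Echo) Load(opts *pb.ModelOptions) error {
	// Remember which model was requested; a real backend would load weights here.
	e.model = opts.ModelFile
	return nil
}

func (e *Echo) Predict(opts *pb.PredictOptions) (string, error) {
	// Echo the prompt back, prefixed with the model name.
	return fmt.Sprintf("[%s] %s", e.model, opts.Prompt), nil
}

var addr = flag.String("addr", "localhost:50051", "the address to connect to")

func main() {
	flag.Parse()

	if err := grpc.StartServer(*addr, &Echo{}); err != nil {
		panic(err)
	}
}
```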
@@ -1,4 +1,4 @@
|
|||||||
package tts
|
package main
|
||||||
|
|
||||||
// This is a wrapper to statisfy the GRPC service interface
|
// This is a wrapper to statisfy the GRPC service interface
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||||
backend/python/autogptq/Makefile (new file, 4 lines)
@@ -0,0 +1,4 @@
|
|||||||
|
.PHONY: autogptq
|
||||||
|
autogptq:
|
||||||
|
$(MAKE) -C ../common-env/transformers
|
||||||
|
|
||||||
backend/python/autogptq/backend_pb2.py (new file, 61 lines; diff suppressed because one or more lines are too long)
@@ -6,7 +6,7 @@
|
|||||||
export PATH=$PATH:/opt/conda/bin
|
export PATH=$PATH:/opt/conda/bin
|
||||||
|
|
||||||
# Activate conda environment
|
# Activate conda environment
|
||||||
source activate autogptq
|
source activate transformers
|
||||||
|
|
||||||
# get the directory where the bash script is located
|
# get the directory where the bash script is located
|
||||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||||
backend/python/bark/Makefile (new file, 15 lines)
@@ -0,0 +1,15 @@
|
|||||||
|
.PHONY: ttsbark
|
||||||
|
ttsbark:
|
||||||
|
$(MAKE) -C ../common-env/transformers
|
||||||
|
|
||||||
|
.PHONY: run
|
||||||
|
run:
|
||||||
|
@echo "Running bark..."
|
||||||
|
bash run.sh
|
||||||
|
@echo "bark run."
|
||||||
|
|
||||||
|
.PHONY: test
|
||||||
|
test:
|
||||||
|
@echo "Testing bark..."
|
||||||
|
bash test.sh
|
||||||
|
@echo "bark tested."
|
||||||
backend/python/bark/backend_pb2.py (new file, 61 lines; diff suppressed because one or more lines are too long)
@@ -6,7 +6,7 @@
|
|||||||
export PATH=$PATH:/opt/conda/bin
|
export PATH=$PATH:/opt/conda/bin
|
||||||
|
|
||||||
# Activate conda environment
|
# Activate conda environment
|
||||||
source activate ttsbark
|
source activate transformers
|
||||||
|
|
||||||
# get the directory where the bash script is located
|
# get the directory where the bash script is located
|
||||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||||
backend/python/bark/test.py (new file, 81 lines)
@@ -0,0 +1,81 @@
|
|||||||
|
"""
|
||||||
|
A test script to test the gRPC service
|
||||||
|
"""
|
||||||
|
import unittest
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
import backend_pb2
|
||||||
|
import backend_pb2_grpc
|
||||||
|
|
||||||
|
import grpc
|
||||||
|
|
||||||
|
|
||||||
|
class TestBackendServicer(unittest.TestCase):
|
||||||
|
"""
|
||||||
|
TestBackendServicer is the class that tests the gRPC service
|
||||||
|
"""
|
||||||
|
def setUp(self):
|
||||||
|
"""
|
||||||
|
This method sets up the gRPC service by starting the server
|
||||||
|
"""
|
||||||
|
self.service = subprocess.Popen(["python3", "ttsbark.py", "--addr", "localhost:50051"])
|
||||||
|
time.sleep(10)
|
||||||
|
|
||||||
|
def tearDown(self) -> None:
|
||||||
|
"""
|
||||||
|
This method tears down the gRPC service by terminating the server
|
||||||
|
"""
|
||||||
|
self.service.terminate()
|
||||||
|
self.service.wait()
|
||||||
|
|
||||||
|
def test_server_startup(self):
|
||||||
|
"""
|
||||||
|
This method tests if the server starts up successfully
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
self.setUp()
|
||||||
|
with grpc.insecure_channel("localhost:50051") as channel:
|
||||||
|
stub = backend_pb2_grpc.BackendStub(channel)
|
||||||
|
response = stub.Health(backend_pb2.HealthMessage())
|
||||||
|
self.assertEqual(response.message, b'OK')
|
||||||
|
except Exception as err:
|
||||||
|
print(err)
|
||||||
|
self.fail("Server failed to start")
|
||||||
|
finally:
|
||||||
|
self.tearDown()
|
||||||
|
|
||||||
|
def test_load_model(self):
|
||||||
|
"""
|
||||||
|
This method tests if the model is loaded successfully
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
self.setUp()
|
||||||
|
with grpc.insecure_channel("localhost:50051") as channel:
|
||||||
|
stub = backend_pb2_grpc.BackendStub(channel)
|
||||||
|
response = stub.LoadModel(backend_pb2.ModelOptions(Model="v2/en_speaker_4"))
|
||||||
|
self.assertTrue(response.success)
|
||||||
|
self.assertEqual(response.message, "Model loaded successfully")
|
||||||
|
except Exception as err:
|
||||||
|
print(err)
|
||||||
|
self.fail("LoadModel service failed")
|
||||||
|
finally:
|
||||||
|
self.tearDown()
|
||||||
|
|
||||||
|
def test_tts(self):
|
||||||
|
"""
|
||||||
|
This method tests if the embeddings are generated successfully
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
self.setUp()
|
||||||
|
with grpc.insecure_channel("localhost:50051") as channel:
|
||||||
|
stub = backend_pb2_grpc.BackendStub(channel)
|
||||||
|
response = stub.LoadModel(backend_pb2.ModelOptions(Model="v2/en_speaker_4"))
|
||||||
|
self.assertTrue(response.success)
|
||||||
|
tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story")
|
||||||
|
tts_response = stub.TTS(tts_request)
|
||||||
|
self.assertIsNotNone(tts_response)
|
||||||
|
except Exception as err:
|
||||||
|
print(err)
|
||||||
|
self.fail("TTS service failed")
|
||||||
|
finally:
|
||||||
|
self.tearDown()
|
||||||
@@ -1,11 +1,11 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
##
|
##
|
||||||
## A bash script wrapper that runs the huggingface server with conda
|
## A bash script wrapper that runs the bark server with conda
|
||||||
|
|
||||||
# Activate conda environment
|
# Activate conda environment
|
||||||
source activate huggingface
|
source activate transformers
|
||||||
|
|
||||||
# get the directory where the bash script is located
|
# get the directory where the bash script is located
|
||||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||||
|
|
||||||
python -m unittest $DIR/test_huggingface.py
|
python -m unittest $DIR/test.py
|
||||||
@@ -1,8 +1,7 @@
|
|||||||
"""
|
|
||||||
This is the extra gRPC server of LocalAI
|
|
||||||
"""
|
|
||||||
|
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
This is an extra gRPC server of LocalAI for Bark TTS
|
||||||
|
"""
|
||||||
from concurrent import futures
|
from concurrent import futures
|
||||||
import time
|
import time
|
||||||
import argparse
|
import argparse
|
||||||
backend/python/common-env/transformers/Makefile (new file, 10 lines)
@@ -0,0 +1,10 @@
|
|||||||
|
CONDA_ENV_PATH = "transformers.yml"
|
||||||
|
|
||||||
|
ifeq ($(BUILD_TYPE), cublas)
|
||||||
|
CONDA_ENV_PATH = "transformers-nvidia.yml"
|
||||||
|
endif
|
||||||
|
|
||||||
|
.PHONY: transformers
|
||||||
|
transformers:
|
||||||
|
@echo "Installing $(CONDA_ENV_PATH)..."
|
||||||
|
bash install.sh $(CONDA_ENV_PATH)
|
||||||
backend/python/common-env/transformers/install.sh (new file, 24 lines)
@@ -0,0 +1,24 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
# Check if environment exist
|
||||||
|
conda_env_exists(){
|
||||||
|
! conda list --name "${@}" >/dev/null 2>/dev/null
|
||||||
|
}
|
||||||
|
|
||||||
|
if conda_env_exists "transformers" ; then
|
||||||
|
echo "Creating virtual environment..."
|
||||||
|
conda env create --name transformers --file $1
|
||||||
|
echo "Virtual environment created."
|
||||||
|
else
|
||||||
|
echo "Virtual environment already exists."
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$PIP_CACHE_PURGE" = true ] ; then
|
||||||
|
export PATH=$PATH:/opt/conda/bin
|
||||||
|
|
||||||
|
# Activate conda environment
|
||||||
|
source activate transformers
|
||||||
|
|
||||||
|
pip cache purge
|
||||||
|
fi
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
name: bark
|
name: transformers
|
||||||
channels:
|
channels:
|
||||||
- defaults
|
- defaults
|
||||||
dependencies:
|
dependencies:
|
||||||
@@ -35,6 +35,8 @@ dependencies:
|
|||||||
- certifi==2023.7.22
|
- certifi==2023.7.22
|
||||||
- charset-normalizer==3.3.0
|
- charset-normalizer==3.3.0
|
||||||
- datasets==2.14.5
|
- datasets==2.14.5
|
||||||
|
- sentence-transformers==2.2.2
|
||||||
|
- sentencepiece==0.1.99
|
||||||
- dill==0.3.7
|
- dill==0.3.7
|
||||||
- einops==0.7.0
|
- einops==0.7.0
|
||||||
- encodec==0.1.1
|
- encodec==0.1.1
|
||||||
@@ -43,7 +45,7 @@ dependencies:
|
|||||||
- fsspec==2023.6.0
|
- fsspec==2023.6.0
|
||||||
- funcy==2.0
|
- funcy==2.0
|
||||||
- grpcio==1.59.0
|
- grpcio==1.59.0
|
||||||
- huggingface-hub==0.16.4
|
- huggingface-hub
|
||||||
- idna==3.4
|
- idna==3.4
|
||||||
- jinja2==3.1.2
|
- jinja2==3.1.2
|
||||||
- jmespath==1.0.1
|
- jmespath==1.0.1
|
||||||
@@ -51,7 +53,7 @@ dependencies:
|
|||||||
- mpmath==1.3.0
|
- mpmath==1.3.0
|
||||||
- multidict==6.0.4
|
- multidict==6.0.4
|
||||||
- multiprocess==0.70.15
|
- multiprocess==0.70.15
|
||||||
- networkx==3.1
|
- networkx
|
||||||
- numpy==1.26.0
|
- numpy==1.26.0
|
||||||
- nvidia-cublas-cu12==12.1.3.1
|
- nvidia-cublas-cu12==12.1.3.1
|
||||||
- nvidia-cuda-cupti-cu12==12.1.105
|
- nvidia-cuda-cupti-cu12==12.1.105
|
||||||
@@ -66,7 +68,7 @@ dependencies:
|
|||||||
- nvidia-nvjitlink-cu12==12.2.140
|
- nvidia-nvjitlink-cu12==12.2.140
|
||||||
- nvidia-nvtx-cu12==12.1.105
|
- nvidia-nvtx-cu12==12.1.105
|
||||||
- packaging==23.2
|
- packaging==23.2
|
||||||
- pandas==2.1.1
|
- pandas
|
||||||
- peft==0.5.0
|
- peft==0.5.0
|
||||||
- protobuf==4.24.4
|
- protobuf==4.24.4
|
||||||
- psutil==5.9.5
|
- psutil==5.9.5
|
||||||
@@ -82,15 +84,35 @@ dependencies:
|
|||||||
- scipy==1.11.3
|
- scipy==1.11.3
|
||||||
- six==1.16.0
|
- six==1.16.0
|
||||||
- sympy==1.12
|
- sympy==1.12
|
||||||
- tokenizers==0.14.0
|
- tokenizers
|
||||||
- torch==2.1.0
|
- torch==2.1.2
|
||||||
- torchaudio==2.1.0
|
- torchaudio==2.1.2
|
||||||
- tqdm==4.66.1
|
- tqdm==4.66.1
|
||||||
- transformers==4.34.0
|
|
||||||
- triton==2.1.0
|
- triton==2.1.0
|
||||||
- typing-extensions==4.8.0
|
- typing-extensions==4.8.0
|
||||||
- tzdata==2023.3
|
- tzdata==2023.3
|
||||||
- urllib3==1.26.17
|
- urllib3==1.26.17
|
||||||
- xxhash==3.4.1
|
- xxhash==3.4.1
|
||||||
|
- auto-gptq==0.6.0
|
||||||
- yarl==1.9.2
|
- yarl==1.9.2
|
||||||
prefix: /opt/conda/envs/bark
|
- soundfile
|
||||||
|
- langid
|
||||||
|
- wget
|
||||||
|
- unidecode
|
||||||
|
- pyopenjtalk-prebuilt
|
||||||
|
- pypinyin
|
||||||
|
- inflect
|
||||||
|
- cn2an
|
||||||
|
- jieba
|
||||||
|
- eng_to_ipa
|
||||||
|
- openai-whisper
|
||||||
|
- matplotlib
|
||||||
|
- gradio==3.41.2
|
||||||
|
- nltk
|
||||||
|
- sudachipy
|
||||||
|
- sudachidict_core
|
||||||
|
- vocos
|
||||||
|
- vllm==0.2.7
|
||||||
|
- transformers>=4.36.0 # Required for Mixtral.
|
||||||
|
- xformers==0.0.23.post1
|
||||||
|
prefix: /opt/conda/envs/transformers
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
name: vllm
|
name: transformers
|
||||||
channels:
|
channels:
|
||||||
- defaults
|
- defaults
|
||||||
dependencies:
|
dependencies:
|
||||||
@@ -24,76 +24,84 @@ dependencies:
|
|||||||
- xz=5.4.2=h5eee18b_0
|
- xz=5.4.2=h5eee18b_0
|
||||||
- zlib=1.2.13=h5eee18b_0
|
- zlib=1.2.13=h5eee18b_0
|
||||||
- pip:
|
- pip:
|
||||||
|
- accelerate==0.23.0
|
||||||
|
- aiohttp==3.8.5
|
||||||
- aiosignal==1.3.1
|
- aiosignal==1.3.1
|
||||||
- anyio==3.7.1
|
- async-timeout==4.0.3
|
||||||
- attrs==23.1.0
|
- attrs==23.1.0
|
||||||
|
- bark==0.1.5
|
||||||
|
- boto3==1.28.61
|
||||||
|
- botocore==1.31.61
|
||||||
- certifi==2023.7.22
|
- certifi==2023.7.22
|
||||||
|
- TTS==0.22.0
|
||||||
- charset-normalizer==3.3.0
|
- charset-normalizer==3.3.0
|
||||||
- click==8.1.7
|
- datasets==2.14.5
|
||||||
- cmake==3.27.6
|
- sentence-transformers==2.2.2
|
||||||
- fastapi==0.103.2
|
- sentencepiece==0.1.99
|
||||||
|
- dill==0.3.7
|
||||||
|
- einops==0.7.0
|
||||||
|
- encodec==0.1.1
|
||||||
- filelock==3.12.4
|
- filelock==3.12.4
|
||||||
- frozenlist==1.4.0
|
- frozenlist==1.4.0
|
||||||
- fsspec==2023.9.2
|
- fsspec==2023.6.0
|
||||||
|
- funcy==2.0
|
||||||
- grpcio==1.59.0
|
- grpcio==1.59.0
|
||||||
- h11==0.14.0
|
- huggingface-hub
|
||||||
- httptools==0.6.0
|
|
||||||
- huggingface-hub==0.17.3
|
|
||||||
- idna==3.4
|
- idna==3.4
|
||||||
- jinja2==3.1.2
|
- jinja2==3.1.2
|
||||||
- jsonschema==4.19.1
|
- jmespath==1.0.1
|
||||||
- jsonschema-specifications==2023.7.1
|
|
||||||
- lit==17.0.2
|
|
||||||
- markupsafe==2.1.3
|
- markupsafe==2.1.3
|
||||||
- mpmath==1.3.0
|
- mpmath==1.3.0
|
||||||
- msgpack==1.0.7
|
- multidict==6.0.4
|
||||||
- networkx==3.1
|
- multiprocess==0.70.15
|
||||||
- ninja==1.11.1
|
- networkx
|
||||||
- numpy==1.26.0
|
- numpy==1.26.0
|
||||||
- nvidia-cublas-cu11==11.10.3.66
|
|
||||||
- nvidia-cuda-cupti-cu11==11.7.101
|
|
||||||
- nvidia-cuda-nvrtc-cu11==11.7.99
|
|
||||||
- nvidia-cuda-runtime-cu11==11.7.99
|
|
||||||
- nvidia-cudnn-cu11==8.5.0.96
|
|
||||||
- nvidia-cufft-cu11==10.9.0.58
|
|
||||||
- nvidia-curand-cu11==10.2.10.91
|
|
||||||
- nvidia-cusolver-cu11==11.4.0.1
|
|
||||||
- nvidia-cusparse-cu11==11.7.4.91
|
|
||||||
- nvidia-nccl-cu11==2.14.3
|
|
||||||
- nvidia-nvtx-cu11==11.7.91
|
|
||||||
- packaging==23.2
|
- packaging==23.2
|
||||||
- pandas==2.1.1
|
- pandas
|
||||||
|
- peft==0.5.0
|
||||||
- protobuf==4.24.4
|
- protobuf==4.24.4
|
||||||
- psutil==5.9.5
|
- psutil==5.9.5
|
||||||
- pyarrow==13.0.0
|
- pyarrow==13.0.0
|
||||||
- pydantic==1.10.13
|
|
||||||
- python-dateutil==2.8.2
|
- python-dateutil==2.8.2
|
||||||
- python-dotenv==1.0.0
|
|
||||||
- pytz==2023.3.post1
|
- pytz==2023.3.post1
|
||||||
- pyyaml==6.0.1
|
- pyyaml==6.0.1
|
||||||
- ray==2.7.0
|
|
||||||
- referencing==0.30.2
|
|
||||||
- regex==2023.10.3
|
- regex==2023.10.3
|
||||||
- requests==2.31.0
|
- requests==2.31.0
|
||||||
- rpds-py==0.10.4
|
- rouge==1.0.1
|
||||||
- safetensors==0.4.0
|
- s3transfer==0.7.0
|
||||||
- sentencepiece==0.1.99
|
- safetensors==0.3.3
|
||||||
|
- scipy==1.11.3
|
||||||
- six==1.16.0
|
- six==1.16.0
|
||||||
- sniffio==1.3.0
|
|
||||||
- starlette==0.27.0
|
|
||||||
- sympy==1.12
|
- sympy==1.12
|
||||||
- tokenizers==0.14.1
|
- tokenizers
|
||||||
- torch==2.0.1
|
- torch==2.1.2
|
||||||
|
- torchaudio==2.1.2
|
||||||
- tqdm==4.66.1
|
- tqdm==4.66.1
|
||||||
- transformers==4.34.0
|
- triton==2.1.0
|
||||||
- triton==2.0.0
|
|
||||||
- typing-extensions==4.8.0
|
- typing-extensions==4.8.0
|
||||||
- tzdata==2023.3
|
- tzdata==2023.3
|
||||||
- urllib3==2.0.6
|
- auto-gptq==0.6.0
|
||||||
- uvicorn==0.23.2
|
- urllib3==1.26.17
|
||||||
- uvloop==0.17.0
|
- xxhash==3.4.1
|
||||||
- vllm==0.2.0
|
- yarl==1.9.2
|
||||||
- watchfiles==0.20.0
|
- soundfile
|
||||||
- websockets==11.0.3
|
- langid
|
||||||
- xformers==0.0.22
|
- wget
|
||||||
prefix: /opt/conda/envs/vllm
|
- unidecode
|
||||||
|
- pyopenjtalk-prebuilt
|
||||||
|
- pypinyin
|
||||||
|
- inflect
|
||||||
|
- cn2an
|
||||||
|
- jieba
|
||||||
|
- eng_to_ipa
|
||||||
|
- openai-whisper
|
||||||
|
- matplotlib
|
||||||
|
- gradio==3.41.2
|
||||||
|
- nltk
|
||||||
|
- sudachipy
|
||||||
|
- sudachidict_core
|
||||||
|
- vocos
|
||||||
|
- vllm==0.2.7
|
||||||
|
- transformers>=4.36.0 # Required for Mixtral.
|
||||||
|
- xformers==0.0.23.post1
|
||||||
|
prefix: /opt/conda/envs/transformers
|
||||||
backend/python/coqui/Makefile (new file, 15 lines)
@@ -0,0 +1,15 @@
|
|||||||
|
.PHONY: coqui
|
||||||
|
coqui:
|
||||||
|
$(MAKE) -C ../common-env/transformers
|
||||||
|
|
||||||
|
.PHONY: run
|
||||||
|
run:
|
||||||
|
@echo "Running coqui..."
|
||||||
|
bash run.sh
|
||||||
|
@echo "coqui run."
|
||||||
|
|
||||||
|
.PHONY: test
|
||||||
|
test:
|
||||||
|
@echo "Testing coqui..."
|
||||||
|
bash test.sh
|
||||||
|
@echo "coqui tested."
|
||||||
backend/python/coqui/README.md (new file, 11 lines)
@@ -0,0 +1,11 @@
|
|||||||
|
# Creating a separate environment for ttsbark project
|
||||||
|
|
||||||
|
```
|
||||||
|
make coqui
|
||||||
|
```
|
||||||
|
|
||||||
|
# Testing the gRPC server
|
||||||
|
|
||||||
|
```
|
||||||
|
make test
|
||||||
|
```
|
||||||
backend/python/coqui/backend_pb2.py (new file, 61 lines; diff suppressed because one or more lines are too long)
backend/python/coqui/coqui_server.py (new file, 104 lines)
@@ -0,0 +1,104 @@
#!/usr/bin/env python3
"""
This is an extra gRPC server of LocalAI for Coqui TTS
"""
from concurrent import futures
import time
import argparse
import signal
import sys
import os
import backend_pb2
import backend_pb2_grpc

import torch
from TTS.api import TTS

import grpc


_ONE_DAY_IN_SECONDS = 60 * 60 * 24

# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
COQUI_LANGUAGE = os.environ.get('COQUI_LANGUAGE', None)

# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    BackendServicer is the class that implements the gRPC service
    """
    def Health(self, request, context):
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        # Get device
        # device = "cuda" if request.CUDA else "cpu"
        if torch.cuda.is_available():
            print("CUDA is available", file=sys.stderr)
            device = "cuda"
        else:
            print("CUDA is not available", file=sys.stderr)
            device = "cpu"

        if not torch.cuda.is_available() and request.CUDA:
            return backend_pb2.Result(success=False, message="CUDA is not available")

        self.AudioPath = None
        # List available 🐸TTS models
        print(TTS().list_models())
        if os.path.isabs(request.AudioPath):
            self.AudioPath = request.AudioPath
        elif request.AudioPath and request.ModelFile != "" and not os.path.isabs(request.AudioPath):
            # get base path of modelFile
            modelFileBase = os.path.dirname(request.ModelFile)
            # make AudioPath relative to modelFileBase
            self.AudioPath = os.path.join(modelFileBase, request.AudioPath)

        try:
            print("Preparing models, please wait", file=sys.stderr)
            self.tts = TTS(request.Model).to(device)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        # Implement your logic here for the LoadModel service
        # Replace this with your desired response
        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def TTS(self, request, context):
        try:
            self.tts.tts_to_file(text=request.text, speaker_wav=self.AudioPath, language=COQUI_LANGUAGE, file_path=request.dst)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(success=True)


def serve(address):
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)

    # Define the signal handler function
    def signal_handler(sig, frame):
        print("Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    # Set the signal handlers for SIGINT and SIGTERM
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run the gRPC server.")
    parser.add_argument(
        "--addr", default="localhost:50051", help="The address to bind the server to."
    )
    args = parser.parse_args()

    serve(args.addr)
backend/python/coqui/run.sh (new executable file, 14 lines)
@@ -0,0 +1,14 @@
#!/bin/bash

##
## A bash script wrapper that runs the coqui server with conda

export PATH=$PATH:/opt/conda/bin

# Activate conda environment
source activate transformers

# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

python $DIR/coqui_server.py $@
backend/python/coqui/test.py (new file, 82 lines)
@@ -0,0 +1,82 @@
"""
A test script to test the gRPC service
"""
import unittest
import subprocess
import time
import backend_pb2
import backend_pb2_grpc

import grpc


class TestBackendServicer(unittest.TestCase):
    """
    TestBackendServicer is the class that tests the gRPC service
    """
    def setUp(self):
        """
        This method sets up the gRPC service by starting the server
        """
        self.service = subprocess.Popen(["python3", "coqui_server.py", "--addr", "localhost:50051"])
        time.sleep(10)

    def tearDown(self) -> None:
        """
        This method tears down the gRPC service by terminating the server
        """
        self.service.terminate()
        self.service.wait()

    def test_server_startup(self):
        """
        This method tests if the server starts up successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.Health(backend_pb2.HealthMessage())
                self.assertEqual(response.message, b'OK')
        except Exception as err:
            print(err)
            self.fail("Server failed to start")
        finally:
            self.tearDown()

    def test_load_model(self):
        """
        This method tests if the model is loaded successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="tts_models/en/vctk/vits"))
                print(response)
                self.assertTrue(response.success)
                self.assertEqual(response.message, "Model loaded successfully")
        except Exception as err:
            print(err)
            self.fail("LoadModel service failed")
        finally:
            self.tearDown()

    def test_tts(self):
        """
        This method tests if speech is generated successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="tts_models/en/vctk/vits"))
                self.assertTrue(response.success)
                tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story")
                tts_response = stub.TTS(tts_request)
                self.assertIsNotNone(tts_response)
        except Exception as err:
            print(err)
            self.fail("TTS service failed")
        finally:
            self.tearDown()
backend/python/coqui/test.sh (new file, 11 lines)
@@ -0,0 +1,11 @@
#!/bin/bash
##
## A bash script wrapper that runs the coqui tests with conda

# Activate conda environment
source activate transformers

# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

python -m unittest $DIR/test.py
backend/python/diffusers/Makefile (new file, 15 lines)
@@ -0,0 +1,15 @@
CONDA_ENV_PATH = "diffusers.yml"

.PHONY: diffusers
diffusers:
	@echo "Installing $(CONDA_ENV_PATH)..."
	bash install.sh $(CONDA_ENV_PATH)

.PHONY: run
run:
	@echo "Running diffusers..."
	bash run.sh
	@echo "Diffusers run."

test:
	bash test.sh
@@ -18,9 +18,9 @@ import backend_pb2_grpc
 import grpc
 
 from diffusers import StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
-from diffusers import StableDiffusionImg2ImgPipeline
+from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
 from diffusers.pipelines.stable_diffusion import safety_checker
+from diffusers.utils import load_image,export_to_video
 from compel import Compel
 
 from transformers import CLIPTextModel
@@ -30,6 +30,11 @@ from safetensors.torch import load_file
 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
 COMPEL=os.environ.get("COMPEL", "1") == "1"
 CLIPSKIP=os.environ.get("CLIPSKIP", "1") == "1"
+SAFETENSORS=os.environ.get("SAFETENSORS", "1") == "1"
+CHUNK_SIZE=os.environ.get("CHUNK_SIZE", "8")
+FPS=os.environ.get("FPS", "7")
+DISABLE_CPU_OFFLOAD=os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
+FRAMES=os.environ.get("FRAMES", "64")
 
 # If MAX_WORKERS are specified in the environment use it, otherwise default to 1
 MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
@@ -135,15 +140,18 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         print(f"Loading model {request.Model}...", file=sys.stderr)
         print(f"Request {request}", file=sys.stderr)
         torchType = torch.float32
+        variant = None
+
         if request.F16Memory:
             torchType = torch.float16
+            variant="fp16"
 
         local = False
         modelFile = request.Model
 
-        cfg_scale = 7
+        self.cfg_scale = 7
         if request.CFGScale != 0:
-            cfg_scale = request.CFGScale
+            self.cfg_scale = request.CFGScale
 
         clipmodel = "runwayml/stable-diffusion-v1-5"
         if request.CLIPModel != "":
@@ -159,69 +167,86 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
             modelFile = request.ModelFile
 
         fromSingleFile = request.Model.startswith("http") or request.Model.startswith("/") or local
-
-        if request.IMG2IMG and request.PipelineType == "":
-            request.PipelineType == "StableDiffusionImg2ImgPipeline"
-
-        if request.PipelineType == "":
-            request.PipelineType == "StableDiffusionPipeline"
+        self.img2vid=False
+        self.txt2vid=False
 
         ## img2img
-        if request.PipelineType == "StableDiffusionImg2ImgPipeline":
+        if (request.PipelineType == "StableDiffusionImg2ImgPipeline") or (request.IMG2IMG and request.PipelineType == ""):
             if fromSingleFile:
                 self.pipe = StableDiffusionImg2ImgPipeline.from_single_file(modelFile,
-                    torch_dtype=torchType,
-                    guidance_scale=cfg_scale)
+                    torch_dtype=torchType)
             else:
                 self.pipe = StableDiffusionImg2ImgPipeline.from_pretrained(request.Model,
-                    torch_dtype=torchType,
-                    guidance_scale=cfg_scale)
+                    torch_dtype=torchType)
 
-        if request.PipelineType == "StableDiffusionDepth2ImgPipeline":
+        elif request.PipelineType == "StableDiffusionDepth2ImgPipeline":
             self.pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(request.Model,
-                torch_dtype=torchType,
-                guidance_scale=cfg_scale)
+                torch_dtype=torchType)
+        ## img2vid
+        elif request.PipelineType == "StableVideoDiffusionPipeline":
+            self.img2vid=True
+            self.pipe = StableVideoDiffusionPipeline.from_pretrained(
+                request.Model, torch_dtype=torchType, variant=variant
+            )
+            if not DISABLE_CPU_OFFLOAD:
+                self.pipe.enable_model_cpu_offload()
         ## text2img
-        if request.PipelineType == "StableDiffusionPipeline":
+        elif request.PipelineType == "AutoPipelineForText2Image" or request.PipelineType == "":
+            self.pipe = AutoPipelineForText2Image.from_pretrained(request.Model,
+                torch_dtype=torchType,
+                use_safetensors=SAFETENSORS,
+                variant=variant)
+        elif request.PipelineType == "StableDiffusionPipeline":
             if fromSingleFile:
                 self.pipe = StableDiffusionPipeline.from_single_file(modelFile,
-                    torch_dtype=torchType,
-                    guidance_scale=cfg_scale)
+                    torch_dtype=torchType)
             else:
                 self.pipe = StableDiffusionPipeline.from_pretrained(request.Model,
-                    torch_dtype=torchType,
-                    guidance_scale=cfg_scale)
-
-        if request.PipelineType == "DiffusionPipeline":
+                    torch_dtype=torchType)
+        elif request.PipelineType == "DiffusionPipeline":
             self.pipe = DiffusionPipeline.from_pretrained(request.Model,
-                torch_dtype=torchType,
-                guidance_scale=cfg_scale)
-
-        if request.PipelineType == "StableDiffusionXLPipeline":
+                torch_dtype=torchType)
+        elif request.PipelineType == "VideoDiffusionPipeline":
+            self.txt2vid=True
+            self.pipe = DiffusionPipeline.from_pretrained(request.Model,
+                torch_dtype=torchType)
+        elif request.PipelineType == "StableDiffusionXLPipeline":
             if fromSingleFile:
                 self.pipe = StableDiffusionXLPipeline.from_single_file(modelFile,
-                    torch_dtype=torchType, use_safetensors=True,
-                    guidance_scale=cfg_scale)
+                    torch_dtype=torchType,
+                    use_safetensors=True)
             else:
                 self.pipe = StableDiffusionXLPipeline.from_pretrained(
                     request.Model,
                     torch_dtype=torchType,
                     use_safetensors=True,
-                    # variant="fp16"
-                    guidance_scale=cfg_scale)
-        # https://github.com/huggingface/diffusers/issues/4446
-        # do not use text_encoder in the constructor since then
-        # https://github.com/huggingface/diffusers/issues/3212#issuecomment-1521841481
+                    variant=variant)
+
         if CLIPSKIP and request.CLIPSkip != 0:
-            text_encoder = CLIPTextModel.from_pretrained(clipmodel, num_hidden_layers=request.CLIPSkip, subfolder=clipsubfolder, torch_dtype=torchType)
-            self.pipe.text_encoder=text_encoder
+            self.clip_skip = request.CLIPSkip
+        else:
+            self.clip_skip = 0
 
         # torch_dtype needs to be customized. float16 for GPU, float32 for CPU
         # TODO: this needs to be customized
         if request.SchedulerType != "":
             self.pipe.scheduler = get_scheduler(request.SchedulerType, self.pipe.scheduler.config)
-        self.compel = Compel(tokenizer=self.pipe.tokenizer, text_encoder=self.pipe.text_encoder)
+
+        if not self.img2vid:
+            self.compel = Compel(tokenizer=self.pipe.tokenizer, text_encoder=self.pipe.text_encoder)
+
+        if request.ControlNet:
+            self.controlnet = ControlNetModel.from_pretrained(
+                request.ControlNet, torch_dtype=torchType, variant=variant
+            )
+            self.pipe.controlnet = self.controlnet
+        else:
+            self.controlnet = None
+
         if request.CUDA:
             self.pipe.to('cuda')
+            if self.controlnet:
+                self.controlnet.to('cuda')
         # Assume directory from request.ModelFile.
         # Only if request.LoraAdapter it's not an absolute path
         if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter:
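For context, a standalone sketch of the text-to-image path that this hunk makes the default when no PipelineType is given. It is not the backend itself: the model id, prompt, and output path below are illustrative, and the backend passes request.Model and the request parameters instead; AutoPipelineForText2Image is the diffusers helper that picks the concrete pipeline class from the model's config.

```python
import torch
from diffusers import AutoPipelineForText2Image

# Illustrative standalone use of the pipeline class the backend now defaults to.
pipe = AutoPipelineForText2Image.from_pretrained(
    "runwayml/stable-diffusion-v1-5",  # example model; the backend uses request.Model
    torch_dtype=torch.float16,         # float32 on CPU, float16 when F16Memory is requested
    use_safetensors=True,
    variant="fp16",
)
pipe.to("cuda")

# guidance_scale=7 mirrors the backend's default cfg_scale.
image = pipe("a photo of a lighthouse at dawn",
             num_inference_steps=25,
             guidance_scale=7).images[0]
image.save("out.png")
```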
@@ -303,17 +328,28 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
 
         prompt = request.positive_prompt
 
+        steps = 1
+
+        if request.step != 0:
+            steps = request.step
+
         # create a dictionary of values for the parameters
         options = {
             "negative_prompt": request.negative_prompt,
             "width": request.width,
             "height": request.height,
-            "num_inference_steps": request.step,
+            "num_inference_steps": steps,
         }
 
-        if request.src != "":
+        if request.src != "" and not self.controlnet and not self.img2vid:
             image = Image.open(request.src)
             options["image"] = image
+        elif self.controlnet and request.src:
+            pose_image = load_image(request.src)
+            options["image"] = pose_image
+
+        if CLIPSKIP and self.clip_skip != 0:
+            options["clip_skip"]=self.clip_skip
 
         # Get the keys that we will build the args for our pipe for
         keys = options.keys()
@@ -333,25 +369,42 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                 request.seed
             )
 
+        if self.img2vid:
+            # Load the conditioning image
+            image = load_image(request.src)
+            image = image.resize((1024, 576))
+
+            generator = torch.manual_seed(request.seed)
+            frames = self.pipe(image, guidance_scale=self.cfg_scale, decode_chunk_size=CHUNK_SIZE, generator=generator).frames[0]
+            export_to_video(frames, request.dst, fps=FPS)
+            return backend_pb2.Result(message="Media generated successfully", success=True)
+
+        if self.txt2vid:
+            video_frames = self.pipe(prompt, guidance_scale=self.cfg_scale, num_inference_steps=steps, num_frames=int(FRAMES)).frames
+            export_to_video(video_frames, request.dst)
+            return backend_pb2.Result(message="Media generated successfully", success=True)
+
         image = {}
         if COMPEL:
             conditioning = self.compel.build_conditioning_tensor(prompt)
             kwargs["prompt_embeds"]= conditioning
             # pass the kwargs dictionary to the self.pipe method
             image = self.pipe(
+                guidance_scale=self.cfg_scale,
                 **kwargs
             ).images[0]
         else:
             # pass the kwargs dictionary to the self.pipe method
             image = self.pipe(
                 prompt,
+                guidance_scale=self.cfg_scale,
                 **kwargs
             ).images[0]
 
         # save the result
         image.save(request.dst)
 
-        return backend_pb2.Result(message="Model loaded successfully", success=True)
+        return backend_pb2.Result(message="Media generated", success=True)
 
 def serve(address):
     server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
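For context, a standalone sketch of the image-to-video branch wired up above, using the values the new environment knobs fall back to (CHUNK_SIZE=8, FPS=7, CPU offload enabled). The model id and file paths are illustrative, not what the backend hard-codes; the backend loads request.Model and writes to request.dst.

```python
import torch
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video

# Illustrative standalone run of the StableVideoDiffusionPipeline path added above.
pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt",  # example model; the backend uses request.Model
    torch_dtype=torch.float16,
    variant="fp16",
)
pipe.enable_model_cpu_offload()  # mirrors the default when DISABLE_CPU_OFFLOAD is unset

# Conditioning image resized to the 1024x576 resolution used by the backend.
image = load_image("conditioning.png").resize((1024, 576))
generator = torch.manual_seed(42)

frames = pipe(image, decode_chunk_size=8, generator=generator).frames[0]
export_to_video(frames, "generated.mp4", fps=7)
```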
Some files were not shown because too many files have changed in this diff.