mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-03 03:02:38 -05:00
Compare commits
79 Commits
renovate/g
...
v2.0.0_bet
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d6073ac18e | ||
|
|
1c450d46cf | ||
|
|
6b312a8522 | ||
|
|
2b2007ae9e | ||
|
|
e94a34be8c | ||
|
|
c3fb4b1d8e | ||
|
|
e3ca1a7dbe | ||
|
|
2d64d8b444 | ||
|
|
9b98be160a | ||
|
|
9f708ff318 | ||
|
|
4e0ad33d92 | ||
|
|
519285bf38 | ||
|
|
fd1b7b3f22 | ||
|
|
687730a7f5 | ||
|
|
b7821361c3 | ||
|
|
63e1f8fffd | ||
|
|
824612f1b4 | ||
|
|
9482acfdfc | ||
|
|
c75bdd99e4 | ||
|
|
6f34e8f044 | ||
|
|
6d187af643 | ||
|
|
97e9598c79 | ||
|
|
5a6a6de3d7 | ||
|
|
b1a20effde | ||
|
|
ba5ab26f2e | ||
|
|
69f53211a1 | ||
|
|
9dddd1134d | ||
|
|
c5c77d2b0d | ||
|
|
763f94ca80 | ||
|
|
20d637e7b7 | ||
|
|
480b14c8dc | ||
|
|
999db4301a | ||
|
|
92cbc4d516 | ||
|
|
ff9afdb0fe | ||
|
|
3e35b20a02 | ||
|
|
9ea371d6cd | ||
|
|
7a0f9767da | ||
|
|
9d7363f2a7 | ||
|
|
8ee5cf38fd | ||
|
|
a6b788d220 | ||
|
|
ccd87cd9f0 | ||
|
|
b5af87fc6c | ||
|
|
3c9544b023 | ||
|
|
2f65671070 | ||
|
|
8c5436cbed | ||
|
|
548959b50f | ||
|
|
2addb9f99a | ||
|
|
fdd95d1d86 | ||
|
|
66a558ff41 | ||
|
|
733b612eb2 | ||
|
|
991ecce004 | ||
|
|
ad0e30bca5 | ||
|
|
55461188a4 | ||
|
|
5d2405fdef | ||
|
|
e9f1268225 | ||
|
|
803a0ac02a | ||
|
|
bde87d00b9 | ||
|
|
0eae727366 | ||
|
|
3b4c5d54d8 | ||
|
|
4e16bc2f13 | ||
|
|
562ac62f59 | ||
|
|
e7fa2e06f8 | ||
|
|
8123f009d0 | ||
|
|
622aaa9f7d | ||
|
|
7b1ee203ce | ||
|
|
f347e51927 | ||
|
|
9b17af18b3 | ||
|
|
23c7fbfe6b | ||
|
|
035fea676a | ||
|
|
6e1a234d15 | ||
|
|
5b596ea605 | ||
|
|
6bd56460de | ||
|
|
6ef7ea2635 | ||
|
|
f8c00fbaf1 | ||
|
|
d9a42cc4c5 | ||
|
|
fc0bc32814 | ||
|
|
c62504ac92 | ||
|
|
f227e918f9 | ||
|
|
c132dbadce |
22
.env
22
.env
@@ -66,4 +66,24 @@ MODELS_PATH=/models
|
||||
### Python backends GRPC max workers
|
||||
### Default number of workers for GRPC Python backends.
|
||||
### This actually controls wether a backend can process multiple requests or not.
|
||||
# PYTHON_GRPC_MAX_WORKERS=1
|
||||
# PYTHON_GRPC_MAX_WORKERS=1
|
||||
|
||||
### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
|
||||
# LLAMACPP_PARALLEL=1
|
||||
|
||||
### Enable to run parallel requests
|
||||
# PARALLEL_REQUESTS=true
|
||||
|
||||
### Watchdog settings
|
||||
###
|
||||
# Enables watchdog to kill backends that are inactive for too much time
|
||||
# WATCHDOG_IDLE=true
|
||||
#
|
||||
# Enables watchdog to kill backends that are busy for too much time
|
||||
# WATCHDOG_BUSY=true
|
||||
#
|
||||
# Time in duration format (e.g. 1h30m) after which a backend is considered idle
|
||||
# WATCHDOG_IDLE_TIMEOUT=5m
|
||||
#
|
||||
# Time in duration format (e.g. 1h30m) after which a backend is considered busy
|
||||
# WATCHDOG_BUSY_TIMEOUT=5m
|
||||
@@ -15,7 +15,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
ubuntu-latest:
|
||||
runs-on: self-hosted
|
||||
runs-on: gpu
|
||||
strategy:
|
||||
matrix:
|
||||
go-version: ['1.21.x']
|
||||
@@ -40,6 +40,8 @@ jobs:
|
||||
if [ ! -e /run/systemd/system ]; then
|
||||
sudo mkdir /run/systemd/system
|
||||
fi
|
||||
sudo mkdir -p /host/tests/${{ github.head_ref || github.ref }}
|
||||
sudo chmod -R 777 /host/tests/${{ github.head_ref || github.ref }}
|
||||
make \
|
||||
TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
|
||||
BUILD_TYPE=cublas \
|
||||
@@ -57,4 +59,5 @@ jobs:
|
||||
make \
|
||||
TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
|
||||
teardown-e2e || true
|
||||
docker system prune -f -a --volumes || true
|
||||
sudo rm -rf /host/tests/${{ github.head_ref || github.ref }} || true
|
||||
docker system prune -f -a --volumes || true
|
||||
223
.github/workflows/image.yml
vendored
223
.github/workflows/image.yml
vendored
@@ -14,8 +14,25 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
docker:
|
||||
extras-image-build:
|
||||
uses: ./.github/workflows/image_build.yml
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
tag-suffix: ${{ matrix.tag-suffix }}
|
||||
ffmpeg: ${{ matrix.ffmpeg }}
|
||||
image-type: ${{ matrix.image-type }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||
platforms: ${{ matrix.platforms }}
|
||||
runs-on: ${{ matrix.runs-on }}
|
||||
secrets:
|
||||
dockerUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
dockerPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
strategy:
|
||||
# Pushing with all jobs in parallel
|
||||
# eats the bandwidth of all the nodes
|
||||
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
|
||||
matrix:
|
||||
include:
|
||||
- build-type: ''
|
||||
@@ -24,123 +41,117 @@ jobs:
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: ''
|
||||
ffmpeg: ''
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: 11
|
||||
cuda-minor-version: 7
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda11'
|
||||
ffmpeg: ''
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: 12
|
||||
cuda-minor-version: 1
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12'
|
||||
ffmpeg: ''
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
- build-type: ''
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-ffmpeg'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: 11
|
||||
cuda-minor-version: 7
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "7"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda11'
|
||||
ffmpeg: ''
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "1"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12'
|
||||
ffmpeg: ''
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "7"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda11-ffmpeg'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: 12
|
||||
cuda-minor-version: 1
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "1"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12-ffmpeg'
|
||||
ffmpeg: 'true'
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Release space from worker
|
||||
run: |
|
||||
echo "Listing top largest packages"
|
||||
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
||||
head -n 30 <<< "${pkgs}"
|
||||
echo
|
||||
df -h
|
||||
echo
|
||||
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
|
||||
sudo apt-get remove --auto-remove android-sdk-platform-tools || true
|
||||
sudo apt-get purge --auto-remove android-sdk-platform-tools || true
|
||||
sudo rm -rf /usr/local/lib/android
|
||||
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
|
||||
sudo rm -rf /usr/share/dotnet
|
||||
sudo apt-get remove -y '^mono-.*' || true
|
||||
sudo apt-get remove -y '^ghc-.*' || true
|
||||
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
|
||||
sudo apt-get remove -y 'php.*' || true
|
||||
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
|
||||
sudo apt-get remove -y '^google-.*' || true
|
||||
sudo apt-get remove -y azure-cli || true
|
||||
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
|
||||
sudo apt-get remove -y '^gfortran-.*' || true
|
||||
sudo apt-get remove -y microsoft-edge-stable || true
|
||||
sudo apt-get remove -y firefox || true
|
||||
sudo apt-get remove -y powershell || true
|
||||
sudo apt-get remove -y r-base-core || true
|
||||
sudo apt-get autoremove -y
|
||||
sudo apt-get clean
|
||||
echo
|
||||
echo "Listing top largest packages"
|
||||
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
||||
head -n 30 <<< "${pkgs}"
|
||||
echo
|
||||
sudo rm -rfv build || true
|
||||
df -h
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: quay.io/go-skynet/local-ai
|
||||
tags: |
|
||||
type=ref,event=branch
|
||||
type=semver,pattern={{raw}}
|
||||
type=sha
|
||||
flavor: |
|
||||
latest=${{ matrix.tag-latest }}
|
||||
suffix=${{ matrix.tag-suffix }}
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@master
|
||||
with:
|
||||
platforms: all
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@master
|
||||
|
||||
- name: Login to DockerHub
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: quay.io
|
||||
username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
builder: ${{ steps.buildx.outputs.name }}
|
||||
build-args: |
|
||||
BUILD_TYPE=${{ matrix.build-type }}
|
||||
CUDA_MAJOR_VERSION=${{ matrix.cuda-major-version }}
|
||||
CUDA_MINOR_VERSION=${{ matrix.cuda-minor-version }}
|
||||
FFMPEG=${{ matrix.ffmpeg }}
|
||||
context: .
|
||||
file: ./Dockerfile
|
||||
platforms: ${{ matrix.platforms }}
|
||||
push: ${{ github.event_name != 'pull_request' }}
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
- build-type: ''
|
||||
#platforms: 'linux/amd64,linux/arm64'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: ''
|
||||
ffmpeg: ''
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
core-image-build:
|
||||
uses: ./.github/workflows/image_build.yml
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
tag-suffix: ${{ matrix.tag-suffix }}
|
||||
ffmpeg: ${{ matrix.ffmpeg }}
|
||||
image-type: ${{ matrix.image-type }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||
platforms: ${{ matrix.platforms }}
|
||||
runs-on: ${{ matrix.runs-on }}
|
||||
secrets:
|
||||
dockerUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
dockerPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- build-type: ''
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'ubuntu-latest'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "7"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda11-core'
|
||||
ffmpeg: ''
|
||||
image-type: 'core'
|
||||
runs-on: 'ubuntu-latest'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "1"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12-core'
|
||||
ffmpeg: ''
|
||||
image-type: 'core'
|
||||
runs-on: 'ubuntu-latest'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "7"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda11-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'ubuntu-latest'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "1"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'ubuntu-latest'
|
||||
|
||||
147
.github/workflows/image_build.yml
vendored
Normal file
147
.github/workflows/image_build.yml
vendored
Normal file
@@ -0,0 +1,147 @@
|
||||
---
|
||||
name: 'build container images (reusable)'
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
build-type:
|
||||
description: 'Build type'
|
||||
default: ''
|
||||
type: string
|
||||
cuda-major-version:
|
||||
description: 'CUDA major version'
|
||||
default: "11"
|
||||
type: string
|
||||
cuda-minor-version:
|
||||
description: 'CUDA minor version'
|
||||
default: "7"
|
||||
type: string
|
||||
platforms:
|
||||
description: 'Platforms'
|
||||
default: ''
|
||||
type: string
|
||||
tag-latest:
|
||||
description: 'Tag latest'
|
||||
default: ''
|
||||
type: string
|
||||
tag-suffix:
|
||||
description: 'Tag suffix'
|
||||
default: ''
|
||||
type: string
|
||||
ffmpeg:
|
||||
description: 'FFMPEG'
|
||||
default: ''
|
||||
type: string
|
||||
image-type:
|
||||
description: 'Image type'
|
||||
default: ''
|
||||
type: string
|
||||
runs-on:
|
||||
description: 'Runs on'
|
||||
required: true
|
||||
default: ''
|
||||
type: string
|
||||
secrets:
|
||||
dockerUsername:
|
||||
required: true
|
||||
dockerPassword:
|
||||
required: true
|
||||
jobs:
|
||||
reusable_image-build:
|
||||
runs-on: ${{ inputs.runs-on }}
|
||||
steps:
|
||||
- name: Force Install GIT latest
|
||||
run: |
|
||||
sudo apt-get update \
|
||||
&& sudo apt-get install -y software-properties-common \
|
||||
&& sudo apt-get update \
|
||||
&& sudo add-apt-repository -y ppa:git-core/ppa \
|
||||
&& sudo apt-get update \
|
||||
&& sudo apt-get install -y git
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
# - name: Release space from worker
|
||||
# run: |
|
||||
# echo "Listing top largest packages"
|
||||
# pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
||||
# head -n 30 <<< "${pkgs}"
|
||||
# echo
|
||||
# df -h
|
||||
# echo
|
||||
# sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
|
||||
# sudo apt-get remove --auto-remove android-sdk-platform-tools || true
|
||||
# sudo apt-get purge --auto-remove android-sdk-platform-tools || true
|
||||
# sudo rm -rf /usr/local/lib/android
|
||||
# sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
|
||||
# sudo rm -rf /usr/share/dotnet
|
||||
# sudo apt-get remove -y '^mono-.*' || true
|
||||
# sudo apt-get remove -y '^ghc-.*' || true
|
||||
# sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
|
||||
# sudo apt-get remove -y 'php.*' || true
|
||||
# sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
|
||||
# sudo apt-get remove -y '^google-.*' || true
|
||||
# sudo apt-get remove -y azure-cli || true
|
||||
# sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
|
||||
# sudo apt-get remove -y '^gfortran-.*' || true
|
||||
# sudo apt-get remove -y microsoft-edge-stable || true
|
||||
# sudo apt-get remove -y firefox || true
|
||||
# sudo apt-get remove -y powershell || true
|
||||
# sudo apt-get remove -y r-base-core || true
|
||||
# sudo apt-get autoremove -y
|
||||
# sudo apt-get clean
|
||||
# echo
|
||||
# echo "Listing top largest packages"
|
||||
# pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
||||
# head -n 30 <<< "${pkgs}"
|
||||
# echo
|
||||
# sudo rm -rfv build || true
|
||||
# df -h
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: quay.io/go-skynet/local-ai
|
||||
tags: |
|
||||
type=ref,event=branch
|
||||
type=semver,pattern={{raw}}
|
||||
type=sha
|
||||
flavor: |
|
||||
latest=${{ inputs.tag-latest }}
|
||||
suffix=${{ inputs.tag-suffix }}
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@master
|
||||
with:
|
||||
platforms: all
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@master
|
||||
|
||||
- name: Login to DockerHub
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: quay.io
|
||||
username: ${{ secrets.dockerUsername }}
|
||||
password: ${{ secrets.dockerPassword }}
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
builder: ${{ steps.buildx.outputs.name }}
|
||||
build-args: |
|
||||
BUILD_TYPE=${{ inputs.build-type }}
|
||||
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
||||
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
|
||||
FFMPEG=${{ inputs.ffmpeg }}
|
||||
IMAGE_TYPE=${{ inputs.image-type }}
|
||||
context: .
|
||||
file: ./Dockerfile
|
||||
platforms: ${{ inputs.platforms }}
|
||||
push: ${{ github.event_name != 'pull_request' }}
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
- name: job summary
|
||||
run: |
|
||||
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
|
||||
44
.github/workflows/test.yml
vendored
44
.github/workflows/test.yml
vendored
@@ -14,7 +14,7 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
ubuntu-latest:
|
||||
tests-linux:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -67,41 +67,35 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
sudo apt-get install -y ca-certificates cmake curl patch
|
||||
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||
sudo pip install -r extra/requirements.txt
|
||||
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
|
||||
|
||||
|
||||
# Pre-build stable diffusion before we install a newever version of abseil (not compatible with stablediffusion-ncn)
|
||||
GO_TAGS="tts stablediffusion" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||
|
||||
sudo mkdir /build && sudo chmod -R 777 /build && cd /build && \
|
||||
curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v1.11.0.tar.gz" | \
|
||||
tar -xzvf - && \
|
||||
mkdir -p "spdlog-1.11.0/build" && \
|
||||
cd "spdlog-1.11.0/build" && \
|
||||
cmake .. && \
|
||||
make -j8 && \
|
||||
sudo cmake --install . --prefix /usr && mkdir -p "lib/Linux-$(uname -m)" && \
|
||||
cd /build && \
|
||||
mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \
|
||||
curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v1.0.0/libpiper_phonemize-amd64.tar.gz" | \
|
||||
tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
|
||||
sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
|
||||
sudo ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \
|
||||
sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
|
||||
# Pre-build piper before we start tests in order to have shared libraries in place
|
||||
make sources/go-piper && \
|
||||
GO_TAGS="tts" make -C sources/go-piper piper.o && \
|
||||
sudo cp -rfv sources/go-piper/piper/build/pi/lib/. /usr/lib/ && \
|
||||
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
|
||||
GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||
|
||||
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && sudo make -j12 install
|
||||
|
||||
- name: Test
|
||||
run: |
|
||||
ESPEAK_DATA="/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data" GO_TAGS="tts stablediffusion" make test
|
||||
GO_TAGS="stablediffusion tts" make test
|
||||
|
||||
macOS-latest:
|
||||
tests-apple:
|
||||
runs-on: macOS-latest
|
||||
strategy:
|
||||
matrix:
|
||||
|
||||
11
.gitignore
vendored
11
.gitignore
vendored
@@ -1,15 +1,10 @@
|
||||
# go-llama build artifacts
|
||||
go-llama
|
||||
go-llama-stable
|
||||
/gpt4all
|
||||
go-stable-diffusion
|
||||
go-piper
|
||||
/go-bert
|
||||
go-ggllm
|
||||
/piper
|
||||
/sources/
|
||||
__pycache__/
|
||||
*.a
|
||||
get-sources
|
||||
/backend/cpp/llama/grpc-server
|
||||
/backend/cpp/llama/llama.cpp
|
||||
|
||||
go-ggml-transformers
|
||||
go-gpt2
|
||||
|
||||
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
[submodule "docs/themes/hugo-theme-relearn"]
|
||||
path = docs/themes/hugo-theme-relearn
|
||||
url = https://github.com/McShelby/hugo-theme-relearn.git
|
||||
101
Dockerfile
101
Dockerfile
@@ -8,13 +8,11 @@ FROM golang:$GO_VERSION as requirements-core
|
||||
ARG BUILD_TYPE
|
||||
ARG CUDA_MAJOR_VERSION=11
|
||||
ARG CUDA_MINOR_VERSION=7
|
||||
ARG SPDLOG_VERSION="1.11.0"
|
||||
ARG PIPER_PHONEMIZE_VERSION='1.0.0'
|
||||
ARG TARGETARCH
|
||||
ARG TARGETVARIANT
|
||||
|
||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||
ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/extra/grpc/huggingface/huggingface.py,autogptq:/build/extra/grpc/autogptq/autogptq.py,bark:/build/extra/grpc/bark/ttsbark.py,diffusers:/build/extra/grpc/diffusers/backend_diffusers.py,exllama:/build/extra/grpc/exllama/exllama.py,vall-e-x:/build/extra/grpc/vall-e-x/ttsvalle.py,vllm:/build/extra/grpc/vllm/backend_vllm.py"
|
||||
ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh"
|
||||
ENV GALLERIES='[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]'
|
||||
ARG GO_TAGS="stablediffusion tts"
|
||||
|
||||
@@ -22,6 +20,9 @@ RUN apt-get update && \
|
||||
apt-get install -y ca-certificates curl patch pip cmake && apt-get clean
|
||||
|
||||
|
||||
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
|
||||
RUN update-ca-certificates
|
||||
|
||||
# Use the variables in subsequent instructions
|
||||
RUN echo "Target Architecture: $TARGETARCH"
|
||||
RUN echo "Target Variant: $TARGETVARIANT"
|
||||
@@ -49,45 +50,24 @@ RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
# piper requirements
|
||||
# Use pre-compiled Piper phonemization library (includes onnxruntime)
|
||||
#RUN if echo "${GO_TAGS}" | grep -q "tts"; then \
|
||||
RUN test -n "$TARGETARCH" \
|
||||
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
|
||||
|
||||
RUN curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v${SPDLOG_VERSION}.tar.gz" | \
|
||||
tar -xzvf - && \
|
||||
mkdir -p "spdlog-${SPDLOG_VERSION}/build" && \
|
||||
cd "spdlog-${SPDLOG_VERSION}/build" && \
|
||||
cmake .. && \
|
||||
make -j8 && \
|
||||
cmake --install . --prefix /usr && mkdir -p "lib/Linux-$(uname -m)" && \
|
||||
cd /build && \
|
||||
mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \
|
||||
curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v${PIPER_PHONEMIZE_VERSION}/libpiper_phonemize-${TARGETARCH:-$(go env GOARCH)}${TARGETVARIANT}.tar.gz" | \
|
||||
tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
|
||||
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
|
||||
ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \
|
||||
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/ && \
|
||||
rm spdlog-${SPDLOG_VERSION} -rf
|
||||
|
||||
# Extras requirements
|
||||
FROM requirements-core as requirements-extras
|
||||
|
||||
COPY extra/requirements.txt /build/extra/requirements.txt
|
||||
RUN curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \
|
||||
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list && \
|
||||
apt-get update && \
|
||||
apt-get install -y conda
|
||||
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
RUN pip install --upgrade pip
|
||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||
RUN if [ "${TARGETARCH}" = "amd64" ]; then \
|
||||
pip install git+https://github.com/suno-ai/bark.git diffusers invisible_watermark transformers accelerate safetensors;\
|
||||
fi
|
||||
RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "amd64" ]; then \
|
||||
pip install torch vllm && pip install auto-gptq https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}-cp39-cp39-linux_x86_64.whl;\
|
||||
fi
|
||||
RUN pip install -r /build/extra/requirements.txt && rm -rf /build/extra/requirements.txt
|
||||
|
||||
# Vall-e-X
|
||||
RUN git clone https://github.com/Plachtaa/VALL-E-X.git /usr/lib/vall-e-x && cd /usr/lib/vall-e-x && pip install -r requirements.txt
|
||||
|
||||
# \
|
||||
# ; fi
|
||||
@@ -108,12 +88,9 @@ ENV NVIDIA_VISIBLE_DEVICES=all
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
COPY Makefile .
|
||||
RUN make get-sources
|
||||
COPY go.mod .
|
||||
RUN make prepare
|
||||
COPY . .
|
||||
COPY .git .
|
||||
RUN make prepare
|
||||
|
||||
# stablediffusion does not tolerate a newer version of abseil, build it first
|
||||
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||
@@ -122,11 +99,16 @@ RUN if [ "${BUILD_GRPC}" = "true" ]; then \
|
||||
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && make -j12 install && rm -rf grpc \
|
||||
../.. && make -j12 install \
|
||||
; fi
|
||||
|
||||
# Rebuild with defaults backends
|
||||
RUN ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data make build
|
||||
RUN make build
|
||||
|
||||
RUN if [ ! -d "/build/sources/go-piper/piper/build/pi/lib/" ]; then \
|
||||
mkdir -p /build/sources/go-piper/piper/build/pi/lib/ \
|
||||
touch /build/sources/go-piper/piper/build/pi/lib/keep \
|
||||
; fi
|
||||
|
||||
###################################
|
||||
###################################
|
||||
@@ -136,6 +118,7 @@ FROM requirements-${IMAGE_TYPE}
|
||||
ARG FFMPEG
|
||||
ARG BUILD_TYPE
|
||||
ARG TARGETARCH
|
||||
ARG IMAGE_TYPE=extras
|
||||
|
||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||
ENV REBUILD=false
|
||||
@@ -158,22 +141,60 @@ WORKDIR /build
|
||||
# see https://github.com/go-skynet/LocalAI/pull/658#discussion_r1241971626 and
|
||||
# https://github.com/go-skynet/LocalAI/pull/434
|
||||
COPY . .
|
||||
RUN make prepare-sources
|
||||
|
||||
COPY --from=builder /build/sources ./sources/
|
||||
COPY --from=builder /build/grpc ./grpc/
|
||||
|
||||
RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf grpc
|
||||
|
||||
# Copy the binary
|
||||
COPY --from=builder /build/local-ai ./
|
||||
|
||||
# Copy shared libraries for piper
|
||||
COPY --from=builder /build/sources/go-piper/piper/build/pi/lib/* /usr/lib/
|
||||
|
||||
# do not let stablediffusion rebuild (requires an older version of absl)
|
||||
COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
|
||||
|
||||
## Duplicated from Makefile to avoid having a big layer that's hard to push
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/autogptq \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/bark \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/diffusers \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/vllm \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/transformers \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/vall-e-x \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/exllama \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/petals \
|
||||
; fi
|
||||
|
||||
# Copy VALLE-X as it's not a real "lib"
|
||||
# TODO: this is wrong - we should copy the lib into the conda env path
|
||||
RUN if [ -d /usr/lib/vall-e-x ]; then \
|
||||
cp -rfv /usr/lib/vall-e-x/* ./ ; \
|
||||
fi
|
||||
|
||||
# we also copy exllama libs over to resolve exllama import error
|
||||
# TODO: check if this is still needed
|
||||
RUN if [ -d /usr/local/lib/python3.9/dist-packages/exllama ]; then \
|
||||
cp -rfv /usr/local/lib/python3.9/dist-packages/exllama extra/grpc/exllama/;\
|
||||
cp -rfv /usr/local/lib/python3.9/dist-packages/exllama backend/python/exllama/;\
|
||||
fi
|
||||
|
||||
# Define the health check command
|
||||
|
||||
10
Entitlements.plist
Normal file
10
Entitlements.plist
Normal file
@@ -0,0 +1,10 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>com.apple.security.network.client</key>
|
||||
<true/>
|
||||
<key>com.apple.security.network.server</key>
|
||||
<true/>
|
||||
</dict>
|
||||
</plist>
|
||||
407
Makefile
407
Makefile
@@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0
|
||||
|
||||
GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
|
||||
|
||||
CPPLLAMA_VERSION?=96981f37b1e3f450d9e63e571514217bf60f0a7f
|
||||
CPPLLAMA_VERSION?=1f5cd83275fabb43f2ae92c30033b384a3eb37b4
|
||||
|
||||
# gpt4all version
|
||||
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
||||
@@ -28,16 +28,10 @@ WHISPER_CPP_VERSION?=85ed71aaec8e0612a84c0b67804bde75aa75a273
|
||||
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
|
||||
|
||||
# go-piper version
|
||||
PIPER_VERSION?=56b8a81b4760a6fbee1a82e62f007ae7e8f010a7
|
||||
|
||||
# go-bloomz version
|
||||
BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f
|
||||
PIPER_VERSION?=5a4c9e28c84bac09ab6baa9f88457d852cb46bb2
|
||||
|
||||
# stablediffusion version
|
||||
STABLEDIFFUSION_VERSION?=d89260f598afb809279bc72aa0107b4292587632
|
||||
|
||||
# Go-ggllm
|
||||
GOGGLLM_VERSION?=862477d16eefb0805261c19c9b0d053e3b2b684b
|
||||
STABLEDIFFUSION_VERSION?=902db5f066fd137697e3b69d0fa10d4782bd2c2f
|
||||
|
||||
export BUILD_TYPE?=
|
||||
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
|
||||
@@ -58,7 +52,6 @@ override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Version=$(VERSION
|
||||
override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
|
||||
|
||||
OPTIONAL_TARGETS?=
|
||||
ESPEAK_DATA?=
|
||||
|
||||
OS := $(shell uname -s)
|
||||
ARCH := $(shell uname -m)
|
||||
@@ -75,12 +68,19 @@ ifndef UNAME_S
|
||||
UNAME_S := $(shell uname -s)
|
||||
endif
|
||||
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
ifeq ($(OS),Darwin)
|
||||
CGO_LDFLAGS += -lcblas -framework Accelerate
|
||||
ifneq ($(BUILD_TYPE),metal)
|
||||
# explicit disable metal if on Darwin and metal is disabled
|
||||
CMAKE_ARGS+=-DLLAMA_METAL=OFF
|
||||
endif
|
||||
ifeq ($(OSX_SIGNING_IDENTITY),)
|
||||
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
|
||||
endif
|
||||
|
||||
# on OSX, if BUILD_TYPE is blank, we should default to use Metal
|
||||
ifeq ($(BUILD_TYPE),)
|
||||
BUILD_TYPE=metal
|
||||
# disable metal if on Darwin and any other value is explicitly passed.
|
||||
else ifneq ($(BUILD_TYPE),metal)
|
||||
CMAKE_ARGS+=-DLLAMA_METAL=OFF
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),openblas)
|
||||
@@ -96,7 +96,7 @@ ifeq ($(BUILD_TYPE),hipblas)
|
||||
ROCM_HOME ?= /opt/rocm
|
||||
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||
# Llama-stable has no hipblas support, so override it here.
|
||||
# llama-ggml has no hipblas support, so override it here.
|
||||
export STABLE_BUILD_TYPE=
|
||||
GPU_TARGETS ?= gfx900,gfx90a,gfx1030,gfx1031,gfx1100
|
||||
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
||||
@@ -126,10 +126,12 @@ endif
|
||||
ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
||||
# OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
|
||||
# OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
|
||||
PIPER_CGO_CXXFLAGS+=-I$(shell pwd)/sources/go-piper/piper/src/cpp -I$(shell pwd)/sources/go-piper/piper/build/fi/include -I$(shell pwd)/sources/go-piper/piper/build/pi/include -I$(shell pwd)/sources/go-piper/piper/build/si/include
|
||||
PIPER_CGO_LDFLAGS+=-L$(shell pwd)/sources/go-piper/piper/build/fi/lib -L$(shell pwd)/sources/go-piper/piper/build/pi/lib -L$(shell pwd)/sources/go-piper/piper/build/si/lib -lfmt -lspdlog -lucd
|
||||
OPTIONAL_GRPC+=backend-assets/grpc/piper
|
||||
endif
|
||||
|
||||
ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/falcon backend-assets/grpc/bloomz backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-stable backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
|
||||
ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-ggml backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
|
||||
GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
|
||||
|
||||
# If empty, then we build all
|
||||
@@ -142,136 +144,116 @@ endif
|
||||
all: help
|
||||
|
||||
## GPT4ALL
|
||||
gpt4all:
|
||||
git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all
|
||||
cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
## go-ggllm
|
||||
go-ggllm:
|
||||
git clone --recurse-submodules https://github.com/mudler/go-ggllm.cpp go-ggllm
|
||||
cd go-ggllm && git checkout -b build $(GOGGLLM_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
go-ggllm/libggllm.a: go-ggllm
|
||||
$(MAKE) -C go-ggllm BUILD_TYPE=$(BUILD_TYPE) libggllm.a
|
||||
sources/gpt4all:
|
||||
git clone --recurse-submodules $(GPT4ALL_REPO) sources/gpt4all
|
||||
cd sources/gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
## go-piper
|
||||
go-piper:
|
||||
git clone --recurse-submodules https://github.com/mudler/go-piper go-piper
|
||||
cd go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
|
||||
sources/go-piper:
|
||||
git clone --recurse-submodules https://github.com/mudler/go-piper sources/go-piper
|
||||
cd sources/go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
## BERT embeddings
|
||||
go-bert:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp go-bert
|
||||
cd go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
|
||||
sources/go-bert:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert
|
||||
cd sources/go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
## stable diffusion
|
||||
go-stable-diffusion:
|
||||
git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion go-stable-diffusion
|
||||
cd go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
|
||||
sources/go-stable-diffusion:
|
||||
git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion sources/go-stable-diffusion
|
||||
cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
go-stable-diffusion/libstablediffusion.a:
|
||||
$(MAKE) -C go-stable-diffusion libstablediffusion.a
|
||||
sources/go-stable-diffusion/libstablediffusion.a:
|
||||
$(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
|
||||
|
||||
## RWKV
|
||||
go-rwkv:
|
||||
git clone --recurse-submodules $(RWKV_REPO) go-rwkv
|
||||
cd go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
|
||||
sources/go-rwkv:
|
||||
git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv
|
||||
cd sources/go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
go-rwkv/librwkv.a: go-rwkv
|
||||
cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
|
||||
sources/go-rwkv/librwkv.a: sources/go-rwkv
|
||||
cd sources/go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
|
||||
|
||||
## bloomz
|
||||
bloomz:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/bloomz.cpp bloomz
|
||||
cd bloomz && git checkout -b build $(BLOOMZ_VERSION) && git submodule update --init --recursive --depth 1
|
||||
sources/go-bert/libgobert.a: sources/go-bert
|
||||
$(MAKE) -C sources/go-bert libgobert.a
|
||||
|
||||
bloomz/libbloomz.a: bloomz
|
||||
cd bloomz && make libbloomz.a
|
||||
|
||||
go-bert/libgobert.a: go-bert
|
||||
$(MAKE) -C go-bert libgobert.a
|
||||
|
||||
backend-assets/gpt4all: gpt4all/gpt4all-bindings/golang/libgpt4all.a
|
||||
backend-assets/gpt4all: sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
|
||||
mkdir -p backend-assets/gpt4all
|
||||
@cp gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
|
||||
@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
|
||||
@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
|
||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
|
||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
|
||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
|
||||
|
||||
backend-assets/espeak-ng-data:
|
||||
backend-assets/espeak-ng-data: sources/go-piper
|
||||
mkdir -p backend-assets/espeak-ng-data
|
||||
ifdef ESPEAK_DATA
|
||||
@cp -rf $(ESPEAK_DATA)/. backend-assets/espeak-ng-data
|
||||
else
|
||||
@echo "ESPEAK_DATA not set, skipping tts. Note that this will break the tts functionality."
|
||||
@touch backend-assets/espeak-ng-data/keep
|
||||
endif
|
||||
$(MAKE) -C sources/go-piper piper.o
|
||||
@cp -rf sources/go-piper/piper/build/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
|
||||
|
||||
gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
|
||||
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ libgpt4all.a
|
||||
sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
|
||||
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
|
||||
|
||||
## CEREBRAS GPT
|
||||
go-ggml-transformers:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-ggml-transformers.cpp go-ggml-transformers
|
||||
cd go-ggml-transformers && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1
|
||||
sources/go-ggml-transformers:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-ggml-transformers.cpp sources/go-ggml-transformers
|
||||
cd sources/go-ggml-transformers && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
go-ggml-transformers/libtransformers.a: go-ggml-transformers
|
||||
$(MAKE) -C go-ggml-transformers BUILD_TYPE=$(BUILD_TYPE) libtransformers.a
|
||||
sources/go-ggml-transformers/libtransformers.a: sources/go-ggml-transformers
|
||||
$(MAKE) -C sources/go-ggml-transformers BUILD_TYPE=$(BUILD_TYPE) libtransformers.a
|
||||
|
||||
whisper.cpp:
|
||||
git clone https://github.com/ggerganov/whisper.cpp.git
|
||||
cd whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
|
||||
sources/whisper.cpp:
|
||||
git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp
|
||||
cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
whisper.cpp/libwhisper.a: whisper.cpp
|
||||
cd whisper.cpp && make libwhisper.a
|
||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||
cd sources/whisper.cpp && make libwhisper.a
|
||||
|
||||
go-llama:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
|
||||
cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
|
||||
sources/go-llama:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama
|
||||
cd sources/go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
go-llama-stable:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama-stable
|
||||
cd go-llama-stable && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
|
||||
sources/go-llama-ggml:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama-ggml
|
||||
cd sources/go-llama-ggml && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
go-llama/libbinding.a: go-llama
|
||||
$(MAKE) -C go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
|
||||
sources/go-llama/libbinding.a: sources/go-llama
|
||||
$(MAKE) -C sources/go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
|
||||
|
||||
go-llama-stable/libbinding.a: go-llama-stable
|
||||
$(MAKE) -C go-llama-stable BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
|
||||
sources/go-llama-ggml/libbinding.a: sources/go-llama-ggml
|
||||
$(MAKE) -C sources/go-llama-ggml BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
|
||||
|
||||
go-piper/libpiper_binding.a: go-piper
|
||||
$(MAKE) -C go-piper libpiper_binding.a example/main
|
||||
sources/go-piper/libpiper_binding.a: sources/go-piper
|
||||
$(MAKE) -C sources/go-piper libpiper_binding.a example/main
|
||||
|
||||
get-sources: go-llama go-llama-stable go-ggllm go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion
|
||||
backend/cpp/llama/llama.cpp:
|
||||
$(MAKE) -C backend/cpp/llama llama.cpp
|
||||
|
||||
get-sources: backend/cpp/llama/llama.cpp sources/go-llama sources/go-llama-ggml sources/go-ggml-transformers sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion
|
||||
touch $@
|
||||
|
||||
replace:
|
||||
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-ggml-transformers.cpp=$(shell pwd)/go-ggml-transformers
|
||||
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(shell pwd)/go-piper
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-ggllm.cpp=$(shell pwd)/go-ggllm
|
||||
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(shell pwd)/sources/gpt4all/gpt4all-bindings/golang
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-ggml-transformers.cpp=$(shell pwd)/sources/go-ggml-transformers
|
||||
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/sources/go-rwkv
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/sources/whisper.cpp
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/sources/go-bert
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/sources/go-stable-diffusion
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(shell pwd)/sources/go-piper
|
||||
|
||||
prepare-sources: get-sources replace
|
||||
$(GOCMD) mod download
|
||||
touch $@
|
||||
|
||||
## GENERIC
|
||||
rebuild: ## Rebuilds the project
|
||||
$(GOCMD) clean -cache
|
||||
$(MAKE) -C go-llama clean
|
||||
$(MAKE) -C go-llama-stable clean
|
||||
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean
|
||||
$(MAKE) -C go-ggml-transformers clean
|
||||
$(MAKE) -C go-rwkv clean
|
||||
$(MAKE) -C whisper.cpp clean
|
||||
$(MAKE) -C go-stable-diffusion clean
|
||||
$(MAKE) -C go-bert clean
|
||||
$(MAKE) -C bloomz clean
|
||||
$(MAKE) -C go-piper clean
|
||||
$(MAKE) -C go-ggllm clean
|
||||
$(MAKE) -C sources/go-llama clean
|
||||
$(MAKE) -C sources/go-llama-ggml clean
|
||||
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
|
||||
$(MAKE) -C sources/go-ggml-transformers clean
|
||||
$(MAKE) -C sources/go-rwkv clean
|
||||
$(MAKE) -C sources/whisper.cpp clean
|
||||
$(MAKE) -C sources/go-stable-diffusion clean
|
||||
$(MAKE) -C sources/go-bert clean
|
||||
$(MAKE) -C sources/go-piper clean
|
||||
$(MAKE) build
|
||||
|
||||
prepare: prepare-sources $(OPTIONAL_TARGETS)
|
||||
@@ -280,21 +262,12 @@ prepare: prepare-sources $(OPTIONAL_TARGETS)
|
||||
clean: ## Remove build related file
|
||||
$(GOCMD) clean -cache
|
||||
rm -f prepare
|
||||
rm -rf ./go-llama
|
||||
rm -rf ./gpt4all
|
||||
rm -rf ./go-llama-stable
|
||||
rm -rf ./go-gpt2
|
||||
rm -rf ./go-stable-diffusion
|
||||
rm -rf ./go-ggml-transformers
|
||||
rm -rf ./backend-assets
|
||||
rm -rf ./go-rwkv
|
||||
rm -rf ./go-bert
|
||||
rm -rf ./bloomz
|
||||
rm -rf ./whisper.cpp
|
||||
rm -rf ./go-piper
|
||||
rm -rf ./go-ggllm
|
||||
rm -rf ./sources
|
||||
rm -rf $(BINARY_NAME)
|
||||
rm -rf release/
|
||||
rm -rf ./backend/cpp/grpc/grpc_repo
|
||||
rm -rf ./backend/cpp/grpc/build
|
||||
rm -rf ./backend/cpp/grpc/installed_packages
|
||||
$(MAKE) -C backend/cpp/llama clean
|
||||
|
||||
## Build:
|
||||
@@ -311,6 +284,9 @@ dist: build
|
||||
mkdir -p release
|
||||
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)
|
||||
|
||||
osx-signed: build
|
||||
codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"
|
||||
|
||||
## Run
|
||||
run: prepare ## run local-ai
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./
|
||||
@@ -318,12 +294,12 @@ run: prepare ## run local-ai
|
||||
test-models/testmodel:
|
||||
mkdir test-models
|
||||
mkdir test-dir
|
||||
wget https://huggingface.co/nnakasato/ggml-model-test/resolve/main/ggml-model-q4.bin -O test-models/testmodel
|
||||
wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
||||
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O test-models/bert
|
||||
wget https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
||||
wget https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv
|
||||
wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
|
||||
wget -q https://huggingface.co/nnakasato/ggml-model-test/resolve/main/ggml-model-q4.bin -O test-models/testmodel
|
||||
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
||||
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
|
||||
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
||||
wget -q https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv
|
||||
wget -q https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
|
||||
cp tests/models_fixtures/* test-models
|
||||
|
||||
prepare-test: grpcs
|
||||
@@ -334,8 +310,8 @@ test: prepare test-models/testmodel grpcs
|
||||
@echo 'Running tests'
|
||||
export GO_TAGS="tts stablediffusion"
|
||||
$(MAKE) prepare-test
|
||||
HUGGINGFACE_GRPC=$(abspath ./)/extra/grpc/huggingface/huggingface.py TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts 5 -v -r ./api ./pkg
|
||||
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts 5 --fail-fast -v -r ./api ./pkg
|
||||
$(MAKE) test-gpt4all
|
||||
$(MAKE) test-llama
|
||||
$(MAKE) test-llama-gguf
|
||||
@@ -402,38 +378,69 @@ protogen: protogen-go protogen-python
|
||||
|
||||
protogen-go:
|
||||
protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative \
|
||||
pkg/grpc/proto/backend.proto
|
||||
backend/backend.proto
|
||||
|
||||
protogen-python:
|
||||
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/huggingface/ --grpc_python_out=extra/grpc/huggingface/ pkg/grpc/proto/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/autogptq/ --grpc_python_out=extra/grpc/autogptq/ pkg/grpc/proto/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/exllama/ --grpc_python_out=extra/grpc/exllama/ pkg/grpc/proto/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/bark/ --grpc_python_out=extra/grpc/bark/ pkg/grpc/proto/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/diffusers/ --grpc_python_out=extra/grpc/diffusers/ pkg/grpc/proto/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/vall-e-x/ --grpc_python_out=extra/grpc/vall-e-x/ pkg/grpc/proto/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/vllm/ --grpc_python_out=extra/grpc/vllm/ pkg/grpc/proto/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/sentencetransformers/ --grpc_python_out=backend/python/sentencetransformers/ backend/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/transformers/ --grpc_python_out=backend/python/transformers/ backend/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/autogptq/ --grpc_python_out=backend/python/autogptq/ backend/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/exllama/ --grpc_python_out=backend/python/exllama/ backend/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/bark/ --grpc_python_out=backend/python/bark/ backend/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/diffusers/ --grpc_python_out=backend/python/diffusers/ backend/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vall-e-x/ --grpc_python_out=backend/python/vall-e-x/ backend/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vllm/ --grpc_python_out=backend/python/vllm/ backend/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/petals/ --grpc_python_out=backend/python/petals/ backend/backend.proto
|
||||
|
||||
## GRPC
|
||||
# Note: it is duplicated in the Dockerfile
|
||||
prepare-extra-conda-environments:
|
||||
$(MAKE) -C backend/python/autogptq
|
||||
$(MAKE) -C backend/python/bark
|
||||
$(MAKE) -C backend/python/diffusers
|
||||
$(MAKE) -C backend/python/vllm
|
||||
$(MAKE) -C backend/python/sentencetransformers
|
||||
$(MAKE) -C backend/python/transformers
|
||||
$(MAKE) -C backend/python/vall-e-x
|
||||
$(MAKE) -C backend/python/exllama
|
||||
$(MAKE) -C backend/python/petals
|
||||
|
||||
|
||||
backend-assets/grpc:
|
||||
mkdir -p backend-assets/grpc
|
||||
|
||||
backend-assets/grpc/falcon: backend-assets/grpc go-ggllm/libggllm.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggllm LIBRARY_PATH=$(shell pwd)/go-ggllm \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon ./cmd/grpc/falcon/
|
||||
|
||||
backend-assets/grpc/llama: backend-assets/grpc go-llama/libbinding.a
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama LIBRARY_PATH=$(shell pwd)/go-llama \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama ./cmd/grpc/llama/
|
||||
# TODO: every binary should have its own folder instead, so can have different metal implementations
|
||||
backend-assets/grpc/llama: backend-assets/grpc sources/go-llama/libbinding.a
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/sources/go-llama
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-llama LIBRARY_PATH=$(shell pwd)/sources/go-llama \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama ./backend/go/llm/llama/
|
||||
# TODO: every binary should have its own folder instead, so can have different implementations
|
||||
ifeq ($(BUILD_TYPE),metal)
|
||||
cp go-llama/build/bin/ggml-metal.metal backend-assets/grpc/
|
||||
cp backend/cpp/llama/llama.cpp/ggml-metal.metal backend-assets/grpc/
|
||||
endif
|
||||
|
||||
backend/cpp/llama/grpc-server:
|
||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
|
||||
## BACKEND CPP LLAMA START
|
||||
# Sets the variables in case it has to build the gRPC locally.
|
||||
INSTALLED_PACKAGES=$(CURDIR)/backend/cpp/grpc/installed_packages
|
||||
INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
|
||||
ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
|
||||
-DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
|
||||
-Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
|
||||
-DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
|
||||
-DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
|
||||
|
||||
backend/cpp/llama/grpc-server:
|
||||
ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
|
||||
backend/cpp/grpc/script/build_grpc.sh ${INSTALLED_PACKAGES}
|
||||
export _PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto && \
|
||||
export _GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin && \
|
||||
export PATH=${PATH}:${INSTALLED_PACKAGES}/bin && \
|
||||
CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
|
||||
else
|
||||
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
|
||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
|
||||
endif
|
||||
## BACKEND CPP LLAMA END
|
||||
|
||||
##
|
||||
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server
|
||||
cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp
|
||||
# TODO: every binary should have its own folder instead, so can have different metal implementations
|
||||
@@ -441,75 +448,71 @@ ifeq ($(BUILD_TYPE),metal)
|
||||
cp backend/cpp/llama/llama.cpp/build/bin/ggml-metal.metal backend-assets/grpc/
|
||||
endif
|
||||
|
||||
backend-assets/grpc/llama-stable: backend-assets/grpc go-llama-stable/libbinding.a
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama-stable
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama-stable LIBRARY_PATH=$(shell pwd)/go-llama \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-stable ./cmd/grpc/llama-stable/
|
||||
backend-assets/grpc/llama-ggml: backend-assets/grpc sources/go-llama-ggml/libbinding.a
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/sources/go-llama-ggml
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-llama-ggml LIBRARY_PATH=$(shell pwd)/sources/go-llama-ggml \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
|
||||
|
||||
backend-assets/grpc/gpt4all: backend-assets/grpc backend-assets/gpt4all gpt4all/gpt4all-bindings/golang/libgpt4all.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(shell pwd)/gpt4all/gpt4all-bindings/golang/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./cmd/grpc/gpt4all/
|
||||
backend-assets/grpc/gpt4all: backend-assets/grpc backend-assets/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(shell pwd)/sources/gpt4all/gpt4all-bindings/golang/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
|
||||
|
||||
backend-assets/grpc/dolly: backend-assets/grpc go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/dolly ./cmd/grpc/dolly/
|
||||
backend-assets/grpc/dolly: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/sources/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/dolly ./backend/go/llm/dolly/
|
||||
|
||||
backend-assets/grpc/gpt2: backend-assets/grpc go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt2 ./cmd/grpc/gpt2/
|
||||
backend-assets/grpc/gpt2: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/sources/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt2 ./backend/go/llm/gpt2/
|
||||
|
||||
backend-assets/grpc/gptj: backend-assets/grpc go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptj ./cmd/grpc/gptj/
|
||||
backend-assets/grpc/gptj: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/sources/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptj ./backend/go/llm/gptj/
|
||||
|
||||
backend-assets/grpc/gptneox: backend-assets/grpc go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptneox ./cmd/grpc/gptneox/
|
||||
backend-assets/grpc/gptneox: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/sources/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptneox ./backend/go/llm/gptneox/
|
||||
|
||||
backend-assets/grpc/mpt: backend-assets/grpc go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/mpt ./cmd/grpc/mpt/
|
||||
backend-assets/grpc/mpt: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/sources/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/mpt ./backend/go/llm/mpt/
|
||||
|
||||
backend-assets/grpc/replit: backend-assets/grpc go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/replit ./cmd/grpc/replit/
|
||||
backend-assets/grpc/replit: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/sources/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/replit ./backend/go/llm/replit/
|
||||
|
||||
backend-assets/grpc/falcon-ggml: backend-assets/grpc go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon-ggml ./cmd/grpc/falcon-ggml/
|
||||
backend-assets/grpc/falcon-ggml: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/sources/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon-ggml ./backend/go/llm/falcon-ggml/
|
||||
|
||||
backend-assets/grpc/starcoder: backend-assets/grpc go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/starcoder ./cmd/grpc/starcoder/
|
||||
backend-assets/grpc/starcoder: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/sources/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/starcoder ./backend/go/llm/starcoder/
|
||||
|
||||
backend-assets/grpc/rwkv: backend-assets/grpc go-rwkv/librwkv.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-rwkv LIBRARY_PATH=$(shell pwd)/go-rwkv \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./cmd/grpc/rwkv/
|
||||
backend-assets/grpc/rwkv: backend-assets/grpc sources/go-rwkv/librwkv.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-rwkv LIBRARY_PATH=$(shell pwd)/sources/go-rwkv \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
|
||||
|
||||
backend-assets/grpc/bloomz: backend-assets/grpc bloomz/libbloomz.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/bloomz LIBRARY_PATH=$(shell pwd)/bloomz \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bloomz ./cmd/grpc/bloomz/
|
||||
|
||||
backend-assets/grpc/bert-embeddings: backend-assets/grpc go-bert/libgobert.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-bert LIBRARY_PATH=$(shell pwd)/go-bert \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./cmd/grpc/bert-embeddings/
|
||||
backend-assets/grpc/bert-embeddings: backend-assets/grpc sources/go-bert/libgobert.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-bert LIBRARY_PATH=$(shell pwd)/sources/go-bert \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
|
||||
|
||||
backend-assets/grpc/langchain-huggingface: backend-assets/grpc
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./cmd/grpc/langchain-huggingface/
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./backend/go/llm/langchain/
|
||||
|
||||
backend-assets/grpc/stablediffusion: backend-assets/grpc
|
||||
if [ ! -f backend-assets/grpc/stablediffusion ]; then \
|
||||
$(MAKE) go-stable-diffusion/libstablediffusion.a; \
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-stable-diffusion/ LIBRARY_PATH=$(shell pwd)/go-stable-diffusion/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./cmd/grpc/stablediffusion/; \
|
||||
$(MAKE) sources/go-stable-diffusion/libstablediffusion.a; \
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-stable-diffusion/ LIBRARY_PATH=$(shell pwd)/sources/go-stable-diffusion/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/; \
|
||||
fi
|
||||
|
||||
backend-assets/grpc/piper: backend-assets/grpc backend-assets/espeak-ng-data go-piper/libpiper_binding.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(shell pwd)/go-piper \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./cmd/grpc/piper/
|
||||
backend-assets/grpc/piper: backend-assets/grpc backend-assets/espeak-ng-data sources/go-piper/libpiper_binding.a
|
||||
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(shell pwd)/sources/go-piper \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
|
||||
|
||||
backend-assets/grpc/whisper: backend-assets/grpc whisper.cpp/libwhisper.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/whisper.cpp LIBRARY_PATH=$(shell pwd)/whisper.cpp \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./cmd/grpc/whisper/
|
||||
backend-assets/grpc/whisper: backend-assets/grpc sources/whisper.cpp/libwhisper.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/whisper.cpp LIBRARY_PATH=$(shell pwd)/sources/whisper.cpp \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
|
||||
|
||||
grpcs: prepare $(GRPC_BACKENDS)
|
||||
|
||||
README.md (68 changed lines)
@@ -22,14 +22,13 @@
|
||||
|
||||
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
||||
>
|
||||
> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/)
|
||||
|
||||
> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||
|
||||
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
||||
|
||||
**LocalAI** is a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families that are compatible with the ggml format, pytorch and more. Does not require GPU.
|
||||
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that’s compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images and audio (and more) locally or on-prem with consumer-grade hardware, supporting multiple model families. It does not require a GPU.
|
||||
|
||||
<p align="center"><b>Follow LocalAI </b></p>
|
||||
<p align="center"><b>Follow LocalAI </b></p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://twitter.com/LocalAI_API" target="blank">
|
||||
@@ -39,7 +38,7 @@
|
||||
<img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
|
||||
</a>
|
||||
|
||||
<p align="center"><b>Connect with the Creator </b></p>
|
||||
<p align="center"><b>Connect with the Creator </b></p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://twitter.com/mudler_it" target="blank">
|
||||
@@ -50,7 +49,7 @@
|
||||
</a>
|
||||
</p>
|
||||
|
||||
<p align="center"><b>Share LocalAI Repository</b></p>
|
||||
<p align="center"><b>Share LocalAI Repository</b></p>
|
||||
|
||||
<p align="center">
|
||||
|
||||
@@ -64,6 +63,22 @@
|
||||
|
||||
</p>
|
||||
|
||||
## 💻 [Getting started](https://localai.io/basics/getting_started/index.html)
|
||||
|
||||
## 🔥🔥 Hot topics / Roadmap
|
||||
|
||||
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||
|
||||
🆕 New! [LLM finetuning guide](https://localai.io/advanced/fine-tuning/)
|
||||
|
||||
Hot topics (looking for contributors):
|
||||
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
||||
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
|
||||
|
||||
If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22
|
||||
|
||||
|
||||
|
||||
<hr>
|
||||
|
||||
In a nutshell:
|
||||
@@ -79,8 +94,6 @@ LocalAI was created by [Ettore Di Giacinto](https://github.com/mudler/) and is a
|
||||
|
||||
Note that this started just as a [fun weekend project](https://localai.io/#backstory) in order to try to create the necessary pieces for a full AI assistant like `ChatGPT`: the community is growing fast and we are working hard to make it better and more stable. If you want to help, please consider contributing (see below)!
|
||||
|
||||
## 🔥🔥 [Hot topics / Roadmap](https://localai.io/#-hot-topics--roadmap)
|
||||
|
||||
## 🚀 [Features](https://localai.io/features/)
|
||||
|
||||
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
|
||||
@@ -91,8 +104,32 @@ Note that this started just as a [fun weekend project](https://localai.io/#backs
|
||||
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
|
||||
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
|
||||
- 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
|
||||
- 🆕 [Vision API](https://localai.io/features/gpt-vision/)
|
||||
|
||||
## 💻 Usage
|
||||
|
||||
Check out the [Getting started](https://localai.io/basics/getting_started/index.html) section in our documentation.
|
||||
|
||||
### 🔗 Community and integrations
|
||||
|
||||
WebUIs:
|
||||
- https://github.com/Jirubizu/localai-admin
|
||||
- https://github.com/go-skynet/LocalAI-frontend
|
||||
|
||||
Model galleries:
|
||||
- https://github.com/go-skynet/model-gallery
|
||||
|
||||
Other:
|
||||
- Helm chart https://github.com/go-skynet/helm-charts
|
||||
|
||||
### 🔗 Resources
|
||||
|
||||
- 🆕 New! [LLM finetuning guide](https://localai.io/advanced/fine-tuning/)
|
||||
- [How to build locally](https://localai.io/basics/build/index.html)
|
||||
- [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes)
|
||||
- [Projects integrating LocalAI](https://localai.io/integrations/)
|
||||
- [How tos section](https://localai.io/howtos/) (curated by our community)
|
||||
|
||||
## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
|
||||
|
||||
- [Create a slackbot for teams and OSS projects that answer to documentation](https://mudler.pm/posts/smart-slackbot-for-teams/)
|
||||
@@ -100,21 +137,6 @@ Note that this started just as a [fun weekend project](https://localai.io/#backs
|
||||
- [Question Answering on Documents locally with LangChain, LocalAI, Chroma, and GPT4All](https://mudler.pm/posts/localai-question-answering/)
|
||||
- [Tutorial to use k8sgpt with LocalAI](https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65)
|
||||
|
||||
## 💻 Usage
|
||||
|
||||
Check out the [Getting started](https://localai.io/basics/getting_started/index.html) section in our documentation.
|
||||
|
||||
### 💡 Example: Use Luna-AI Llama model
|
||||
|
||||
See the [documentation](https://localai.io/basics/getting_started)
|
||||
|
||||
### 🔗 Resources
|
||||
|
||||
- [How to build locally](https://localai.io/basics/build/index.html)
|
||||
- [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes)
|
||||
- [Projects integrating LocalAI](https://localai.io/integrations/)
|
||||
- [How tos section](https://localai.io/howtos/) (curated by our community)
|
||||
|
||||
## Citation
|
||||
|
||||
If you utilize this repository or its data in a downstream project, please consider citing it with:
|
||||
|
||||
api/api.go (17 changed lines)
@@ -13,6 +13,7 @@ import (
|
||||
"github.com/go-skynet/LocalAI/internal"
|
||||
"github.com/go-skynet/LocalAI/metrics"
|
||||
"github.com/go-skynet/LocalAI/pkg/assets"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/gofiber/fiber/v2/middleware/cors"
|
||||
@@ -79,6 +80,22 @@ func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader,
|
||||
options.Loader.StopAllGRPC()
|
||||
}()
|
||||
|
||||
if options.WatchDog {
|
||||
wd := model.NewWatchDog(
|
||||
options.Loader,
|
||||
options.WatchDogBusyTimeout,
|
||||
options.WatchDogIdleTimeout,
|
||||
options.WatchDogBusy,
|
||||
options.WatchDogIdle)
|
||||
options.Loader.SetWatchDog(wd)
|
||||
go wd.Run()
|
||||
go func() {
|
||||
<-options.Context.Done()
|
||||
log.Debug().Msgf("Context canceled, shutting down")
|
||||
wd.Shutdown()
|
||||
}()
|
||||
}
|
||||
|
||||
return options, cl, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -301,7 +301,7 @@ var _ = Describe("API test", func() {
|
||||
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||
URL: "github:go-skynet/model-gallery/openllama_3b.yaml",
|
||||
Name: "openllama_3b",
|
||||
Overrides: map[string]interface{}{"backend": "llama-stable", "mmap": true, "f16": true, "context_size": 128},
|
||||
Overrides: map[string]interface{}{"backend": "llama-ggml", "mmap": true, "f16": true, "context_size": 128},
|
||||
})
|
||||
|
||||
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
|
||||
@@ -457,7 +457,7 @@ var _ = Describe("API test", func() {
|
||||
Eventually(func() bool {
|
||||
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
|
||||
return response["processed"].(bool)
|
||||
}, "360s", "10s").Should(Equal(true))
|
||||
}, "960s", "10s").Should(Equal(true))
|
||||
|
||||
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-j", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "How are you?"}}})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
@@ -687,7 +687,7 @@ var _ = Describe("API test", func() {
|
||||
Input: []string{"sun", "cat"},
|
||||
},
|
||||
)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(err).ToNot(HaveOccurred(), err)
|
||||
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
|
||||
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
|
||||
|
||||
@@ -704,7 +704,7 @@ var _ = Describe("API test", func() {
|
||||
})
|
||||
|
||||
Context("External gRPC calls", func() {
|
||||
It("calculate embeddings with huggingface", func() {
|
||||
It("calculate embeddings with sentencetransformers", func() {
|
||||
if runtime.GOOS != "linux" {
|
||||
Skip("test supported only on linux")
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
|
||||
PipelineType: c.Diffusers.PipelineType,
|
||||
CFGScale: c.Diffusers.CFGScale,
|
||||
LoraAdapter: c.LoraAdapter,
|
||||
LoraScale: c.LoraScale,
|
||||
LoraBase: c.LoraBase,
|
||||
IMG2IMG: c.Diffusers.IMG2IMG,
|
||||
CLIPModel: c.Diffusers.ClipModel,
|
||||
|
||||
@@ -26,7 +26,7 @@ type TokenUsage struct {
|
||||
Completion int
|
||||
}
|
||||
|
||||
func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||
func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||
modelFile := c.Model
|
||||
|
||||
grpcOpts := gRPCModelOpts(c)
|
||||
@@ -72,6 +72,7 @@ func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c
|
||||
fn := func() (LLMResponse, error) {
|
||||
opts := gRPCPredictOpts(c, loader.ModelPath)
|
||||
opts.Prompt = s
|
||||
opts.Images = images
|
||||
|
||||
tokenUsage := TokenUsage{}
|
||||
|
||||
|
||||
@@ -16,6 +16,10 @@ func modelOpts(c config.Config, o *options.Option, opts []model.Option) []model.
|
||||
opts = append(opts, model.WithSingleActiveBackend())
|
||||
}
|
||||
|
||||
if o.ParallelBackendRequests {
|
||||
opts = append(opts, model.EnableParallelRequests)
|
||||
}
|
||||
|
||||
if c.GRPC.Attempts != 0 {
|
||||
opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts))
|
||||
}
|
||||
@@ -38,29 +42,35 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
|
||||
}
|
||||
|
||||
return &pb.ModelOptions{
|
||||
ContextSize: int32(c.ContextSize),
|
||||
Seed: int32(c.Seed),
|
||||
NBatch: int32(b),
|
||||
NoMulMatQ: c.NoMulMatQ,
|
||||
DraftModel: c.DraftModel,
|
||||
AudioPath: c.VallE.AudioPath,
|
||||
Quantization: c.Quantization,
|
||||
LoraAdapter: c.LoraAdapter,
|
||||
LoraBase: c.LoraBase,
|
||||
NGQA: c.NGQA,
|
||||
RMSNormEps: c.RMSNormEps,
|
||||
F16Memory: c.F16,
|
||||
MLock: c.MMlock,
|
||||
RopeFreqBase: c.RopeFreqBase,
|
||||
RopeFreqScale: c.RopeFreqScale,
|
||||
NUMA: c.NUMA,
|
||||
Embeddings: c.Embeddings,
|
||||
LowVRAM: c.LowVRAM,
|
||||
NGPULayers: int32(c.NGPULayers),
|
||||
MMap: c.MMap,
|
||||
MainGPU: c.MainGPU,
|
||||
Threads: int32(c.Threads),
|
||||
TensorSplit: c.TensorSplit,
|
||||
ContextSize: int32(c.ContextSize),
|
||||
Seed: int32(c.Seed),
|
||||
NBatch: int32(b),
|
||||
NoMulMatQ: c.NoMulMatQ,
|
||||
DraftModel: c.DraftModel,
|
||||
AudioPath: c.VallE.AudioPath,
|
||||
Quantization: c.Quantization,
|
||||
MMProj: c.MMProj,
|
||||
YarnExtFactor: c.YarnExtFactor,
|
||||
YarnAttnFactor: c.YarnAttnFactor,
|
||||
YarnBetaFast: c.YarnBetaFast,
|
||||
YarnBetaSlow: c.YarnBetaSlow,
|
||||
LoraAdapter: c.LoraAdapter,
|
||||
LoraBase: c.LoraBase,
|
||||
LoraScale: c.LoraScale,
|
||||
NGQA: c.NGQA,
|
||||
RMSNormEps: c.RMSNormEps,
|
||||
F16Memory: c.F16,
|
||||
MLock: c.MMlock,
|
||||
RopeFreqBase: c.RopeFreqBase,
|
||||
RopeFreqScale: c.RopeFreqScale,
|
||||
NUMA: c.NUMA,
|
||||
Embeddings: c.Embeddings,
|
||||
LowVRAM: c.LowVRAM,
|
||||
NGPULayers: int32(c.NGPULayers),
|
||||
MMap: c.MMap,
|
||||
MainGPU: c.MainGPU,
|
||||
Threads: int32(c.Threads),
|
||||
TensorSplit: c.TensorSplit,
|
||||
// AutoGPTQ
|
||||
ModelBaseName: c.AutoGPTQ.ModelBaseName,
|
||||
Device: c.AutoGPTQ.Device,
|
||||
|
||||
@@ -100,10 +100,18 @@ type LLMConfig struct {
|
||||
NUMA bool `yaml:"numa"`
|
||||
LoraAdapter string `yaml:"lora_adapter"`
|
||||
LoraBase string `yaml:"lora_base"`
|
||||
LoraScale float32 `yaml:"lora_scale"`
|
||||
NoMulMatQ bool `yaml:"no_mulmatq"`
|
||||
DraftModel string `yaml:"draft_model"`
|
||||
NDraft int32 `yaml:"n_draft"`
|
||||
Quantization string `yaml:"quantization"`
|
||||
MMProj string `yaml:"mmproj"`
|
||||
|
||||
RopeScaling string `yaml:"rope_scaling"`
|
||||
YarnExtFactor float32 `yaml:"yarn_ext_factor"`
|
||||
YarnAttnFactor float32 `yaml:"yarn_attn_factor"`
|
||||
YarnBetaFast float32 `yaml:"yarn_beta_fast"`
|
||||
YarnBetaSlow float32 `yaml:"yarn_beta_slow"`
|
||||
}
|
||||
|
||||
type AutoGPTQ struct {
|
||||
@@ -269,7 +277,7 @@ func (cm *ConfigLoader) LoadConfigs(path string) error {
|
||||
}
|
||||
for _, file := range files {
|
||||
// Skip templates, YAML and .keep files
|
||||
if !strings.Contains(file.Name(), ".yaml") {
|
||||
if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") {
|
||||
continue
|
||||
}
|
||||
c, err := ReadConfig(filepath.Join(path, file.Name()))
|
||||
|
||||
@@ -123,13 +123,12 @@ func BackendMonitorEndpoint(bm BackendMonitor) func(c *fiber.Ctx) error {
|
||||
return err
|
||||
}
|
||||
|
||||
client := bm.options.Loader.CheckIsLoaded(backendId)
|
||||
|
||||
if client == nil {
|
||||
model := bm.options.Loader.CheckIsLoaded(backendId)
|
||||
if model == "" {
|
||||
return fmt.Errorf("backend %s is not currently loaded", backendId)
|
||||
}
|
||||
|
||||
status, rpcErr := client.Status(context.TODO())
|
||||
status, rpcErr := model.GRPC(false, nil).Status(context.TODO())
|
||||
if rpcErr != nil {
|
||||
log.Warn().Msgf("backend %s experienced an error retrieving status info: %s", backendId, rpcErr.Error())
|
||||
val, slbErr := bm.SampleLocalBackendProcess(backendId)
|
||||
|
||||
@@ -81,6 +81,10 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||
noActionDescription = config.FunctionsConfig.NoActionDescriptionName
|
||||
}
|
||||
|
||||
if input.ResponseFormat.Type == "json_object" {
|
||||
input.Grammar = grammar.JSONBNF
|
||||
}
|
||||
|
||||
// process functions if we have any defined or if we have a function call string
|
||||
if len(input.Functions) > 0 && config.ShouldUseFunctions() {
|
||||
log.Debug().Msgf("Response needs to process functions")
|
||||
@@ -140,14 +144,14 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||
}
|
||||
}
|
||||
r := config.Roles[role]
|
||||
contentExists := i.Content != nil && *i.Content != ""
|
||||
contentExists := i.Content != nil && i.StringContent != ""
|
||||
// First attempt to populate content via a chat message specific template
|
||||
if config.TemplateConfig.ChatMessage != "" {
|
||||
chatMessageData := model.ChatMessageTemplateData{
|
||||
SystemPrompt: config.SystemPrompt,
|
||||
Role: r,
|
||||
RoleName: role,
|
||||
Content: *i.Content,
|
||||
Content: i.StringContent,
|
||||
MessageIndex: messageIndex,
|
||||
}
|
||||
templatedChatMessage, err := o.Loader.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
|
||||
@@ -166,7 +170,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||
if content == "" {
|
||||
if r != "" {
|
||||
if contentExists {
|
||||
content = fmt.Sprint(r, " ", *i.Content)
|
||||
content = fmt.Sprint(r, i.StringContent)
|
||||
}
|
||||
if i.FunctionCall != nil {
|
||||
j, err := json.Marshal(i.FunctionCall)
|
||||
@@ -180,7 +184,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||
}
|
||||
} else {
|
||||
if contentExists {
|
||||
content = fmt.Sprint(*i.Content)
|
||||
content = fmt.Sprint(i.StringContent)
|
||||
}
|
||||
if i.FunctionCall != nil {
|
||||
j, err := json.Marshal(i.FunctionCall)
|
||||
@@ -334,7 +338,11 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||
// Otherwise ask the LLM to understand the JSON output and the context, and return a message
|
||||
// Note: This costs (in term of CPU) another computation
|
||||
config.Grammar = ""
|
||||
predFunc, err := backend.ModelInference(input.Context, predInput, o.Loader, *config, o, nil)
|
||||
images := []string{}
|
||||
for _, m := range input.Messages {
|
||||
images = append(images, m.StringImages...)
|
||||
}
|
||||
predFunc, err := backend.ModelInference(input.Context, predInput, images, o.Loader, *config, o, nil)
|
||||
if err != nil {
|
||||
log.Error().Msgf("inference error: %s", err.Error())
|
||||
return
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
config "github.com/go-skynet/LocalAI/api/config"
|
||||
"github.com/go-skynet/LocalAI/api/options"
|
||||
"github.com/go-skynet/LocalAI/api/schema"
|
||||
"github.com/go-skynet/LocalAI/pkg/grammar"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
@@ -64,6 +65,10 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
if input.ResponseFormat.Type == "json_object" {
|
||||
input.Grammar = grammar.JSONBNF
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
|
||||
if input.Stream {
|
||||
|
||||
@@ -100,7 +100,7 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
|
||||
}
|
||||
|
||||
b64JSON := false
|
||||
if input.ResponseFormat == "b64_json" {
|
||||
if input.ResponseFormat.Type == "b64_json" {
|
||||
b64JSON = true
|
||||
}
|
||||
// src and clip_skip
|
||||
|
||||
@@ -23,8 +23,13 @@ func ComputeChoices(
|
||||
n = 1
|
||||
}
|
||||
|
||||
images := []string{}
|
||||
for _, m := range req.Messages {
|
||||
images = append(images, m.StringImages...)
|
||||
}
|
||||
|
||||
// get the model function to call for the result
|
||||
predFunc, err := backend.ModelInference(req.Context, predInput, loader, *config, o, tokenCallback)
|
||||
predFunc, err := backend.ModelInference(req.Context, predInput, images, loader, *config, o, tokenCallback)
|
||||
if err != nil {
|
||||
return result, backend.TokenUsage{}, err
|
||||
}
|
||||
|
||||
@@ -2,8 +2,11 @@ package openai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
@@ -24,7 +27,7 @@ func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *sche
|
||||
input.Cancel = cancel
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return "", nil, err
|
||||
return "", nil, fmt.Errorf("failed parsing request body: %w", err)
|
||||
}
|
||||
|
||||
modelFile := input.Model
|
||||
@@ -61,6 +64,37 @@ func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *sche
|
||||
return modelFile, input, nil
|
||||
}
|
||||
|
||||
// this function checks if the string is a URL; if it is a URL, it downloads the image into memory,
// encodes it in base64 and returns the base64 string
|
||||
func getBase64Image(s string) (string, error) {
|
||||
if strings.HasPrefix(s, "http") {
|
||||
// download the image
|
||||
resp, err := http.Get(s)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// read the image data into memory
|
||||
data, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// encode the image data in base64
|
||||
encoded := base64.StdEncoding.EncodeToString(data)
|
||||
|
||||
// return the base64 string
|
||||
return encoded, nil
|
||||
}
|
||||
|
||||
// if the string is instead prefixed with "data:image/jpeg;base64,", drop the prefix
|
||||
if strings.HasPrefix(s, "data:image/jpeg;base64,") {
|
||||
return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil
|
||||
}
|
||||
return "", fmt.Errorf("not valid string")
|
||||
}
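As a quick, hedged illustration of the helper above (the URL and base64 payload below are made up; only the behaviour follows the function as written):

```go
// Sketch only, assuming it runs in the same package as getBase64Image.
// A remote image is downloaded and re-encoded as base64:
b64, err := getBase64Image("https://example.com/picture.jpg") // placeholder URL
if err != nil {
	fmt.Println("failed encoding image:", err)
}

// A data URI keeps its payload, with the "data:image/jpeg;base64," prefix stripped;
// any other string returns an error.
inline, _ := getBase64Image("data:image/jpeg;base64,/9j/4AAQSkZJRg==") // made-up payload
fmt.Println(len(b64), len(inline))
```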
|
||||
|
||||
func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
|
||||
if input.Echo {
|
||||
config.Echo = input.Echo
|
||||
@@ -129,6 +163,35 @@ func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
|
||||
}
|
||||
}
|
||||
|
||||
// Decode each request's message content
|
||||
index := 0
|
||||
for i, m := range input.Messages {
|
||||
switch content := m.Content.(type) {
|
||||
case string:
|
||||
input.Messages[i].StringContent = content
|
||||
case []interface{}:
|
||||
dat, _ := json.Marshal(content)
|
||||
c := []schema.Content{}
|
||||
json.Unmarshal(dat, &c)
|
||||
for _, pp := range c {
|
||||
if pp.Type == "text" {
|
||||
input.Messages[i].StringContent = pp.Text
|
||||
} else if pp.Type == "image_url" {
|
||||
// Detect if pp.ImageURL is a URL; if it is, download the image and encode it in base64:
|
||||
base64, err := getBase64Image(pp.ImageURL.URL)
|
||||
if err == nil {
|
||||
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
|
||||
// set a placeholder for each image
|
||||
input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + input.Messages[i].StringContent
|
||||
index++
|
||||
} else {
|
||||
fmt.Print("Failed encoding image", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if input.RepeatPenalty != 0 {
|
||||
config.RepeatPenalty = input.RepeatPenalty
|
||||
}
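To make the decoding loop above concrete, here is a hedged sketch of the request shape it handles; the field names follow the `schema.Content` and `ContentURL` types added in this change, while the model name and image URL are placeholders.

```go
// Illustrative payload only. "content" may be a plain string or an array of parts.
// With the array below, the loop above ends up with StringContent set to
// "[img-0]What is in this image?" and the base64-encoded image appended to StringImages.
const exampleVisionRequest = `{
  "model": "my-multimodal-model",
  "messages": [
    {
      "role": "user",
      "content": [
        {"type": "text", "text": "What is in this image?"},
        {"type": "image_url", "image_url": {"url": "https://example.com/dog.jpg"}}
      ]
    }
  ]
}`
```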
|
||||
|
||||
@@ -4,10 +4,11 @@ import (
|
||||
"context"
|
||||
"embed"
|
||||
"encoding/json"
|
||||
"time"
|
||||
|
||||
"github.com/go-skynet/LocalAI/metrics"
|
||||
"github.com/go-skynet/LocalAI/pkg/gallery"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/go-skynet/LocalAI/metrics"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
@@ -36,7 +37,13 @@ type Option struct {
|
||||
|
||||
AutoloadGalleries bool
|
||||
|
||||
SingleBackend bool
|
||||
SingleBackend bool
|
||||
ParallelBackendRequests bool
|
||||
|
||||
WatchDogIdle bool
|
||||
WatchDogBusy bool
|
||||
WatchDog bool
|
||||
WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration
|
||||
}
|
||||
|
||||
type AppOption func(*Option)
|
||||
@@ -62,10 +69,40 @@ func WithCors(b bool) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
var EnableWatchDog = func(o *Option) {
|
||||
o.WatchDog = true
|
||||
}
|
||||
|
||||
var EnableWatchDogIdleCheck = func(o *Option) {
|
||||
o.WatchDog = true
|
||||
o.WatchDogIdle = true
|
||||
}
|
||||
|
||||
var EnableWatchDogBusyCheck = func(o *Option) {
|
||||
o.WatchDog = true
|
||||
o.WatchDogBusy = true
|
||||
}
|
||||
|
||||
func SetWatchDogBusyTimeout(t time.Duration) AppOption {
|
||||
return func(o *Option) {
|
||||
o.WatchDogBusyTimeout = t
|
||||
}
|
||||
}
|
||||
|
||||
func SetWatchDogIdleTimeout(t time.Duration) AppOption {
|
||||
return func(o *Option) {
|
||||
o.WatchDogIdleTimeout = t
|
||||
}
|
||||
}
|
||||
|
||||
var EnableSingleBackend = func(o *Option) {
|
||||
o.SingleBackend = true
|
||||
}
|
||||
|
||||
var EnableParallelBackendRequests = func(o *Option) {
|
||||
o.ParallelBackendRequests = true
|
||||
}
|
||||
|
||||
var EnableGalleriesAutoload = func(o *Option) {
|
||||
o.AutoloadGalleries = true
|
||||
}
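For orientation, a minimal sketch of how these options could be combined; the timeout values are illustrative, and the slice is assumed to be expanded into the `Startup(opts ...options.AppOption)` call shown in the api/api.go hunk earlier.

```go
import (
	"time"

	"github.com/go-skynet/LocalAI/api/options"
)

// Sketch only: enable both watchdog checks with illustrative timeouts.
// EnableWatchDogIdleCheck/EnableWatchDogBusyCheck also flip the WatchDog flag,
// which is what makes Startup construct and run the model.NewWatchDog instance.
var appOpts = []options.AppOption{
	options.EnableWatchDogIdleCheck,
	options.EnableWatchDogBusyCheck,
	options.SetWatchDogIdleTimeout(5 * time.Minute),
	options.SetWatchDogBusyTimeout(5 * time.Minute),
}
```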
|
||||
|
||||
@@ -55,11 +55,25 @@ type Choice struct {
|
||||
Text string `json:"text,omitempty"`
|
||||
}
|
||||
|
||||
type Content struct {
|
||||
Type string `json:"type" yaml:"type"`
|
||||
Text string `json:"text" yaml:"text"`
|
||||
ImageURL ContentURL `json:"image_url" yaml:"image_url"`
|
||||
}
|
||||
|
||||
type ContentURL struct {
|
||||
URL string `json:"url" yaml:"url"`
|
||||
}
|
||||
|
||||
type Message struct {
|
||||
// The message role
|
||||
Role string `json:"role,omitempty" yaml:"role"`
|
||||
// The message content
|
||||
Content *string `json:"content" yaml:"content"`
|
||||
Content interface{} `json:"content" yaml:"content"`
|
||||
|
||||
StringContent string `json:"string_content,omitempty" yaml:"string_content,omitempty"`
|
||||
StringImages []string `json:"string_images,omitempty" yaml:"string_images,omitempty"`
|
||||
|
||||
// A result of a function call
|
||||
FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
|
||||
}
|
||||
@@ -69,6 +83,12 @@ type OpenAIModel struct {
|
||||
Object string `json:"object"`
|
||||
}
|
||||
|
||||
type ChatCompletionResponseFormatType string
|
||||
|
||||
type ChatCompletionResponseFormat struct {
|
||||
Type ChatCompletionResponseFormatType `json:"type,omitempty"`
|
||||
}
|
||||
|
||||
type OpenAIRequest struct {
|
||||
config.PredictionOptions
|
||||
|
||||
@@ -78,7 +98,7 @@ type OpenAIRequest struct {
|
||||
// whisper
|
||||
File string `json:"file" validate:"required"`
|
||||
//whisper/image
|
||||
ResponseFormat string `json:"response_format"`
|
||||
ResponseFormat ChatCompletionResponseFormat `json:"response_format"`
|
||||
// image
|
||||
Size string `json:"size"`
|
||||
// Prompt is read only by completion/image API calls
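For context, a hedged example of a request using the structured `response_format` field introduced here; per the chat and completion endpoint hunks above, `{"type": "json_object"}` switches the request grammar to `grammar.JSONBNF`. The model name is just an example.

```go
// Illustrative request body only; the field shapes follow this schema.
const exampleJSONModeRequest = `{
  "model": "gpt4all-j",
  "response_format": {"type": "json_object"},
  "messages": [
    {"role": "user", "content": "Reply with a JSON object describing today's weather."}
  ]
}`
```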
|
||||
|
||||
@@ -64,6 +64,7 @@ message PredictOptions {
|
||||
float NegativePromptScale = 39;
|
||||
string NegativePrompt = 40;
|
||||
int32 NDraft = 41;
|
||||
repeated string Images = 42;
|
||||
}
|
||||
|
||||
// The response message containing the result
|
||||
@@ -116,6 +117,8 @@ message ModelOptions {
|
||||
// LLM (llama.cpp)
|
||||
string LoraBase = 35;
|
||||
string LoraAdapter = 36;
|
||||
float LoraScale = 42;
|
||||
|
||||
bool NoMulMatQ = 37;
|
||||
string DraftModel = 39;
|
||||
|
||||
@@ -123,6 +126,14 @@ message ModelOptions {
|
||||
|
||||
// vllm
|
||||
string Quantization = 40;
|
||||
|
||||
string MMProj = 41;
|
||||
|
||||
string RopeScaling = 43;
|
||||
float YarnExtFactor = 44;
|
||||
float YarnAttnFactor = 45;
|
||||
float YarnBetaFast = 46;
|
||||
float YarnBetaSlow = 47;
|
||||
}
|
||||
|
||||
message Result {
|
||||
backend/cpp/grpc/.gitignore (new file, 3 lines, vendored)
@@ -0,0 +1,3 @@
|
||||
installed_packages/
|
||||
grpc_build/
|
||||
grpc_repo/
|
||||
backend/cpp/grpc/script/build_grpc.sh (new executable file, 81 lines)
@@ -0,0 +1,81 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Builds locally, from sources, the packages needed by the llama.cpp backend.
|
||||
|
||||
# Makes sure a few base packages exist.
|
||||
# sudo apt-get --no-upgrade -y install g++ gcc binutils cmake git build-essential autoconf libtool pkg-config
|
||||
|
||||
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
||||
echo "Script directory: $SCRIPT_DIR"
|
||||
|
||||
CPP_INSTALLED_PACKAGES_DIR=$1
|
||||
if [ -z "${CPP_INSTALLED_PACKAGES_DIR}" ]; then
echo "CPP_INSTALLED_PACKAGES_DIR not set (it is expected as the first argument). Don't know where to install: failed.";
echo
exit 1
fi
|
||||
|
||||
if [ -d "${CPP_INSTALLED_PACKAGES_DIR}" ]; then
|
||||
echo "gRPC installation directory already exists. Nothing to do."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# The depth when cloning a git repo. 1 speeds up the clone when the repo history is not needed.
|
||||
GIT_CLONE_DEPTH=1
|
||||
|
||||
NUM_BUILD_THREADS=$(nproc --ignore=1)
|
||||
|
||||
# Google gRPC --------------------------------------------------------------------------------------
|
||||
TAG_LIB_GRPC="v1.59.0"
|
||||
GIT_REPO_LIB_GRPC="https://github.com/grpc/grpc.git"
|
||||
GRPC_REPO_DIR="${SCRIPT_DIR}/../grpc_repo"
|
||||
GRPC_BUILD_DIR="${SCRIPT_DIR}/../grpc_build"
|
||||
SRC_DIR_LIB_GRPC="${GRPC_REPO_DIR}/grpc"
|
||||
|
||||
echo "SRC_DIR_LIB_GRPC: ${SRC_DIR_LIB_GRPC}"
|
||||
echo "GRPC_REPO_DIR: ${GRPC_REPO_DIR}"
|
||||
echo "GRPC_BUILD_DIR: ${GRPC_BUILD_DIR}"
|
||||
|
||||
mkdir -pv ${GRPC_REPO_DIR}
|
||||
|
||||
rm -rf ${GRPC_BUILD_DIR}
|
||||
mkdir -pv ${GRPC_BUILD_DIR}
|
||||
|
||||
mkdir -pv ${CPP_INSTALLED_PACKAGES_DIR}
|
||||
|
||||
if [ -d "${SRC_DIR_LIB_GRPC}" ]; then
|
||||
echo "gRPC source already exists locally. Not cloned again."
|
||||
else
|
||||
( cd ${GRPC_REPO_DIR} && \
|
||||
git clone --depth ${GIT_CLONE_DEPTH} -b ${TAG_LIB_GRPC} ${GIT_REPO_LIB_GRPC} && \
|
||||
cd ${SRC_DIR_LIB_GRPC} && \
|
||||
git submodule update --init --recursive --depth ${GIT_CLONE_DEPTH}
|
||||
)
|
||||
fi
|
||||
|
||||
( cd ${GRPC_BUILD_DIR} && \
|
||||
cmake -G "Unix Makefiles" \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DgRPC_INSTALL=ON \
|
||||
-DEXECUTABLE_OUTPUT_PATH=${CPP_INSTALLED_PACKAGES_DIR}/grpc/bin \
|
||||
-DLIBRARY_OUTPUT_PATH=${CPP_INSTALLED_PACKAGES_DIR}/grpc/lib \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
-DgRPC_BUILD_CSHARP_EXT=OFF \
|
||||
-DgRPC_BUILD_GRPC_CPP_PLUGIN=ON \
|
||||
-DgRPC_BUILD_GRPC_CSHARP_PLUGIN=OFF \
|
||||
-DgRPC_BUILD_GRPC_NODE_PLUGIN=OFF \
|
||||
-DgRPC_BUILD_GRPC_OBJECTIVE_C_PLUGIN=OFF \
|
||||
-DgRPC_BUILD_GRPC_PHP_PLUGIN=OFF \
|
||||
-DgRPC_BUILD_GRPC_PYTHON_PLUGIN=ON \
|
||||
-DgRPC_BUILD_GRPC_RUBY_PLUGIN=OFF \
|
||||
-Dprotobuf_WITH_ZLIB=ON \
|
||||
-DRE2_BUILD_TESTING=OFF \
|
||||
-DCMAKE_INSTALL_PREFIX=${CPP_INSTALLED_PACKAGES_DIR}/ \
|
||||
${SRC_DIR_LIB_GRPC} && \
|
||||
cmake --build . -- -j ${NUM_BUILD_THREADS} && \
|
||||
cmake --build . --target install -- -j ${NUM_BUILD_THREADS}
|
||||
)
|
||||
|
||||
rm -rf ${GRPC_BUILD_DIR}
|
||||
rm -rf ${GRPC_REPO_DIR}
|
||||
|
||||
@@ -1,9 +1,22 @@
|
||||
|
||||
## XXX: In some versions of CMake, clip wasn't being built before llama.
## This is a hack for now, but it should be fixed in the future.
|
||||
set(TARGET myclip)
|
||||
add_library(${TARGET} clip.cpp clip.h)
|
||||
install(TARGETS ${TARGET} LIBRARY)
|
||||
target_link_libraries(${TARGET} PRIVATE common ggml ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||
if (NOT MSVC)
|
||||
target_compile_options(${TARGET} PRIVATE -Wno-cast-qual) # stb_image.h
|
||||
endif()
|
||||
|
||||
set(TARGET grpc-server)
|
||||
# END CLIP hack
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
cmake_minimum_required(VERSION 3.15)
|
||||
set(TARGET grpc-server)
|
||||
set(_PROTOBUF_LIBPROTOBUF libprotobuf)
|
||||
set(_REFLECTION grpc++_reflection)
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||
link_directories("/opt/homebrew/lib")
|
||||
include_directories("/opt/homebrew/include")
|
||||
@@ -23,7 +36,7 @@ include_directories(${Protobuf_INCLUDE_DIRS})
|
||||
message(STATUS "Using protobuf version ${Protobuf_VERSION} | Protobuf_INCLUDE_DIRS: ${Protobuf_INCLUDE_DIRS} | CMAKE_CURRENT_BINARY_DIR: ${CMAKE_CURRENT_BINARY_DIR}")
|
||||
|
||||
# Proto file
|
||||
get_filename_component(hw_proto "../../../../../../pkg/grpc/proto/backend.proto" ABSOLUTE)
|
||||
get_filename_component(hw_proto "../../../../../../backend/backend.proto" ABSOLUTE)
|
||||
get_filename_component(hw_proto_path "${hw_proto}" PATH)
|
||||
|
||||
# Generated sources
|
||||
@@ -47,10 +60,10 @@ add_library(hw_grpc_proto
|
||||
${hw_grpc_srcs}
|
||||
${hw_grpc_hdrs}
|
||||
${hw_proto_srcs}
|
||||
${hw_proto_hdrs})
|
||||
${hw_proto_hdrs} )
|
||||
|
||||
add_executable(${TARGET} grpc-server.cpp)
|
||||
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
|
||||
add_executable(${TARGET} grpc-server.cpp json.hpp )
|
||||
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
|
||||
absl::flags_parse
|
||||
gRPC::${_REFLECTION}
|
||||
gRPC::${_GRPC_GRPCPP}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
LLAMA_VERSION?=
|
||||
LLAMA_VERSION?=d9b33fe95bd257b36c84ee5769cc048230067d6f
|
||||
|
||||
CMAKE_ARGS?=
|
||||
BUILD_TYPE?=
|
||||
@@ -27,11 +27,17 @@ llama.cpp/examples/grpc-server:
|
||||
mkdir -p llama.cpp/examples/grpc-server
|
||||
cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
|
||||
cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
|
||||
cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
|
||||
echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
|
||||
## XXX: In some versions of CMake, clip wasn't being built before llama.
## This is a hack for now, but it should be fixed in the future.
|
||||
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
|
||||
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
|
||||
|
||||
rebuild:
|
||||
cp -rfv $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
|
||||
cp -rfv $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
|
||||
cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
|
||||
rm -rf grpc-server
|
||||
$(MAKE) grpc-server
|
||||
|
||||
|
||||
File diff suppressed because it is too large.
backend/cpp/llama/json.hpp (new file, 24596 lines; diff suppressed because it is too large)
@@ -5,7 +5,6 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
bert "github.com/go-skynet/LocalAI/pkg/backend/llm/bert"
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
@@ -16,7 +15,7 @@ var (
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &bert.Embeddings{}); err != nil {
|
||||
if err := grpc.StartServer(*addr, &StableDiffusion{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package image
|
||||
package main
|
||||
|
||||
// This is a wrapper to satisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
@@ -1,4 +1,4 @@
|
||||
package bert
|
||||
package main
|
||||
|
||||
// This is a wrapper to satisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
@@ -5,8 +5,6 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
rwkv "github.com/go-skynet/LocalAI/pkg/backend/llm/rwkv"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
@@ -17,7 +15,7 @@ var (
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &rwkv.LLM{}); err != nil {
|
||||
if err := grpc.StartServer(*addr, &Embeddings{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
@@ -5,7 +5,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"
|
||||
transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
)
|
||||
@@ -5,7 +5,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"
|
||||
transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
)
|
||||
@@ -5,7 +5,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"
|
||||
transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
)
|
||||
@@ -1,4 +1,4 @@
|
||||
package gpt4all
|
||||
package main
|
||||
|
||||
// This is a wrapper to satisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
@@ -5,8 +5,6 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
bloomz "github.com/go-skynet/LocalAI/pkg/backend/llm/bloomz"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
@@ -17,7 +15,7 @@ var (
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &bloomz.LLM{}); err != nil {
|
||||
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
@@ -5,7 +5,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"
|
||||
transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
)
|
||||
@@ -5,7 +5,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"
|
||||
transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
)
|
||||
@@ -1,4 +1,4 @@
|
||||
package langchain
|
||||
package main
|
||||
|
||||
// This is a wrapper to satisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
backend/go/llm/langchain/main.go (new file, 21 lines)
@@ -0,0 +1,21 @@
|
||||
package main
|
||||
|
||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
||||
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package llama
|
||||
package main
|
||||
|
||||
// This is a wrapper to satisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
@@ -3,8 +3,6 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
llama "github.com/go-skynet/LocalAI/pkg/backend/llm/llama-stable"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
@@ -15,7 +13,7 @@ var (
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &llama.LLM{}); err != nil {
|
||||
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package llama
|
||||
package main
|
||||
|
||||
// This is a wrapper to satisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
@@ -1,12 +1,12 @@
|
||||
package main
|
||||
|
||||
// GRPC Falcon server
|
||||
|
||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
||||
|
||||
import (
|
||||
"flag"
|
||||
|
||||
tts "github.com/go-skynet/LocalAI/pkg/backend/tts"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
@@ -17,7 +17,7 @@ var (
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &tts.Piper{}); err != nil {
|
||||
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
@@ -5,7 +5,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"
|
||||
transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
)
|
||||
@@ -5,7 +5,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"
|
||||
transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
)
|
||||
backend/go/llm/rwkv/main.go (new file, 21 lines)
@@ -0,0 +1,21 @@
|
||||
package main
|
||||
|
||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
||||
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package rwkv
|
||||
package main
|
||||
|
||||
// This is a wrapper to satisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
@@ -5,7 +5,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"
|
||||
transformers "github.com/go-skynet/LocalAI/backend/go/llm/transformers"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
)
|
||||
backend/go/transcribe/main.go (new file, 21 lines)
@@ -0,0 +1,21 @@
|
||||
package main
|
||||
|
||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
||||
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &Whisper{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package transcribe
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
@@ -1,4 +1,4 @@
|
||||
package transcribe
|
||||
package main
|
||||
|
||||
// This is a wrapper to statisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
backend/go/tts/main.go (new file, 21 lines)
@@ -0,0 +1,21 @@
package main

// Note: this is started internally by LocalAI and a server is allocated for each model

import (
    "flag"

    grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)

var (
    addr = flag.String("addr", "localhost:50051", "the address to connect to")
)

func main() {
    flag.Parse()

    if err := grpc.StartServer(*addr, &Piper{}); err != nil {
        panic(err)
    }
}
@@ -1,4 +1,4 @@
package tts
package main

// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
backend/python/README.md (new file, 38 lines)
@@ -0,0 +1,38 @@
# Common commands for conda environments

## Create a new empty conda environment

```
conda create --name <env-name> python=<your version> -y

conda create --name autogptq python=3.11 -y
```

## To activate the environment

For conda 4.4 and later:
```
conda activate autogptq
```

For conda versions older than 4.4:

```
source activate autogptq
```

## Install packages into your environment

Sometimes you need to install packages from the conda-forge channel.

Using `conda`:
```
conda install <your-package-name>

conda install -c conda-forge <your package-name>
```

Or using `pip`:
```
pip install <your-package-name>
```
backend/python/autogptq/Makefile (new file, 5 lines)
@@ -0,0 +1,5 @@
.PHONY: autogptq
autogptq:
	@echo "Creating virtual environment..."
	@conda env create --name autogptq --file autogptq.yml
	@echo "Virtual environment created."

backend/python/autogptq/README.md (new file, 5 lines)
@@ -0,0 +1,5 @@
# Creating a separate environment for the autogptq project

```
make autogptq
```
@@ -1,15 +1,15 @@
#!/usr/bin/env python3
import grpc
from concurrent import futures
import time
import backend_pb2
import backend_pb2_grpc
import argparse
import signal
import sys
import os
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
from pathlib import Path
import time

import grpc
import backend_pb2
import backend_pb2_grpc
from auto_gptq import AutoGPTQForCausalLM
from transformers import AutoTokenizer
from transformers import TextGenerationPipeline
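The reorganized imports above pull in AutoGPTQForCausalLM together with transformers' AutoTokenizer and TextGenerationPipeline. As a rough, hypothetical sketch of how those pieces are typically wired together (the backend's actual LoadModel/Predict code is not part of this diff, and the model directory below is a placeholder, not a LocalAI default):

```
# Illustrative sketch only; the path and prompt are made up for the example.
from auto_gptq import AutoGPTQForCausalLM
from transformers import AutoTokenizer, TextGenerationPipeline

model_dir = "/models/my-gptq-model"  # hypothetical quantized model directory

tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=True)
model = AutoGPTQForCausalLM.from_quantized(model_dir, device="cuda:0", use_safetensors=True)

# TextGenerationPipeline ties the quantized model and tokenizer together for inference
pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer)
print(pipeline("What is LocalAI?")[0]["generated_text"])
```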
backend/python/autogptq/autogptq.yml (new file, 86 lines)
@@ -0,0 +1,86 @@
name: autogptq
channels:
  - defaults
dependencies:
  - _libgcc_mutex=0.1=main
  - _openmp_mutex=5.1=1_gnu
  - bzip2=1.0.8=h7b6447c_0
  - ca-certificates=2023.08.22=h06a4308_0
  - ld_impl_linux-64=2.38=h1181459_1
  - libffi=3.4.4=h6a678d5_0
  - libgcc-ng=11.2.0=h1234567_1
  - libgomp=11.2.0=h1234567_1
  - libstdcxx-ng=11.2.0=h1234567_1
  - libuuid=1.41.5=h5eee18b_0
  - ncurses=6.4=h6a678d5_0
  - openssl=3.0.11=h7f8727e_2
  - pip=23.2.1=py311h06a4308_0
  - python=3.11.5=h955ad1f_0
  - readline=8.2=h5eee18b_0
  - setuptools=68.0.0=py311h06a4308_0
  - sqlite=3.41.2=h5eee18b_0
  - tk=8.6.12=h1ccaba5_0
  - wheel=0.41.2=py311h06a4308_0
  - xz=5.4.2=h5eee18b_0
  - zlib=1.2.13=h5eee18b_0
  - pip:
      - accelerate==0.23.0
      - aiohttp==3.8.5
      - aiosignal==1.3.1
      - async-timeout==4.0.3
      - attrs==23.1.0
      - auto-gptq==0.4.2
      - certifi==2023.7.22
      - charset-normalizer==3.3.0
      - datasets==2.14.5
      - dill==0.3.7
      - filelock==3.12.4
      - frozenlist==1.4.0
      - fsspec==2023.6.0
      - grpcio==1.59.0
      - huggingface-hub==0.16.4
      - idna==3.4
      - jinja2==3.1.2
      - markupsafe==2.1.3
      - mpmath==1.3.0
      - multidict==6.0.4
      - multiprocess==0.70.15
      - networkx==3.1
      - numpy==1.26.0
      - nvidia-cublas-cu12==12.1.3.1
      - nvidia-cuda-cupti-cu12==12.1.105
      - nvidia-cuda-nvrtc-cu12==12.1.105
      - nvidia-cuda-runtime-cu12==12.1.105
      - nvidia-cudnn-cu12==8.9.2.26
      - nvidia-cufft-cu12==11.0.2.54
      - nvidia-curand-cu12==10.3.2.106
      - nvidia-cusolver-cu12==11.4.5.107
      - nvidia-cusparse-cu12==12.1.0.106
      - nvidia-nccl-cu12==2.18.1
      - nvidia-nvjitlink-cu12==12.2.140
      - nvidia-nvtx-cu12==12.1.105
      - packaging==23.2
      - pandas==2.1.1
      - peft==0.5.0
      - protobuf==4.24.4
      - psutil==5.9.5
      - pyarrow==13.0.0
      - python-dateutil==2.8.2
      - pytz==2023.3.post1
      - pyyaml==6.0.1
      - regex==2023.10.3
      - requests==2.31.0
      - rouge==1.0.1
      - safetensors==0.3.3
      - six==1.16.0
      - sympy==1.12
      - tokenizers==0.14.0
      - torch==2.1.0
      - tqdm==4.66.1
      - transformers==4.34.0
      - triton==2.1.0
      - typing-extensions==4.8.0
      - tzdata==2023.3
      - urllib3==2.0.6
      - xxhash==3.4.1
      - yarl==1.9.2
backend/python/autogptq/backend_pb2.py (new file, 61 lines; diff suppressed because one or more lines are too long)

backend/python/autogptq/run.sh (new executable file, 14 lines)
@@ -0,0 +1,14 @@
#!/bin/bash

##
## A bash script wrapper that runs the autogptq server with conda

export PATH=$PATH:/opt/conda/bin

# Activate conda environment
source activate autogptq

# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

python $DIR/autogptq.py $@

backend/python/bark/Makefile (new file, 5 lines)
@@ -0,0 +1,5 @@
.PHONY: ttsbark
ttsbark:
	@echo "Creating virtual environment..."
	@conda env create --name ttsbark --file ttsbark.yml
	@echo "Virtual environment created."
backend/python/bark/README.md (new file, 16 lines)
@@ -0,0 +1,16 @@
# Creating a separate environment for the ttsbark project

```
make ttsbark
```

# Testing the gRPC server

```
<The path of your python interpreter> -m unittest test_ttsbark.py
```

For example:
```
/opt/conda/envs/bark/bin/python -m unittest extra/grpc/bark/test_ttsbark.py
```
backend/python/bark/backend_pb2.py (new file, 61 lines; diff suppressed because one or more lines are too long)

backend/python/bark/run.sh (new executable file, 14 lines)
@@ -0,0 +1,14 @@
#!/bin/bash

##
## A bash script wrapper that runs the ttsbark server with conda

export PATH=$PATH:/opt/conda/bin

# Activate conda environment
source activate ttsbark

# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

python $DIR/ttsbark.py $@

backend/python/bark/test_ttsbark.py (new file, 32 lines)
@@ -0,0 +1,32 @@
import unittest
import subprocess
import time
import backend_pb2
import backend_pb2_grpc

import grpc

class TestBackendServicer(unittest.TestCase):
    """
    TestBackendServicer is the class that tests the gRPC service
    """
    def setUp(self):
        self.service = subprocess.Popen(["python3", "ttsbark.py", "--addr", "localhost:50051"])

    def tearDown(self) -> None:
        self.service.terminate()
        self.service.wait()

    def test_server_startup(self):
        time.sleep(2)
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.Health(backend_pb2.HealthMessage())
                self.assertEqual(response.message, b'OK')
        except Exception as err:
            print(err)
            self.fail("Server failed to start")
        finally:
            self.tearDown()
@@ -1,18 +1,22 @@
#!/usr/bin/env python3
import grpc
"""
This is an extra gRPC server of LocalAI for Bark TTS
"""
from concurrent import futures
import time
import backend_pb2
import backend_pb2_grpc
import argparse
import signal
import sys
import os
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
from pathlib import Path
from bark import SAMPLE_RATE, generate_audio, preload_models
from scipy.io.wavfile import write as write_wav

import backend_pb2
import backend_pb2_grpc
from bark import SAMPLE_RATE, generate_audio, preload_models

import grpc


_ONE_DAY_IN_SECONDS = 60 * 60 * 24

# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
@@ -20,6 +24,9 @@ MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))

# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    BackendServicer is the class that implements the gRPC service
    """
    def Health(self, request, context):
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
    def LoadModel(self, request, context):
@@ -83,4 +90,4 @@ if __name__ == "__main__":
    )
    args = parser.parse_args()

    serve(args.addr)
    serve(args.addr)
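Taken together, these hunks show the skeleton the Python backends share: generated backend_pb2/backend_pb2_grpc stubs, a BackendServicer exposing at least a Health RPC, a worker pool sized by PYTHON_GRPC_MAX_WORKERS, and a serve(args.addr) entry point driven by an --addr flag. A minimal, illustrative sketch of that skeleton follows; the add_BackendServicer_to_server registration helper is assumed from standard gRPC code generation, since the full server body is not shown in this diff:

```
# Minimal sketch of the shared backend layout; real backends add LoadModel/TTS/etc. on top.
import argparse
import os
from concurrent import futures

import grpc
import backend_pb2
import backend_pb2_grpc

# If MAX_WORKERS is specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))

class BackendServicer(backend_pb2_grpc.BackendServicer):
    def Health(self, request, context):
        # LocalAI polls this RPC to know the backend is ready
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

def serve(address):
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
    # add_BackendServicer_to_server is the conventional helper emitted by grpcio-tools
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    server.wait_for_termination()

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--addr", default="localhost:50051", help="address to bind the gRPC server to")
    args = parser.parse_args()
    serve(args.addr)
```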
backend/python/bark/ttsbark.yml (new file, 96 lines)
@@ -0,0 +1,96 @@
name: bark
channels:
  - defaults
dependencies:
  - _libgcc_mutex=0.1=main
  - _openmp_mutex=5.1=1_gnu
  - bzip2=1.0.8=h7b6447c_0
  - ca-certificates=2023.08.22=h06a4308_0
  - ld_impl_linux-64=2.38=h1181459_1
  - libffi=3.4.4=h6a678d5_0
  - libgcc-ng=11.2.0=h1234567_1
  - libgomp=11.2.0=h1234567_1
  - libstdcxx-ng=11.2.0=h1234567_1
  - libuuid=1.41.5=h5eee18b_0
  - ncurses=6.4=h6a678d5_0
  - openssl=3.0.11=h7f8727e_2
  - pip=23.2.1=py311h06a4308_0
  - python=3.11.5=h955ad1f_0
  - readline=8.2=h5eee18b_0
  - setuptools=68.0.0=py311h06a4308_0
  - sqlite=3.41.2=h5eee18b_0
  - tk=8.6.12=h1ccaba5_0
  - wheel=0.41.2=py311h06a4308_0
  - xz=5.4.2=h5eee18b_0
  - zlib=1.2.13=h5eee18b_0
  - pip:
      - accelerate==0.23.0
      - aiohttp==3.8.5
      - aiosignal==1.3.1
      - async-timeout==4.0.3
      - attrs==23.1.0
      - bark==0.1.5
      - boto3==1.28.61
      - botocore==1.31.61
      - certifi==2023.7.22
      - charset-normalizer==3.3.0
      - datasets==2.14.5
      - dill==0.3.7
      - einops==0.7.0
      - encodec==0.1.1
      - filelock==3.12.4
      - frozenlist==1.4.0
      - fsspec==2023.6.0
      - funcy==2.0
      - grpcio==1.59.0
      - huggingface-hub==0.16.4
      - idna==3.4
      - jinja2==3.1.2
      - jmespath==1.0.1
      - markupsafe==2.1.3
      - mpmath==1.3.0
      - multidict==6.0.4
      - multiprocess==0.70.15
      - networkx==3.1
      - numpy==1.26.0
      - nvidia-cublas-cu12==12.1.3.1
      - nvidia-cuda-cupti-cu12==12.1.105
      - nvidia-cuda-nvrtc-cu12==12.1.105
      - nvidia-cuda-runtime-cu12==12.1.105
      - nvidia-cudnn-cu12==8.9.2.26
      - nvidia-cufft-cu12==11.0.2.54
      - nvidia-curand-cu12==10.3.2.106
      - nvidia-cusolver-cu12==11.4.5.107
      - nvidia-cusparse-cu12==12.1.0.106
      - nvidia-nccl-cu12==2.18.1
      - nvidia-nvjitlink-cu12==12.2.140
      - nvidia-nvtx-cu12==12.1.105
      - packaging==23.2
      - pandas==2.1.1
      - peft==0.5.0
      - protobuf==4.24.4
      - psutil==5.9.5
      - pyarrow==13.0.0
      - python-dateutil==2.8.2
      - pytz==2023.3.post1
      - pyyaml==6.0.1
      - regex==2023.10.3
      - requests==2.31.0
      - rouge==1.0.1
      - s3transfer==0.7.0
      - safetensors==0.3.3
      - scipy==1.11.3
      - six==1.16.0
      - sympy==1.12
      - tokenizers==0.14.0
      - torch==2.1.0
      - torchaudio==2.1.0
      - tqdm==4.66.1
      - transformers==4.34.0
      - triton==2.1.0
      - typing-extensions==4.8.0
      - tzdata==2023.3
      - urllib3==1.26.17
      - xxhash==3.4.1
      - yarl==1.9.2
prefix: /opt/conda/envs/bark
backend/python/diffusers/Makefile (new file, 11 lines)
@@ -0,0 +1,11 @@
.PHONY: diffusers
diffusers:
	@echo "Creating virtual environment..."
	@conda env create --name diffusers --file diffusers.yml
	@echo "Virtual environment created."

.PHONY: run
run:
	@echo "Running diffusers..."
	bash run.sh
	@echo "Diffusers run."

backend/python/diffusers/README.md (new file, 5 lines)
@@ -0,0 +1,5 @@
# Creating a separate environment for the diffusers project

```
make diffusers
```
@@ -1,27 +1,32 @@
#!/usr/bin/env python3
import grpc
from concurrent import futures
import time
import backend_pb2
import backend_pb2_grpc

import argparse
from collections import defaultdict
from enum import Enum
import signal
import sys
import time
import os

# import diffusers
import torch
from torch import autocast
from diffusers import StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
from diffusers.pipelines.stable_diffusion import safety_checker
from compel import Compel
from PIL import Image
from io import BytesIO
import torch

import backend_pb2
import backend_pb2_grpc

import grpc

from diffusers import StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
from diffusers import StableDiffusionImg2ImgPipeline
from diffusers.pipelines.stable_diffusion import safety_checker

from compel import Compel

from transformers import CLIPTextModel
from enum import Enum
from collections import defaultdict
from safetensors.torch import load_file


_ONE_DAY_IN_SECONDS = 60 * 60 * 24
COMPEL=os.environ.get("COMPEL", "1") == "1"
CLIPSKIP=os.environ.get("CLIPSKIP", "1") == "1"
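The import block above brings in Compel next to the Stable Diffusion pipelines, which is what lets the backend apply weighted prompts before invoking the pipeline. A hypothetical, standalone sketch of that combination (the checkpoint id, prompt, and step count are example values only; the backend's actual request handling lives in backend_diffusers.py and is only partially shown here):

```
# Illustrative only: a Stable Diffusion pipeline combined with Compel prompt weighting.
import torch
from diffusers import StableDiffusionPipeline
from compel import Compel

# "runwayml/stable-diffusion-v1-5" is just an example checkpoint, not a LocalAI default
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# Compel converts weighted prompt syntax (e.g. "++" emphasis) into embedding tensors
compel = Compel(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder)
prompt_embeds = compel("a photo of a lighthouse++ at sunset")

image = pipe(prompt_embeds=prompt_embeds, num_inference_steps=25).images[0]
image.save("lighthouse.png")
```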
backend/python/diffusers/backend_pb2.py (new file, 61 lines; diff suppressed because one or more lines are too long)

backend/python/diffusers/diffusers.yml (new file, 74 lines)
@@ -0,0 +1,74 @@
name: diffusers
channels:
  - defaults
dependencies:
  - _libgcc_mutex=0.1=main
  - _openmp_mutex=5.1=1_gnu
  - bzip2=1.0.8=h7b6447c_0
  - ca-certificates=2023.08.22=h06a4308_0
  - ld_impl_linux-64=2.38=h1181459_1
  - libffi=3.4.4=h6a678d5_0
  - libgcc-ng=11.2.0=h1234567_1
  - libgomp=11.2.0=h1234567_1
  - libstdcxx-ng=11.2.0=h1234567_1
  - libuuid=1.41.5=h5eee18b_0
  - ncurses=6.4=h6a678d5_0
  - openssl=3.0.11=h7f8727e_2
  - pip=23.2.1=py311h06a4308_0
  - python=3.11.5=h955ad1f_0
  - readline=8.2=h5eee18b_0
  - setuptools=68.0.0=py311h06a4308_0
  - sqlite=3.41.2=h5eee18b_0
  - tk=8.6.12=h1ccaba5_0
  - tzdata=2023c=h04d1e81_0
  - wheel=0.41.2=py311h06a4308_0
  - xz=5.4.2=h5eee18b_0
  - zlib=1.2.13=h5eee18b_0
  - pip:
      - accelerate==0.23.0
      - certifi==2023.7.22
      - charset-normalizer==3.3.0
      - compel==2.0.2
      - diffusers==0.21.4
      - filelock==3.12.4
      - fsspec==2023.9.2
      - grpcio==1.59.0
      - huggingface-hub==0.17.3
      - idna==3.4
      - importlib-metadata==6.8.0
      - jinja2==3.1.2
      - markupsafe==2.1.3
      - mpmath==1.3.0
      - networkx==3.1
      - numpy==1.26.0
      - nvidia-cublas-cu12==12.1.3.1
      - nvidia-cuda-cupti-cu12==12.1.105
      - nvidia-cuda-nvrtc-cu12==12.1.105
      - nvidia-cuda-runtime-cu12==12.1.105
      - nvidia-cudnn-cu12==8.9.2.26
      - nvidia-cufft-cu12==11.0.2.54
      - nvidia-curand-cu12==10.3.2.106
      - nvidia-cusolver-cu12==11.4.5.107
      - nvidia-cusparse-cu12==12.1.0.106
      - nvidia-nccl-cu12==2.18.1
      - nvidia-nvjitlink-cu12==12.2.140
      - nvidia-nvtx-cu12==12.1.105
      - packaging==23.2
      - pillow==10.0.1
      - protobuf==4.24.4
      - psutil==5.9.5
      - pyparsing==3.1.1
      - pyyaml==6.0.1
      - regex==2023.10.3
      - requests==2.31.0
      - safetensors==0.4.0
      - sympy==1.12
      - tokenizers==0.14.1
      - torch==2.1.0
      - tqdm==4.66.1
      - transformers==4.34.0
      - triton==2.1.0
      - typing-extensions==4.8.0
      - urllib3==2.0.6
      - zipp==3.17.0
prefix: /opt/conda/envs/diffusers
backend/python/diffusers/run.sh (new executable file, 14 lines)
@@ -0,0 +1,14 @@
#!/bin/bash

##
## A bash script wrapper that runs the diffusers server with conda

export PATH=$PATH:/opt/conda/bin

# Activate conda environment
source activate diffusers

# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

python $DIR/backend_diffusers.py $@
backend/python/exllama/Makefile (new file, 11 lines)
@@ -0,0 +1,11 @@
.PHONY: exllama
exllama:
	@echo "Creating virtual environment..."
	@conda env create --name exllama --file exllama.yml
	@echo "Virtual environment created."

.PHONY: run
run:
	@echo "Running exllama..."
	bash run.sh
	@echo "exllama run."

backend/python/exllama/README.md (new file, 5 lines)
@@ -0,0 +1,5 @@
# Creating a separate environment for the exllama project

```
make exllama
```
backend/python/exllama/backend_pb2.py (new file, 61 lines; diff suppressed because one or more lines are too long)
@@ -63,6 +63,19 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):

config = ExLlamaConfig(model_config_path) # create config from config.json
config.model_path = model_path # supply path to model weights file
if (request.ContextSize):
    config.max_seq_len = request.ContextSize # override max sequence length
    config.max_attention_size = request.ContextSize**2 # Should be set to context_size^2.
    # https://github.com/turboderp/exllama/issues/220#issuecomment-1720324163

# Set Rope scaling.
if (request.RopeFreqScale):
    # Alpha value for Rope scaling.
    # Higher value increases context but adds perplexity.
    # alpha_value and compress_pos_emb are mutually exclusive.
    # https://github.com/turboderp/exllama/issues/115
    config.alpha_value = request.RopeFreqScale
    config.calculate_rotary_embedding_base()

model = ExLlama(config) # create ExLlama instance and load the weights
tokenizer = ExLlamaTokenizer(tokenizer_path) # create tokenizer from tokenizer model file
backend/python/exllama/exllama.yml (new file, 55 lines)
@@ -0,0 +1,55 @@
name: exllama
channels:
  - defaults
dependencies:
  - _libgcc_mutex=0.1=main
  - _openmp_mutex=5.1=1_gnu
  - bzip2=1.0.8=h7b6447c_0
  - ca-certificates=2023.08.22=h06a4308_0
  - ld_impl_linux-64=2.38=h1181459_1
  - libffi=3.4.4=h6a678d5_0
  - libgcc-ng=11.2.0=h1234567_1
  - libgomp=11.2.0=h1234567_1
  - libstdcxx-ng=11.2.0=h1234567_1
  - libuuid=1.41.5=h5eee18b_0
  - ncurses=6.4=h6a678d5_0
  - openssl=3.0.11=h7f8727e_2
  - pip=23.2.1=py311h06a4308_0
  - python=3.11.5=h955ad1f_0
  - readline=8.2=h5eee18b_0
  - setuptools=68.0.0=py311h06a4308_0
  - sqlite=3.41.2=h5eee18b_0
  - tk=8.6.12=h1ccaba5_0
  - tzdata=2023c=h04d1e81_0
  - wheel=0.41.2=py311h06a4308_0
  - xz=5.4.2=h5eee18b_0
  - zlib=1.2.13=h5eee18b_0
  - pip:
      - filelock==3.12.4
      - fsspec==2023.9.2
      - grpcio==1.59.0
      - jinja2==3.1.2
      - markupsafe==2.1.3
      - mpmath==1.3.0
      - networkx==3.1
      - ninja==1.11.1
      - nvidia-cublas-cu12==12.1.3.1
      - nvidia-cuda-cupti-cu12==12.1.105
      - nvidia-cuda-nvrtc-cu12==12.1.105
      - nvidia-cuda-runtime-cu12==12.1.105
      - nvidia-cudnn-cu12==8.9.2.26
      - nvidia-cufft-cu12==11.0.2.54
      - nvidia-curand-cu12==10.3.2.106
      - nvidia-cusolver-cu12==11.4.5.107
      - nvidia-cusparse-cu12==12.1.0.106
      - nvidia-nccl-cu12==2.18.1
      - nvidia-nvjitlink-cu12==12.2.140
      - nvidia-nvtx-cu12==12.1.105
      - protobuf==4.24.4
      - safetensors==0.3.2
      - sentencepiece==0.1.99
      - sympy==1.12
      - torch==2.1.0
      - triton==2.1.0
      - typing-extensions==4.8.0
prefix: /opt/conda/envs/exllama
backend/python/exllama/run.sh (new executable file, 14 lines)
@@ -0,0 +1,14 @@
#!/bin/bash

##
## A bash script wrapper that runs the exllama server with conda

export PATH=$PATH:/opt/conda/bin

# Activate conda environment
source activate exllama

# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

python $DIR/exllama.py $@
backend/python/petals/Makefile (new file, 11 lines)
@@ -0,0 +1,11 @@
.PHONY: petals
petals:
	@echo "Creating virtual environment..."
	@conda env create --name petals --file petals.yml
	@echo "Virtual environment created."

.PHONY: run
run:
	@echo "Running petals..."
	bash run.sh
	@echo "petals run."

backend/python/petals/backend_pb2.py (new file, 61 lines; diff suppressed because one or more lines are too long)
Some files were not shown because too many files have changed in this diff.