mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-03 03:02:38 -05:00
Compare commits
1 Commits
v2.2.0
...
renovate/g
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
02397f6727 |
22
.env
22
.env
@@ -66,24 +66,4 @@ MODELS_PATH=/models
|
||||
### Python backends GRPC max workers
|
||||
### Default number of workers for GRPC Python backends.
|
||||
### This actually controls wether a backend can process multiple requests or not.
|
||||
# PYTHON_GRPC_MAX_WORKERS=1
|
||||
|
||||
### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
|
||||
# LLAMACPP_PARALLEL=1
|
||||
|
||||
### Enable to run parallel requests
|
||||
# PARALLEL_REQUESTS=true
|
||||
|
||||
### Watchdog settings
|
||||
###
|
||||
# Enables watchdog to kill backends that are inactive for too much time
|
||||
# WATCHDOG_IDLE=true
|
||||
#
|
||||
# Enables watchdog to kill backends that are busy for too much time
|
||||
# WATCHDOG_BUSY=true
|
||||
#
|
||||
# Time in duration format (e.g. 1h30m) after which a backend is considered idle
|
||||
# WATCHDOG_IDLE_TIMEOUT=5m
|
||||
#
|
||||
# Time in duration format (e.g. 1h30m) after which a backend is considered busy
|
||||
# WATCHDOG_BUSY_TIMEOUT=5m
|
||||
# PYTHON_GRPC_MAX_WORKERS=1
|
||||
7
.github/bump_docs.sh
vendored
7
.github/bump_docs.sh
vendored
@@ -1,7 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -xe
|
||||
REPO=$1
|
||||
|
||||
LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.name')
|
||||
|
||||
cat <<< $(jq ".version = \"$LATEST_TAG\"" docs/data/version.json) > docs/data/version.json
|
||||
5
.github/workflows/bump_deps.yaml
vendored
5
.github/workflows/bump_deps.yaml
vendored
@@ -12,9 +12,6 @@ jobs:
|
||||
- repository: "go-skynet/go-llama.cpp"
|
||||
variable: "GOLLAMA_VERSION"
|
||||
branch: "master"
|
||||
- repository: "ggerganov/llama.cpp"
|
||||
variable: "CPPLLAMA_VERSION"
|
||||
branch: "master"
|
||||
- repository: "go-skynet/go-ggml-transformers.cpp"
|
||||
variable: "GOGGMLTRANSFORMERS_VERSION"
|
||||
branch: "master"
|
||||
@@ -44,7 +41,7 @@ jobs:
|
||||
branch: "master"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v3
|
||||
- name: Bump dependencies 🔧
|
||||
run: |
|
||||
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
|
||||
|
||||
31
.github/workflows/bump_docs.yaml
vendored
31
.github/workflows/bump_docs.yaml
vendored
@@ -1,31 +0,0 @@
|
||||
name: Bump dependencies
|
||||
on:
|
||||
schedule:
|
||||
- cron: 0 20 * * *
|
||||
workflow_dispatch:
|
||||
jobs:
|
||||
bump:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- repository: "mudler/LocalAI"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Bump dependencies 🔧
|
||||
run: |
|
||||
bash .github/bump_docs.sh ${{ matrix.repository }}
|
||||
- name: Create Pull Request
|
||||
uses: peter-evans/create-pull-request@v5
|
||||
with:
|
||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||
push-to-fork: ci-forks/LocalAI
|
||||
commit-message: ':arrow_up: Update docs version ${{ matrix.repository }}'
|
||||
title: ':arrow_up: Update docs version ${{ matrix.repository }}'
|
||||
branch: "update/docs"
|
||||
body: Bump of ${{ matrix.repository }} version inside docs
|
||||
signoff: true
|
||||
|
||||
|
||||
|
||||
63
.github/workflows/disabled/test-gpu.yml
vendored
63
.github/workflows/disabled/test-gpu.yml
vendored
@@ -1,63 +0,0 @@
|
||||
---
|
||||
name: 'GPU tests'
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
tags:
|
||||
- '*'
|
||||
|
||||
concurrency:
|
||||
group: ci-gpu-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
ubuntu-latest:
|
||||
runs-on: gpu
|
||||
strategy:
|
||||
matrix:
|
||||
go-version: ['1.21.x']
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go ${{ matrix.go-version }}
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: ${{ matrix.go-version }}
|
||||
# You can test your matrix by printing the current Go version
|
||||
- name: Display Go version
|
||||
run: go version
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y make wget
|
||||
- name: Build
|
||||
run: |
|
||||
if [ ! -e /run/systemd/system ]; then
|
||||
sudo mkdir /run/systemd/system
|
||||
fi
|
||||
sudo mkdir -p /host/tests/${{ github.head_ref || github.ref }}
|
||||
sudo chmod -R 777 /host/tests/${{ github.head_ref || github.ref }}
|
||||
make \
|
||||
TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
|
||||
BUILD_TYPE=cublas \
|
||||
prepare-e2e run-e2e-image test-e2e
|
||||
- name: Release space from worker ♻
|
||||
if: always()
|
||||
run: |
|
||||
sudo rm -rf build || true
|
||||
sudo rm -rf bin || true
|
||||
sudo rm -rf dist || true
|
||||
sudo docker logs $(sudo docker ps -q --filter ancestor=localai-tests) > logs.txt
|
||||
sudo cat logs.txt || true
|
||||
sudo rm -rf logs.txt
|
||||
make clean || true
|
||||
make \
|
||||
TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
|
||||
teardown-e2e || true
|
||||
sudo rm -rf /host/tests/${{ github.head_ref || github.ref }} || true
|
||||
docker system prune -f -a --volumes || true
|
||||
212
.github/workflows/image.yml
vendored
212
.github/workflows/image.yml
vendored
@@ -14,144 +14,128 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
extras-image-build:
|
||||
uses: ./.github/workflows/image_build.yml
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
tag-suffix: ${{ matrix.tag-suffix }}
|
||||
ffmpeg: ${{ matrix.ffmpeg }}
|
||||
image-type: ${{ matrix.image-type }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||
platforms: ${{ matrix.platforms }}
|
||||
runs-on: ${{ matrix.runs-on }}
|
||||
secrets:
|
||||
dockerUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
dockerPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
docker:
|
||||
strategy:
|
||||
# Pushing with all jobs in parallel
|
||||
# eats the bandwidth of all the nodes
|
||||
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
|
||||
matrix:
|
||||
include:
|
||||
- build-type: ''
|
||||
#platforms: 'linux/amd64,linux/arm64'
|
||||
platforms: 'linux/amd64'
|
||||
platforms: 'linux/amd64,linux/arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: ''
|
||||
ffmpeg: ''
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
- build-type: ''
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-ffmpeg'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "7"
|
||||
cuda-major-version: 11
|
||||
cuda-minor-version: 7
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda11'
|
||||
ffmpeg: ''
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "1"
|
||||
cuda-major-version: 12
|
||||
cuda-minor-version: 1
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12'
|
||||
ffmpeg: ''
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
- build-type: ''
|
||||
platforms: 'linux/amd64,linux/arm64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-ffmpeg'
|
||||
ffmpeg: 'true'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "7"
|
||||
cuda-major-version: 11
|
||||
cuda-minor-version: 7
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda11-ffmpeg'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "1"
|
||||
cuda-major-version: 12
|
||||
cuda-minor-version: 1
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12-ffmpeg'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
- build-type: ''
|
||||
#platforms: 'linux/amd64,linux/arm64'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: ''
|
||||
ffmpeg: ''
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
core-image-build:
|
||||
uses: ./.github/workflows/image_build.yml
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
tag-suffix: ${{ matrix.tag-suffix }}
|
||||
ffmpeg: ${{ matrix.ffmpeg }}
|
||||
image-type: ${{ matrix.image-type }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||
platforms: ${{ matrix.platforms }}
|
||||
runs-on: ${{ matrix.runs-on }}
|
||||
secrets:
|
||||
dockerUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
dockerPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- build-type: ''
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'ubuntu-latest'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "7"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda11-core'
|
||||
ffmpeg: ''
|
||||
image-type: 'core'
|
||||
runs-on: 'ubuntu-latest'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "1"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12-core'
|
||||
ffmpeg: ''
|
||||
image-type: 'core'
|
||||
runs-on: 'ubuntu-latest'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "7"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda11-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'ubuntu-latest'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "1"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'ubuntu-latest'
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Release space from worker
|
||||
run: |
|
||||
echo "Listing top largest packages"
|
||||
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
||||
head -n 30 <<< "${pkgs}"
|
||||
echo
|
||||
df -h
|
||||
echo
|
||||
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
|
||||
sudo apt-get remove --auto-remove android-sdk-platform-tools || true
|
||||
sudo apt-get purge --auto-remove android-sdk-platform-tools || true
|
||||
sudo rm -rf /usr/local/lib/android
|
||||
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
|
||||
sudo rm -rf /usr/share/dotnet
|
||||
sudo apt-get remove -y '^mono-.*' || true
|
||||
sudo apt-get remove -y '^ghc-.*' || true
|
||||
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
|
||||
sudo apt-get remove -y 'php.*' || true
|
||||
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
|
||||
sudo apt-get remove -y '^google-.*' || true
|
||||
sudo apt-get remove -y azure-cli || true
|
||||
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
|
||||
sudo apt-get remove -y '^gfortran-.*' || true
|
||||
sudo apt-get autoremove -y
|
||||
sudo apt-get clean
|
||||
echo
|
||||
echo "Listing top largest packages"
|
||||
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
||||
head -n 30 <<< "${pkgs}"
|
||||
echo
|
||||
sudo rm -rfv build || true
|
||||
df -h
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: quay.io/go-skynet/local-ai
|
||||
tags: |
|
||||
type=ref,event=branch
|
||||
type=semver,pattern={{raw}}
|
||||
type=sha
|
||||
flavor: |
|
||||
latest=${{ matrix.tag-latest }}
|
||||
suffix=${{ matrix.tag-suffix }}
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@master
|
||||
with:
|
||||
platforms: all
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@master
|
||||
|
||||
- name: Login to DockerHub
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: quay.io
|
||||
username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
builder: ${{ steps.buildx.outputs.name }}
|
||||
build-args: |
|
||||
BUILD_TYPE=${{ matrix.build-type }}
|
||||
CUDA_MAJOR_VERSION=${{ matrix.cuda-major-version }}
|
||||
CUDA_MINOR_VERSION=${{ matrix.cuda-minor-version }}
|
||||
FFMPEG=${{ matrix.ffmpeg }}
|
||||
context: .
|
||||
file: ./Dockerfile
|
||||
platforms: ${{ matrix.platforms }}
|
||||
push: ${{ github.event_name != 'pull_request' }}
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
|
||||
147
.github/workflows/image_build.yml
vendored
147
.github/workflows/image_build.yml
vendored
@@ -1,147 +0,0 @@
|
||||
---
|
||||
name: 'build container images (reusable)'
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
build-type:
|
||||
description: 'Build type'
|
||||
default: ''
|
||||
type: string
|
||||
cuda-major-version:
|
||||
description: 'CUDA major version'
|
||||
default: "11"
|
||||
type: string
|
||||
cuda-minor-version:
|
||||
description: 'CUDA minor version'
|
||||
default: "7"
|
||||
type: string
|
||||
platforms:
|
||||
description: 'Platforms'
|
||||
default: ''
|
||||
type: string
|
||||
tag-latest:
|
||||
description: 'Tag latest'
|
||||
default: ''
|
||||
type: string
|
||||
tag-suffix:
|
||||
description: 'Tag suffix'
|
||||
default: ''
|
||||
type: string
|
||||
ffmpeg:
|
||||
description: 'FFMPEG'
|
||||
default: ''
|
||||
type: string
|
||||
image-type:
|
||||
description: 'Image type'
|
||||
default: ''
|
||||
type: string
|
||||
runs-on:
|
||||
description: 'Runs on'
|
||||
required: true
|
||||
default: ''
|
||||
type: string
|
||||
secrets:
|
||||
dockerUsername:
|
||||
required: true
|
||||
dockerPassword:
|
||||
required: true
|
||||
jobs:
|
||||
reusable_image-build:
|
||||
runs-on: ${{ inputs.runs-on }}
|
||||
steps:
|
||||
- name: Force Install GIT latest
|
||||
run: |
|
||||
sudo apt-get update \
|
||||
&& sudo apt-get install -y software-properties-common \
|
||||
&& sudo apt-get update \
|
||||
&& sudo add-apt-repository -y ppa:git-core/ppa \
|
||||
&& sudo apt-get update \
|
||||
&& sudo apt-get install -y git
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
# - name: Release space from worker
|
||||
# run: |
|
||||
# echo "Listing top largest packages"
|
||||
# pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
||||
# head -n 30 <<< "${pkgs}"
|
||||
# echo
|
||||
# df -h
|
||||
# echo
|
||||
# sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
|
||||
# sudo apt-get remove --auto-remove android-sdk-platform-tools || true
|
||||
# sudo apt-get purge --auto-remove android-sdk-platform-tools || true
|
||||
# sudo rm -rf /usr/local/lib/android
|
||||
# sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
|
||||
# sudo rm -rf /usr/share/dotnet
|
||||
# sudo apt-get remove -y '^mono-.*' || true
|
||||
# sudo apt-get remove -y '^ghc-.*' || true
|
||||
# sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
|
||||
# sudo apt-get remove -y 'php.*' || true
|
||||
# sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
|
||||
# sudo apt-get remove -y '^google-.*' || true
|
||||
# sudo apt-get remove -y azure-cli || true
|
||||
# sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
|
||||
# sudo apt-get remove -y '^gfortran-.*' || true
|
||||
# sudo apt-get remove -y microsoft-edge-stable || true
|
||||
# sudo apt-get remove -y firefox || true
|
||||
# sudo apt-get remove -y powershell || true
|
||||
# sudo apt-get remove -y r-base-core || true
|
||||
# sudo apt-get autoremove -y
|
||||
# sudo apt-get clean
|
||||
# echo
|
||||
# echo "Listing top largest packages"
|
||||
# pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
||||
# head -n 30 <<< "${pkgs}"
|
||||
# echo
|
||||
# sudo rm -rfv build || true
|
||||
# df -h
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: quay.io/go-skynet/local-ai
|
||||
tags: |
|
||||
type=ref,event=branch
|
||||
type=semver,pattern={{raw}}
|
||||
type=sha
|
||||
flavor: |
|
||||
latest=${{ inputs.tag-latest }}
|
||||
suffix=${{ inputs.tag-suffix }}
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@master
|
||||
with:
|
||||
platforms: all
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@master
|
||||
|
||||
- name: Login to DockerHub
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: quay.io
|
||||
username: ${{ secrets.dockerUsername }}
|
||||
password: ${{ secrets.dockerPassword }}
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
builder: ${{ steps.buildx.outputs.name }}
|
||||
build-args: |
|
||||
BUILD_TYPE=${{ inputs.build-type }}
|
||||
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
||||
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
|
||||
FFMPEG=${{ inputs.ffmpeg }}
|
||||
IMAGE_TYPE=${{ inputs.image-type }}
|
||||
context: .
|
||||
file: ./Dockerfile
|
||||
platforms: ${{ inputs.platforms }}
|
||||
push: ${{ github.event_name != 'pull_request' }}
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
- name: job summary
|
||||
run: |
|
||||
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
|
||||
18
.github/workflows/release.yaml
vendored
18
.github/workflows/release.yaml
vendored
@@ -19,7 +19,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: true
|
||||
- uses: actions/setup-go@v4
|
||||
@@ -29,12 +29,6 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
|
||||
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && sudo make -j12 install
|
||||
|
||||
- name: Build
|
||||
id: build
|
||||
env:
|
||||
@@ -66,26 +60,18 @@ jobs:
|
||||
runs-on: macOS-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: true
|
||||
- uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: '>=1.21.0'
|
||||
- name: Dependencies
|
||||
run: |
|
||||
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && make -j12 install && rm -rf grpc
|
||||
- name: Build
|
||||
id: build
|
||||
env:
|
||||
CMAKE_ARGS: "${{ matrix.defines }}"
|
||||
BUILD_ID: "${{ matrix.build }}"
|
||||
run: |
|
||||
export C_INCLUDE_PATH=/usr/local/include
|
||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||
make dist
|
||||
- uses: actions/upload-artifact@v3
|
||||
with:
|
||||
|
||||
250
.github/workflows/test-extra.yml
vendored
250
.github/workflows/test-extra.yml
vendored
@@ -1,250 +0,0 @@
|
||||
---
|
||||
name: 'Tests extras backends'
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
tags:
|
||||
- '*'
|
||||
|
||||
concurrency:
|
||||
group: ci-tests-extra-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
tests-transformers:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
sudo apt-get install -y ca-certificates cmake curl patch
|
||||
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
|
||||
- name: Test transformers
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make -C backend/python/transformers
|
||||
make -C backend/python/transformers test
|
||||
|
||||
tests-sentencetransformers:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
sudo apt-get install -y ca-certificates cmake curl patch
|
||||
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
|
||||
- name: Test sentencetransformers
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make -C backend/python/sentencetransformers
|
||||
make -C backend/python/sentencetransformers test
|
||||
|
||||
tests-diffusers:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
sudo apt-get install -y ca-certificates cmake curl patch
|
||||
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
|
||||
- name: Test diffusers
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make -C backend/python/diffusers
|
||||
make -C backend/python/diffusers test
|
||||
|
||||
|
||||
tests-transformers-musicgen:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
sudo apt-get install -y ca-certificates cmake curl patch
|
||||
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
|
||||
- name: Test transformers-musicgen
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make -C backend/python/transformers-musicgen
|
||||
make -C backend/python/transformers-musicgen test
|
||||
|
||||
|
||||
|
||||
tests-petals:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
sudo apt-get install -y ca-certificates cmake curl patch
|
||||
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
|
||||
- name: Test petals
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make -C backend/python/petals
|
||||
make -C backend/python/petals test
|
||||
|
||||
|
||||
|
||||
tests-bark:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
sudo apt-get install -y ca-certificates cmake curl patch
|
||||
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
|
||||
- name: Test bark
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make -C backend/python/bark
|
||||
make -C backend/python/bark test
|
||||
|
||||
|
||||
# Below tests needs GPU. Commented out for now
|
||||
# TODO: Re-enable as soon as we have GPU nodes
|
||||
# tests-vllm:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
# run: |
|
||||
# sudo apt-get update
|
||||
# sudo apt-get install build-essential ffmpeg
|
||||
# curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
# sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
# gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
# sudo apt-get update && \
|
||||
# sudo apt-get install -y conda
|
||||
# sudo apt-get install -y ca-certificates cmake curl patch
|
||||
# sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||
# sudo rm -rfv /usr/bin/conda || true
|
||||
# - name: Test vllm
|
||||
# run: |
|
||||
# export PATH=$PATH:/opt/conda/bin
|
||||
# make -C backend/python/vllm
|
||||
# make -C backend/python/vllm test
|
||||
# tests-vallex:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
# run: |
|
||||
# sudo apt-get update
|
||||
# sudo apt-get install build-essential ffmpeg
|
||||
# curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
# sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
# gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
# sudo apt-get update && \
|
||||
# sudo apt-get install -y conda
|
||||
# sudo apt-get install -y ca-certificates cmake curl patch
|
||||
# sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||
# sudo rm -rfv /usr/bin/conda || true
|
||||
# - name: Test vall-e-x
|
||||
# run: |
|
||||
# export PATH=$PATH:/opt/conda/bin
|
||||
# make -C backend/python/vall-e-x
|
||||
# make -C backend/python/vall-e-x test
|
||||
56
.github/workflows/test.yml
vendored
56
.github/workflows/test.yml
vendored
@@ -14,7 +14,7 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
tests-linux:
|
||||
ubuntu-latest:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -53,7 +53,7 @@ jobs:
|
||||
sudo rm -rfv build || true
|
||||
df -h
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go ${{ matrix.go-version }}
|
||||
@@ -67,42 +67,38 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
|
||||
sudo apt-get install -y ca-certificates cmake curl patch
|
||||
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
|
||||
sudo pip install -r extra/requirements.txt
|
||||
|
||||
# Pre-build piper before we start tests in order to have shared libraries in place
|
||||
make sources/go-piper && \
|
||||
GO_TAGS="tts" make -C sources/go-piper piper.o && \
|
||||
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
|
||||
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
|
||||
GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||
|
||||
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && sudo make -j12 install
|
||||
sudo mkdir /build && sudo chmod -R 777 /build && cd /build && \
|
||||
curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v1.11.0.tar.gz" | \
|
||||
tar -xzvf - && \
|
||||
mkdir -p "spdlog-1.11.0/build" && \
|
||||
cd "spdlog-1.11.0/build" && \
|
||||
cmake .. && \
|
||||
make -j8 && \
|
||||
sudo cmake --install . --prefix /usr && mkdir -p "lib/Linux-$(uname -m)" && \
|
||||
cd /build && \
|
||||
mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \
|
||||
curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v1.0.0/libpiper_phonemize-amd64.tar.gz" | \
|
||||
tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
|
||||
sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
|
||||
sudo ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \
|
||||
sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
|
||||
- name: Test
|
||||
run: |
|
||||
GO_TAGS="stablediffusion tts" make test
|
||||
ESPEAK_DATA="/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data" GO_TAGS="tts stablediffusion" make test
|
||||
|
||||
tests-apple:
|
||||
macOS-latest:
|
||||
runs-on: macOS-latest
|
||||
strategy:
|
||||
matrix:
|
||||
go-version: ['1.21.x']
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go ${{ matrix.go-version }}
|
||||
@@ -112,14 +108,6 @@ jobs:
|
||||
# You can test your matrix by printing the current Go version
|
||||
- name: Display Go version
|
||||
run: go version
|
||||
- name: Dependencies
|
||||
run: |
|
||||
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && make -j12 install && rm -rf grpc
|
||||
- name: Test
|
||||
run: |
|
||||
export C_INCLUDE_PATH=/usr/local/include
|
||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||
CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test
|
||||
12
.gitignore
vendored
12
.gitignore
vendored
@@ -1,11 +1,15 @@
|
||||
# go-llama build artifacts
|
||||
/sources/
|
||||
go-llama
|
||||
go-llama-stable
|
||||
/gpt4all
|
||||
go-stable-diffusion
|
||||
go-piper
|
||||
/go-bert
|
||||
go-ggllm
|
||||
/piper
|
||||
__pycache__/
|
||||
*.a
|
||||
get-sources
|
||||
prepare-sources
|
||||
/backend/cpp/llama/grpc-server
|
||||
/backend/cpp/llama/llama.cpp
|
||||
|
||||
go-ggml-transformers
|
||||
go-gpt2
|
||||
|
||||
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -1,3 +0,0 @@
|
||||
[submodule "docs/themes/hugo-theme-relearn"]
|
||||
path = docs/themes/hugo-theme-relearn
|
||||
url = https://github.com/McShelby/hugo-theme-relearn.git
|
||||
174
Dockerfile
174
Dockerfile
@@ -1,29 +1,22 @@
|
||||
ARG GO_VERSION=1.21-bullseye
|
||||
ARG IMAGE_TYPE=extras
|
||||
# extras or core
|
||||
|
||||
|
||||
FROM golang:$GO_VERSION as requirements-core
|
||||
FROM golang:$GO_VERSION as requirements
|
||||
|
||||
ARG BUILD_TYPE
|
||||
ARG CUDA_MAJOR_VERSION=11
|
||||
ARG CUDA_MINOR_VERSION=7
|
||||
ARG SPDLOG_VERSION="1.11.0"
|
||||
ARG PIPER_PHONEMIZE_VERSION='1.0.0'
|
||||
ARG TARGETARCH
|
||||
ARG TARGETVARIANT
|
||||
|
||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||
|
||||
ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh"
|
||||
|
||||
ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/extra/grpc/huggingface/huggingface.py,autogptq:/build/extra/grpc/autogptq/autogptq.py,bark:/build/extra/grpc/bark/ttsbark.py,diffusers:/build/extra/grpc/diffusers/backend_diffusers.py,exllama:/build/extra/grpc/exllama/exllama.py,vall-e-x:/build/extra/grpc/vall-e-x/ttsvalle.py,vllm:/build/extra/grpc/vllm/backend_vllm.py"
|
||||
ENV GALLERIES='[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]'
|
||||
ARG GO_TAGS="stablediffusion tts"
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y ca-certificates curl patch pip cmake && apt-get clean
|
||||
|
||||
|
||||
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
|
||||
RUN update-ca-certificates
|
||||
apt-get install -y ca-certificates cmake curl patch pip
|
||||
|
||||
# Use the variables in subsequent instructions
|
||||
RUN echo "Target Architecture: $TARGETARCH"
|
||||
@@ -37,52 +30,66 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
|
||||
dpkg -i cuda-keyring_1.0-1_all.deb && \
|
||||
rm -f cuda-keyring_1.0-1_all.deb && \
|
||||
apt-get update && \
|
||||
apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
|
||||
apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
; fi
|
||||
ENV PATH /usr/local/cuda/bin:${PATH}
|
||||
|
||||
# OpenBLAS requirements and stable diffusion
|
||||
RUN apt-get install -y \
|
||||
libopenblas-dev \
|
||||
libopencv-dev \
|
||||
&& apt-get clean
|
||||
|
||||
# Set up OpenCV
|
||||
RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
RUN test -n "$TARGETARCH" \
|
||||
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
|
||||
|
||||
# Extras requirements
|
||||
FROM requirements-core as requirements-extras
|
||||
|
||||
RUN curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \
|
||||
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list && \
|
||||
apt-get update && \
|
||||
apt-get install -y conda
|
||||
|
||||
COPY extra/requirements.txt /build/extra/requirements.txt
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
RUN pip install --upgrade pip
|
||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||
RUN if [ "${TARGETARCH}" = "amd64" ]; then \
|
||||
pip install git+https://github.com/suno-ai/bark.git diffusers invisible_watermark transformers accelerate safetensors;\
|
||||
fi
|
||||
RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "amd64" ]; then \
|
||||
pip install torch vllm && pip install auto-gptq https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}-cp39-cp39-linux_x86_64.whl;\
|
||||
fi
|
||||
RUN pip install -r /build/extra/requirements.txt && rm -rf /build/extra/requirements.txt
|
||||
|
||||
# Vall-e-X
|
||||
RUN git clone https://github.com/Plachtaa/VALL-E-X.git /usr/lib/vall-e-x && cd /usr/lib/vall-e-x && pip install -r requirements.txt
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
# OpenBLAS requirements
|
||||
RUN apt-get install -y libopenblas-dev
|
||||
|
||||
# Stable Diffusion requirements
|
||||
RUN apt-get install -y libopencv-dev && \
|
||||
ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||
|
||||
|
||||
# piper requirements
|
||||
# Use pre-compiled Piper phonemization library (includes onnxruntime)
|
||||
#RUN if echo "${GO_TAGS}" | grep -q "tts"; then \
|
||||
RUN test -n "$TARGETARCH" \
|
||||
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
|
||||
|
||||
RUN curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v${SPDLOG_VERSION}.tar.gz" | \
|
||||
tar -xzvf - && \
|
||||
mkdir -p "spdlog-${SPDLOG_VERSION}/build" && \
|
||||
cd "spdlog-${SPDLOG_VERSION}/build" && \
|
||||
cmake .. && \
|
||||
make -j8 && \
|
||||
cmake --install . --prefix /usr && mkdir -p "lib/Linux-$(uname -m)" && \
|
||||
cd /build && \
|
||||
mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \
|
||||
curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v${PIPER_PHONEMIZE_VERSION}/libpiper_phonemize-${TARGETARCH:-$(go env GOARCH)}${TARGETVARIANT}.tar.gz" | \
|
||||
tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
|
||||
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
|
||||
ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \
|
||||
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
|
||||
# \
|
||||
# ; fi
|
||||
|
||||
###################################
|
||||
###################################
|
||||
|
||||
FROM requirements-${IMAGE_TYPE} as builder
|
||||
FROM requirements as builder
|
||||
|
||||
ARG GO_TAGS="stablediffusion tts"
|
||||
ARG GRPC_BACKENDS
|
||||
ARG BUILD_GRPC=true
|
||||
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
|
||||
|
||||
ENV GO_TAGS=${GO_TAGS}
|
||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
||||
@@ -90,47 +97,28 @@ ENV NVIDIA_VISIBLE_DEVICES=all
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
COPY Makefile .
|
||||
RUN make get-sources
|
||||
COPY go.mod .
|
||||
RUN make prepare
|
||||
COPY . .
|
||||
COPY .git .
|
||||
RUN make prepare
|
||||
|
||||
# stablediffusion does not tolerate a newer version of abseil, build it first
|
||||
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||
|
||||
RUN if [ "${BUILD_GRPC}" = "true" ]; then \
|
||||
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && make -j12 install \
|
||||
; fi
|
||||
|
||||
# Rebuild with defaults backends
|
||||
RUN make build
|
||||
|
||||
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
||||
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
|
||||
touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \
|
||||
; fi
|
||||
RUN ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data make build
|
||||
|
||||
###################################
|
||||
###################################
|
||||
|
||||
FROM requirements-${IMAGE_TYPE}
|
||||
FROM requirements
|
||||
|
||||
ARG FFMPEG
|
||||
ARG BUILD_TYPE
|
||||
ARG TARGETARCH
|
||||
ARG IMAGE_TYPE=extras
|
||||
|
||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||
ENV REBUILD=false
|
||||
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
|
||||
|
||||
ARG CUDA_MAJOR_VERSION=11
|
||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||
|
||||
# Add FFmpeg
|
||||
RUN if [ "${FFMPEG}" = "true" ]; then \
|
||||
apt-get install -y ffmpeg \
|
||||
@@ -143,56 +131,16 @@ WORKDIR /build
|
||||
# see https://github.com/go-skynet/LocalAI/pull/658#discussion_r1241971626 and
|
||||
# https://github.com/go-skynet/LocalAI/pull/434
|
||||
COPY . .
|
||||
|
||||
COPY --from=builder /build/sources ./sources/
|
||||
COPY --from=builder /build/grpc ./grpc/
|
||||
|
||||
RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf grpc
|
||||
|
||||
# Copy the binary
|
||||
RUN make prepare-sources
|
||||
COPY --from=builder /build/local-ai ./
|
||||
|
||||
# Copy shared libraries for piper
|
||||
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
|
||||
|
||||
# do not let stablediffusion rebuild (requires an older version of absl)
|
||||
COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
|
||||
|
||||
## Duplicated from Makefile to avoid having a big layer that's hard to push
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/autogptq \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/bark \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/diffusers \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/vllm \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/transformers \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/vall-e-x \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/exllama \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/exllama2 \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/petals \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/transformers-musicgen \
|
||||
; fi
|
||||
# Copy VALLE-X as it's not a real "lib"
|
||||
RUN cp -rfv /usr/lib/vall-e-x/* ./
|
||||
|
||||
# To resolve exllama import error
|
||||
RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH:-$(go env GOARCH)}" = "amd64" ]; then \
|
||||
cp -rfv /usr/local/lib/python3.9/dist-packages/exllama extra/grpc/exllama/;\
|
||||
fi
|
||||
# Define the health check command
|
||||
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
|
||||
CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>com.apple.security.network.client</key>
|
||||
<true/>
|
||||
<key>com.apple.security.network.server</key>
|
||||
<true/>
|
||||
</dict>
|
||||
</plist>
|
||||
472
Makefile
472
Makefile
@@ -4,12 +4,10 @@ GOVET=$(GOCMD) vet
|
||||
BINARY_NAME=local-ai
|
||||
|
||||
# llama.cpp versions
|
||||
GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0
|
||||
GOLLAMA_VERSION?=1676dcd7a139b6cdfbaea5fd67f46dc25d9d8bcf
|
||||
|
||||
GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
|
||||
|
||||
CPPLLAMA_VERSION?=328b83de23b33240e28f4e74900d1d06726f5eb1
|
||||
|
||||
# gpt4all version
|
||||
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
||||
GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8
|
||||
@@ -19,33 +17,34 @@ GOGGMLTRANSFORMERS_VERSION?=ffb09d7dd71e2cbc6c5d7d05357d230eea6f369a
|
||||
|
||||
# go-rwkv version
|
||||
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||
RWKV_VERSION?=8f6d062fa80ed4ac4a00d1ac53aa4de54183fffe
|
||||
RWKV_VERSION?=c898cd0f62df8f2a7830e53d1d513bef4f6f792b
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_CPP_VERSION?=9286d3f584240ba58bd44a1bd1e85141579c78d4
|
||||
WHISPER_CPP_VERSION?=85ed71aaec8e0612a84c0b67804bde75aa75a273
|
||||
|
||||
# bert.cpp version
|
||||
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
|
||||
|
||||
# go-piper version
|
||||
PIPER_VERSION?=d6b6275ba037dabdba4a8b65dfdf6b2a73a67f07
|
||||
PIPER_VERSION?=56b8a81b4760a6fbee1a82e62f007ae7e8f010a7
|
||||
|
||||
# go-bloomz version
|
||||
BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f
|
||||
|
||||
# stablediffusion version
|
||||
STABLEDIFFUSION_VERSION?=902db5f066fd137697e3b69d0fa10d4782bd2c2f
|
||||
STABLEDIFFUSION_VERSION?=d89260f598afb809279bc72aa0107b4292587632
|
||||
|
||||
# Go-ggllm
|
||||
GOGGLLM_VERSION?=862477d16eefb0805261c19c9b0d053e3b2b684b
|
||||
|
||||
export BUILD_TYPE?=
|
||||
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
|
||||
export CMAKE_ARGS?=
|
||||
|
||||
CGO_LDFLAGS?=
|
||||
CUDA_LIBPATH?=/usr/local/cuda/lib64/
|
||||
GO_TAGS?=
|
||||
BUILD_ID?=git
|
||||
|
||||
TEST_DIR=/tmp/test
|
||||
|
||||
RANDOM := $(shell bash -c 'echo $$RANDOM')
|
||||
|
||||
VERSION?=$(shell git describe --always --tags || echo "dev" )
|
||||
# go tool nm ./local-ai | grep Commit
|
||||
LD_FLAGS?=
|
||||
@@ -53,6 +52,7 @@ override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Version=$(VERSION
|
||||
override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
|
||||
|
||||
OPTIONAL_TARGETS?=
|
||||
ESPEAK_DATA?=
|
||||
|
||||
OS := $(shell uname -s)
|
||||
ARCH := $(shell uname -m)
|
||||
@@ -62,46 +62,33 @@ WHITE := $(shell tput -Txterm setaf 7)
|
||||
CYAN := $(shell tput -Txterm setaf 6)
|
||||
RESET := $(shell tput -Txterm sgr0)
|
||||
|
||||
# Default Docker bridge IP
|
||||
E2E_BRIDGE_IP?=172.17.0.1
|
||||
|
||||
ifndef UNAME_S
|
||||
UNAME_S := $(shell uname -s)
|
||||
endif
|
||||
|
||||
ifeq ($(OS),Darwin)
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
CGO_LDFLAGS += -lcblas -framework Accelerate
|
||||
ifeq ($(OSX_SIGNING_IDENTITY),)
|
||||
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
|
||||
endif
|
||||
|
||||
# on OSX, if BUILD_TYPE is blank, we should default to use Metal
|
||||
ifeq ($(BUILD_TYPE),)
|
||||
BUILD_TYPE=metal
|
||||
# disable metal if on Darwin and any other value is explicitly passed.
|
||||
else ifneq ($(BUILD_TYPE),metal)
|
||||
CMAKE_ARGS+=-DLLAMA_METAL=OFF
|
||||
endif
|
||||
ifneq ($(BUILD_TYPE),metal)
|
||||
# explicit disable metal if on Darwin and metal is disabled
|
||||
CMAKE_ARGS+=-DLLAMA_METAL=OFF
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),openblas)
|
||||
CGO_LDFLAGS+=-lopenblas
|
||||
export WHISPER_OPENBLAS=1
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),cublas)
|
||||
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
|
||||
export LLAMA_CUBLAS=1
|
||||
export WHISPER_CUBLAS=1
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),hipblas)
|
||||
ROCM_HOME ?= /opt/rocm
|
||||
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||
# llama-ggml has no hipblas support, so override it here.
|
||||
# Llama-stable has no hipblas support, so override it here.
|
||||
export STABLE_BUILD_TYPE=
|
||||
export WHISPER_HIPBLAS=1
|
||||
GPU_TARGETS ?= gfx900,gfx90a,gfx1030,gfx1031,gfx1100
|
||||
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
||||
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
||||
@@ -111,12 +98,10 @@ endif
|
||||
ifeq ($(BUILD_TYPE),metal)
|
||||
CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
|
||||
export LLAMA_METAL=1
|
||||
export WHISPER_METAL=1
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),clblas)
|
||||
CGO_LDFLAGS+=-lOpenCL -lclblast
|
||||
export WHISPER_CLBLAST=1
|
||||
endif
|
||||
|
||||
# glibc-static or glibc-devel-static required
|
||||
@@ -132,135 +117,146 @@ endif
|
||||
ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
||||
# OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
|
||||
# OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
|
||||
PIPER_CGO_CXXFLAGS+=-I$(shell pwd)/sources/go-piper/piper/src/cpp -I$(shell pwd)/sources/go-piper/piper/build/fi/include -I$(shell pwd)/sources/go-piper/piper/build/pi/include -I$(shell pwd)/sources/go-piper/piper/build/si/include
|
||||
PIPER_CGO_LDFLAGS+=-L$(shell pwd)/sources/go-piper/piper/build/fi/lib -L$(shell pwd)/sources/go-piper/piper/build/pi/lib -L$(shell pwd)/sources/go-piper/piper/build/si/lib -lfmt -lspdlog -lucd
|
||||
OPTIONAL_GRPC+=backend-assets/grpc/piper
|
||||
endif
|
||||
|
||||
ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-ggml backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
|
||||
GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
|
||||
|
||||
# If empty, then we build all
|
||||
ifeq ($(GRPC_BACKENDS),)
|
||||
GRPC_BACKENDS=$(ALL_GRPC_BACKENDS)
|
||||
endif
|
||||
GRPC_BACKENDS?=backend-assets/grpc/langchain-huggingface backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/falcon backend-assets/grpc/bloomz backend-assets/grpc/llama backend-assets/grpc/llama-stable backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
|
||||
|
||||
.PHONY: all test build vendor
|
||||
|
||||
all: help
|
||||
|
||||
## GPT4ALL
|
||||
sources/gpt4all:
|
||||
git clone --recurse-submodules $(GPT4ALL_REPO) sources/gpt4all
|
||||
cd sources/gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
|
||||
gpt4all:
|
||||
git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all
|
||||
cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
## go-ggllm
|
||||
go-ggllm:
|
||||
git clone --recurse-submodules https://github.com/mudler/go-ggllm.cpp go-ggllm
|
||||
cd go-ggllm && git checkout -b build $(GOGGLLM_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
go-ggllm/libggllm.a: go-ggllm
|
||||
$(MAKE) -C go-ggllm BUILD_TYPE=$(BUILD_TYPE) libggllm.a
|
||||
|
||||
## go-piper
|
||||
sources/go-piper:
|
||||
git clone --recurse-submodules https://github.com/mudler/go-piper sources/go-piper
|
||||
cd sources/go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
|
||||
go-piper:
|
||||
git clone --recurse-submodules https://github.com/mudler/go-piper go-piper
|
||||
cd go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
## BERT embeddings
|
||||
sources/go-bert:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert
|
||||
cd sources/go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
|
||||
go-bert:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp go-bert
|
||||
cd go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
## stable diffusion
|
||||
sources/go-stable-diffusion:
|
||||
git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion sources/go-stable-diffusion
|
||||
cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
|
||||
go-stable-diffusion:
|
||||
git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion go-stable-diffusion
|
||||
cd go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
sources/go-stable-diffusion/libstablediffusion.a:
|
||||
$(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
|
||||
go-stable-diffusion/libstablediffusion.a:
|
||||
$(MAKE) -C go-stable-diffusion libstablediffusion.a
|
||||
|
||||
## RWKV
|
||||
sources/go-rwkv:
|
||||
git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv
|
||||
cd sources/go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
|
||||
go-rwkv:
|
||||
git clone --recurse-submodules $(RWKV_REPO) go-rwkv
|
||||
cd go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
sources/go-rwkv/librwkv.a: sources/go-rwkv
|
||||
cd sources/go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
|
||||
go-rwkv/librwkv.a: go-rwkv
|
||||
cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
|
||||
|
||||
sources/go-bert/libgobert.a: sources/go-bert
|
||||
$(MAKE) -C sources/go-bert libgobert.a
|
||||
## bloomz
|
||||
bloomz:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/bloomz.cpp bloomz
|
||||
cd bloomz && git checkout -b build $(BLOOMZ_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
backend-assets/gpt4all: sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
|
||||
bloomz/libbloomz.a: bloomz
|
||||
cd bloomz && make libbloomz.a
|
||||
|
||||
go-bert/libgobert.a: go-bert
|
||||
$(MAKE) -C go-bert libgobert.a
|
||||
|
||||
backend-assets/gpt4all: gpt4all/gpt4all-bindings/golang/libgpt4all.a
|
||||
mkdir -p backend-assets/gpt4all
|
||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
|
||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
|
||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
|
||||
@cp gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
|
||||
@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
|
||||
@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
|
||||
|
||||
backend-assets/espeak-ng-data: sources/go-piper
|
||||
backend-assets/espeak-ng-data:
|
||||
mkdir -p backend-assets/espeak-ng-data
|
||||
$(MAKE) -C sources/go-piper piper.o
|
||||
@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
|
||||
ifdef ESPEAK_DATA
|
||||
@cp -rf $(ESPEAK_DATA)/. backend-assets/espeak-ng-data
|
||||
else
|
||||
@echo "ESPEAK_DATA not set, skipping tts. Note that this will break the tts functionality."
|
||||
@touch backend-assets/espeak-ng-data/keep
|
||||
endif
|
||||
|
||||
sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
|
||||
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
|
||||
gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
|
||||
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ libgpt4all.a
|
||||
|
||||
## CEREBRAS GPT
|
||||
sources/go-ggml-transformers:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-ggml-transformers.cpp sources/go-ggml-transformers
|
||||
cd sources/go-ggml-transformers && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1
|
||||
go-ggml-transformers:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-ggml-transformers.cpp go-ggml-transformers
|
||||
cd go-ggml-transformers && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
sources/go-ggml-transformers/libtransformers.a: sources/go-ggml-transformers
|
||||
$(MAKE) -C sources/go-ggml-transformers BUILD_TYPE=$(BUILD_TYPE) libtransformers.a
|
||||
go-ggml-transformers/libtransformers.a: go-ggml-transformers
|
||||
$(MAKE) -C go-ggml-transformers BUILD_TYPE=$(BUILD_TYPE) libtransformers.a
|
||||
|
||||
sources/whisper.cpp:
|
||||
git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp
|
||||
cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
|
||||
whisper.cpp:
|
||||
git clone https://github.com/ggerganov/whisper.cpp.git
|
||||
cd whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||
cd sources/whisper.cpp && make libwhisper.a
|
||||
whisper.cpp/libwhisper.a: whisper.cpp
|
||||
cd whisper.cpp && make libwhisper.a
|
||||
|
||||
sources/go-llama:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama
|
||||
cd sources/go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
|
||||
go-llama:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
|
||||
cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
sources/go-llama-ggml:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama-ggml
|
||||
cd sources/go-llama-ggml && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
|
||||
go-llama-stable:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama-stable
|
||||
cd go-llama-stable && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
sources/go-llama/libbinding.a: sources/go-llama
|
||||
$(MAKE) -C sources/go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
|
||||
go-llama/libbinding.a: go-llama
|
||||
$(MAKE) -C go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
|
||||
|
||||
sources/go-llama-ggml/libbinding.a: sources/go-llama-ggml
|
||||
$(MAKE) -C sources/go-llama-ggml BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
|
||||
go-llama-stable/libbinding.a: go-llama-stable
|
||||
$(MAKE) -C go-llama-stable BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
|
||||
|
||||
sources/go-piper/libpiper_binding.a: sources/go-piper
|
||||
$(MAKE) -C sources/go-piper libpiper_binding.a example/main
|
||||
go-piper/libpiper_binding.a:
|
||||
$(MAKE) -C go-piper libpiper_binding.a example/main
|
||||
|
||||
backend/cpp/llama/llama.cpp:
|
||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
|
||||
|
||||
get-sources: backend/cpp/llama/llama.cpp sources/go-llama sources/go-llama-ggml sources/go-ggml-transformers sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion
|
||||
get-sources: go-llama go-llama-stable go-ggllm go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion
|
||||
touch $@
|
||||
|
||||
replace:
|
||||
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(shell pwd)/sources/gpt4all/gpt4all-bindings/golang
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-ggml-transformers.cpp=$(shell pwd)/sources/go-ggml-transformers
|
||||
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/sources/go-rwkv
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/sources/whisper.cpp
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(shell pwd)/sources/whisper.cpp/bindings/go
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/sources/go-bert
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/sources/go-stable-diffusion
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(shell pwd)/sources/go-piper
|
||||
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-ggml-transformers.cpp=$(shell pwd)/go-ggml-transformers
|
||||
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(shell pwd)/go-piper
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-ggllm.cpp=$(shell pwd)/go-ggllm
|
||||
|
||||
prepare-sources: get-sources replace
|
||||
$(GOCMD) mod download
|
||||
touch $@
|
||||
|
||||
## GENERIC
|
||||
rebuild: ## Rebuilds the project
|
||||
$(GOCMD) clean -cache
|
||||
$(MAKE) -C sources/go-llama clean
|
||||
$(MAKE) -C sources/go-llama-ggml clean
|
||||
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
|
||||
$(MAKE) -C sources/go-ggml-transformers clean
|
||||
$(MAKE) -C sources/go-rwkv clean
|
||||
$(MAKE) -C sources/whisper.cpp clean
|
||||
$(MAKE) -C sources/go-stable-diffusion clean
|
||||
$(MAKE) -C sources/go-bert clean
|
||||
$(MAKE) -C sources/go-piper clean
|
||||
$(MAKE) -C go-llama clean
|
||||
$(MAKE) -C go-llama-stable clean
|
||||
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean
|
||||
$(MAKE) -C go-ggml-transformers clean
|
||||
$(MAKE) -C go-rwkv clean
|
||||
$(MAKE) -C whisper.cpp clean
|
||||
$(MAKE) -C go-stable-diffusion clean
|
||||
$(MAKE) -C go-bert clean
|
||||
$(MAKE) -C bloomz clean
|
||||
$(MAKE) -C go-piper clean
|
||||
$(MAKE) -C go-ggllm clean
|
||||
$(MAKE) build
|
||||
|
||||
prepare: prepare-sources $(OPTIONAL_TARGETS)
|
||||
@@ -269,13 +265,21 @@ prepare: prepare-sources $(OPTIONAL_TARGETS)
|
||||
clean: ## Remove build related file
|
||||
$(GOCMD) clean -cache
|
||||
rm -f prepare
|
||||
rm -rf ./sources
|
||||
rm -rf ./go-llama
|
||||
rm -rf ./gpt4all
|
||||
rm -rf ./go-llama-stable
|
||||
rm -rf ./go-gpt2
|
||||
rm -rf ./go-stable-diffusion
|
||||
rm -rf ./go-ggml-transformers
|
||||
rm -rf ./backend-assets
|
||||
rm -rf ./go-rwkv
|
||||
rm -rf ./go-bert
|
||||
rm -rf ./bloomz
|
||||
rm -rf ./whisper.cpp
|
||||
rm -rf ./go-piper
|
||||
rm -rf ./go-ggllm
|
||||
rm -rf $(BINARY_NAME)
|
||||
rm -rf release/
|
||||
rm -rf ./backend/cpp/grpc/grpc_repo
|
||||
rm -rf ./backend/cpp/grpc/build
|
||||
rm -rf ./backend/cpp/grpc/installed_packages
|
||||
$(MAKE) -C backend/cpp/llama clean
|
||||
|
||||
## Build:
|
||||
|
||||
@@ -291,9 +295,6 @@ dist: build
|
||||
mkdir -p release
|
||||
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)
|
||||
|
||||
osx-signed: build
|
||||
codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"
|
||||
|
||||
## Run
|
||||
run: prepare ## run local-ai
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./
|
||||
@@ -301,12 +302,12 @@ run: prepare ## run local-ai
|
||||
test-models/testmodel:
|
||||
mkdir test-models
|
||||
mkdir test-dir
|
||||
wget -q https://huggingface.co/nnakasato/ggml-model-test/resolve/main/ggml-model-q4.bin -O test-models/testmodel
|
||||
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
||||
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
|
||||
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
||||
wget -q https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv
|
||||
wget -q https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
|
||||
wget https://huggingface.co/nnakasato/ggml-model-test/resolve/main/ggml-model-q4.bin -O test-models/testmodel
|
||||
wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
||||
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O test-models/bert
|
||||
wget https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
||||
wget https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv
|
||||
wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
|
||||
cp tests/models_fixtures/* test-models
|
||||
|
||||
prepare-test: grpcs
|
||||
@@ -317,34 +318,14 @@ test: prepare test-models/testmodel grpcs
|
||||
@echo 'Running tests'
|
||||
export GO_TAGS="tts stablediffusion"
|
||||
$(MAKE) prepare-test
|
||||
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts 5 --fail-fast -v -r ./api ./pkg
|
||||
HUGGINGFACE_GRPC=$(abspath ./)/extra/grpc/huggingface/huggingface.py TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts 5 -v -r ./api ./pkg
|
||||
$(MAKE) test-gpt4all
|
||||
$(MAKE) test-llama
|
||||
$(MAKE) test-llama-gguf
|
||||
$(MAKE) test-tts
|
||||
$(MAKE) test-stablediffusion
|
||||
|
||||
prepare-e2e:
|
||||
mkdir -p $(TEST_DIR)
|
||||
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
|
||||
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
|
||||
docker build --build-arg BUILD_GRPC=true --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
|
||||
|
||||
run-e2e-image:
|
||||
ls -liah $(abspath ./tests/e2e-fixtures)
|
||||
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
|
||||
|
||||
test-e2e:
|
||||
@echo 'Running e2e tests'
|
||||
BUILD_TYPE=$(BUILD_TYPE) \
|
||||
LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e
|
||||
|
||||
teardown-e2e:
|
||||
rm -rf $(TEST_DIR) || true
|
||||
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
||||
|
||||
test-gpt4all: prepare-test
|
||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r ./api ./pkg
|
||||
@@ -384,153 +365,102 @@ help: ## Show this help.
|
||||
protogen: protogen-go protogen-python
|
||||
|
||||
protogen-go:
|
||||
protoc -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
|
||||
backend/backend.proto
|
||||
protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative \
|
||||
pkg/grpc/proto/backend.proto
|
||||
|
||||
protogen-python:
|
||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/sentencetransformers/ --grpc_python_out=backend/python/sentencetransformers/ backend/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/transformers/ --grpc_python_out=backend/python/transformers/ backend/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/transformers-musicgen/ --grpc_python_out=backend/python/transformers-musicgen/ backend/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/autogptq/ --grpc_python_out=backend/python/autogptq/ backend/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/exllama/ --grpc_python_out=backend/python/exllama/ backend/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/bark/ --grpc_python_out=backend/python/bark/ backend/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/diffusers/ --grpc_python_out=backend/python/diffusers/ backend/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vall-e-x/ --grpc_python_out=backend/python/vall-e-x/ backend/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vllm/ --grpc_python_out=backend/python/vllm/ backend/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/petals/ --grpc_python_out=backend/python/petals/ backend/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/exllama2/ --grpc_python_out=backend/python/exllama2/ backend/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/huggingface/ --grpc_python_out=extra/grpc/huggingface/ pkg/grpc/proto/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/autogptq/ --grpc_python_out=extra/grpc/autogptq/ pkg/grpc/proto/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/exllama/ --grpc_python_out=extra/grpc/exllama/ pkg/grpc/proto/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/bark/ --grpc_python_out=extra/grpc/bark/ pkg/grpc/proto/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/diffusers/ --grpc_python_out=extra/grpc/diffusers/ pkg/grpc/proto/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/vall-e-x/ --grpc_python_out=extra/grpc/vall-e-x/ pkg/grpc/proto/backend.proto
|
||||
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/vllm/ --grpc_python_out=extra/grpc/vllm/ pkg/grpc/proto/backend.proto
|
||||
|
||||
## GRPC
|
||||
# Note: it is duplicated in the Dockerfile
|
||||
prepare-extra-conda-environments:
|
||||
$(MAKE) -C backend/python/autogptq
|
||||
$(MAKE) -C backend/python/bark
|
||||
$(MAKE) -C backend/python/diffusers
|
||||
$(MAKE) -C backend/python/vllm
|
||||
$(MAKE) -C backend/python/sentencetransformers
|
||||
$(MAKE) -C backend/python/transformers
|
||||
$(MAKE) -C backend/python/transformers-musicgen
|
||||
$(MAKE) -C backend/python/vall-e-x
|
||||
$(MAKE) -C backend/python/exllama
|
||||
$(MAKE) -C backend/python/petals
|
||||
$(MAKE) -C backend/python/exllama2
|
||||
|
||||
prepare-test-extra:
|
||||
$(MAKE) -C backend/python/transformers
|
||||
$(MAKE) -C backend/python/diffusers
|
||||
|
||||
test-extra: prepare-test-extra
|
||||
$(MAKE) -C backend/python/transformers test
|
||||
$(MAKE) -C backend/python/diffusers test
|
||||
|
||||
backend-assets/grpc:
|
||||
mkdir -p backend-assets/grpc
|
||||
|
||||
backend-assets/grpc/llama: backend-assets/grpc sources/go-llama/libbinding.a
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/sources/go-llama
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-llama LIBRARY_PATH=$(shell pwd)/sources/go-llama \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama ./backend/go/llm/llama/
|
||||
# TODO: every binary should have its own folder instead, so can have different implementations
|
||||
ifeq ($(BUILD_TYPE),metal)
|
||||
cp backend/cpp/llama/llama.cpp/ggml-metal.metal backend-assets/grpc/
|
||||
endif
|
||||
backend-assets/grpc/falcon: backend-assets/grpc go-ggllm/libggllm.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggllm LIBRARY_PATH=$(shell pwd)/go-ggllm \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon ./cmd/grpc/falcon/
|
||||
|
||||
## BACKEND CPP LLAMA START
|
||||
# Sets the variables in case it has to build the gRPC locally.
|
||||
INSTALLED_PACKAGES=$(CURDIR)/backend/cpp/grpc/installed_packages
|
||||
INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
|
||||
ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
|
||||
-DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
|
||||
-Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
|
||||
-DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
|
||||
-DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
|
||||
|
||||
backend/cpp/llama/grpc-server:
|
||||
ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
|
||||
backend/cpp/grpc/script/build_grpc.sh ${INSTALLED_PACKAGES}
|
||||
export _PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto && \
|
||||
export _GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin && \
|
||||
export PATH=${PATH}:${INSTALLED_PACKAGES}/bin && \
|
||||
CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
|
||||
else
|
||||
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
|
||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
|
||||
endif
|
||||
## BACKEND CPP LLAMA END
|
||||
|
||||
##
|
||||
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server
|
||||
cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp
|
||||
backend-assets/grpc/llama: backend-assets/grpc go-llama/libbinding.a
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama LIBRARY_PATH=$(shell pwd)/go-llama \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama ./cmd/grpc/llama/
|
||||
# TODO: every binary should have its own folder instead, so can have different metal implementations
|
||||
ifeq ($(BUILD_TYPE),metal)
|
||||
cp backend/cpp/llama/llama.cpp/build/bin/ggml-metal.metal backend-assets/grpc/
|
||||
cp go-llama/build/bin/ggml-metal.metal backend-assets/grpc/
|
||||
endif
|
||||
|
||||
backend-assets/grpc/llama-ggml: backend-assets/grpc sources/go-llama-ggml/libbinding.a
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/sources/go-llama-ggml
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-llama-ggml LIBRARY_PATH=$(shell pwd)/sources/go-llama-ggml \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
|
||||
backend-assets/grpc/llama-stable: backend-assets/grpc go-llama-stable/libbinding.a
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama-stable
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama-stable LIBRARY_PATH=$(shell pwd)/go-llama \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-stable ./cmd/grpc/llama-stable/
|
||||
|
||||
backend-assets/grpc/gpt4all: backend-assets/grpc backend-assets/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(shell pwd)/sources/gpt4all/gpt4all-bindings/golang/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
|
||||
backend-assets/grpc/gpt4all: backend-assets/grpc backend-assets/gpt4all gpt4all/gpt4all-bindings/golang/libgpt4all.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(shell pwd)/gpt4all/gpt4all-bindings/golang/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./cmd/grpc/gpt4all/
|
||||
|
||||
backend-assets/grpc/dolly: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/sources/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/dolly ./backend/go/llm/dolly/
|
||||
backend-assets/grpc/dolly: backend-assets/grpc go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/dolly ./cmd/grpc/dolly/
|
||||
|
||||
backend-assets/grpc/gpt2: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/sources/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt2 ./backend/go/llm/gpt2/
|
||||
backend-assets/grpc/gpt2: backend-assets/grpc go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt2 ./cmd/grpc/gpt2/
|
||||
|
||||
backend-assets/grpc/gptj: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/sources/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptj ./backend/go/llm/gptj/
|
||||
backend-assets/grpc/gptj: backend-assets/grpc go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptj ./cmd/grpc/gptj/
|
||||
|
||||
backend-assets/grpc/gptneox: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/sources/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptneox ./backend/go/llm/gptneox/
|
||||
backend-assets/grpc/gptneox: backend-assets/grpc go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptneox ./cmd/grpc/gptneox/
|
||||
|
||||
backend-assets/grpc/mpt: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/sources/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/mpt ./backend/go/llm/mpt/
|
||||
backend-assets/grpc/mpt: backend-assets/grpc go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/mpt ./cmd/grpc/mpt/
|
||||
|
||||
backend-assets/grpc/replit: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/sources/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/replit ./backend/go/llm/replit/
|
||||
backend-assets/grpc/replit: backend-assets/grpc go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/replit ./cmd/grpc/replit/
|
||||
|
||||
backend-assets/grpc/falcon-ggml: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/sources/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon-ggml ./backend/go/llm/falcon-ggml/
|
||||
backend-assets/grpc/falcon-ggml: backend-assets/grpc go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon-ggml ./cmd/grpc/falcon-ggml/
|
||||
|
||||
backend-assets/grpc/starcoder: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/sources/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/starcoder ./backend/go/llm/starcoder/
|
||||
backend-assets/grpc/starcoder: backend-assets/grpc go-ggml-transformers/libtransformers.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/starcoder ./cmd/grpc/starcoder/
|
||||
|
||||
backend-assets/grpc/rwkv: backend-assets/grpc sources/go-rwkv/librwkv.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-rwkv LIBRARY_PATH=$(shell pwd)/sources/go-rwkv \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
|
||||
backend-assets/grpc/rwkv: backend-assets/grpc go-rwkv/librwkv.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-rwkv LIBRARY_PATH=$(shell pwd)/go-rwkv \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./cmd/grpc/rwkv/
|
||||
|
||||
backend-assets/grpc/bert-embeddings: backend-assets/grpc sources/go-bert/libgobert.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-bert LIBRARY_PATH=$(shell pwd)/sources/go-bert \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
|
||||
backend-assets/grpc/bloomz: backend-assets/grpc bloomz/libbloomz.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/bloomz LIBRARY_PATH=$(shell pwd)/bloomz \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bloomz ./cmd/grpc/bloomz/
|
||||
|
||||
backend-assets/grpc/bert-embeddings: backend-assets/grpc go-bert/libgobert.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-bert LIBRARY_PATH=$(shell pwd)/go-bert \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./cmd/grpc/bert-embeddings/
|
||||
|
||||
backend-assets/grpc/langchain-huggingface: backend-assets/grpc
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./backend/go/llm/langchain/
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./cmd/grpc/langchain-huggingface/
|
||||
|
||||
backend-assets/grpc/stablediffusion: backend-assets/grpc
|
||||
if [ ! -f backend-assets/grpc/stablediffusion ]; then \
|
||||
$(MAKE) sources/go-stable-diffusion/libstablediffusion.a; \
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/go-stable-diffusion/ LIBRARY_PATH=$(shell pwd)/sources/go-stable-diffusion/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/; \
|
||||
fi
|
||||
backend-assets/grpc/stablediffusion: backend-assets/grpc go-stable-diffusion/libstablediffusion.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-stable-diffusion/ LIBRARY_PATH=$(shell pwd)/go-stable-diffusion/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./cmd/grpc/stablediffusion/
|
||||
|
||||
backend-assets/grpc/piper: backend-assets/grpc backend-assets/espeak-ng-data sources/go-piper/libpiper_binding.a
|
||||
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(shell pwd)/sources/go-piper \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
|
||||
backend-assets/grpc/piper: backend-assets/grpc backend-assets/espeak-ng-data go-piper/libpiper_binding.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(shell pwd)/go-piper \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./cmd/grpc/piper/
|
||||
|
||||
backend-assets/grpc/whisper: backend-assets/grpc sources/whisper.cpp/libwhisper.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/sources/whisper.cpp LIBRARY_PATH=$(shell pwd)/sources/whisper.cpp \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
|
||||
backend-assets/grpc/whisper: backend-assets/grpc whisper.cpp/libwhisper.a
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/whisper.cpp LIBRARY_PATH=$(shell pwd)/whisper.cpp \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./cmd/grpc/whisper/
|
||||
|
||||
grpcs: prepare $(GRPC_BACKENDS)
|
||||
|
||||
85
README.md
85
README.md
@@ -21,14 +21,15 @@
|
||||
</p>
|
||||
|
||||
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
||||
>
|
||||
> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||
>
|
||||
> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/)
|
||||
|
||||
|
||||
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
||||
|
||||
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU.
|
||||
**LocalAI** is a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families that are compatible with the ggml format, pytorch and more. Does not require GPU.
|
||||
|
||||
<p align="center"><b>Follow LocalAI </b></p>
|
||||
<p align="center"><b>Follow LocalAI </b></p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://twitter.com/LocalAI_API" target="blank">
|
||||
@@ -38,7 +39,7 @@
|
||||
<img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
|
||||
</a>
|
||||
|
||||
<p align="center"><b>Connect with the Creator </b></p>
|
||||
<p align="center"><b>Connect with the Creator </b></p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://twitter.com/mudler_it" target="blank">
|
||||
@@ -49,12 +50,12 @@
|
||||
</a>
|
||||
</p>
|
||||
|
||||
<p align="center"><b>Share LocalAI Repository</b></p>
|
||||
<p align="center"><b>Share LocalAI Repository</b></p>
|
||||
|
||||
<p align="center">
|
||||
|
||||
<a href="https://twitter.com/intent/tweet?text=Check%20this%20GitHub%20repository%20out.%20LocalAI%20-%20Let%27s%20you%20easily%20run%20LLM%20locally.&url=https://github.com/go-skynet/LocalAI&hashtags=LocalAI,AI" target="blank">
|
||||
<img src="https://img.shields.io/twitter/follow/_LocalAI?label=Share Repo on Twitter&style=social" alt="Follow _LocalAI"/></a>
|
||||
<img src="https://img.shields.io/twitter/follow/_LocalAI?label=Share Repo on Twitter&style=social" alt="Follow _LocalAI"/></a>
|
||||
<a href="https://t.me/share/url?text=Check%20this%20GitHub%20repository%20out.%20LocalAI%20-%20Let%27s%20you%20easily%20run%20LLM%20locally.&url=https://github.com/go-skynet/LocalAI" target="_blank"><img src="https://img.shields.io/twitter/url?label=Telegram&logo=Telegram&style=social&url=https://github.com/go-skynet/LocalAI" alt="Share on Telegram"/></a>
|
||||
<a href="https://api.whatsapp.com/send?text=Check%20this%20GitHub%20repository%20out.%20LocalAI%20-%20Let%27s%20you%20easily%20run%20LLM%20locally.%20https://github.com/go-skynet/LocalAI"><img src="https://img.shields.io/twitter/url?label=whatsapp&logo=whatsapp&style=social&url=https://github.com/go-skynet/LocalAI" /></a> <a href="https://www.reddit.com/submit?url=https://github.com/go-skynet/LocalAI&title=Check%20this%20GitHub%20repository%20out.%20LocalAI%20-%20Let%27s%20you%20easily%20run%20LLM%20locally.
|
||||
" target="blank">
|
||||
@@ -63,37 +64,23 @@
|
||||
|
||||
</p>
|
||||
|
||||
## 💻 [Getting started](https://localai.io/basics/getting_started/index.html)
|
||||
|
||||
## 🔥🔥 Hot topics / Roadmap
|
||||
|
||||
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||
|
||||
🆕 New! [LLM finetuning guide](https://localai.io/advanced/fine-tuning/)
|
||||
|
||||
Hot topics (looking for contributors):
|
||||
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
||||
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
|
||||
|
||||
If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22
|
||||
|
||||
|
||||
|
||||
<hr>
|
||||
|
||||
In a nutshell:
|
||||
|
||||
- Local, OpenAI drop-in alternative REST API. You own your data.
|
||||
- NO GPU required. NO Internet access is required either
|
||||
- Optional, GPU Acceleration is available in `llama.cpp`-compatible LLMs. See also the [build section](https://localai.io/basics/build/index.html).
|
||||
- Optional, GPU Acceleration is available in `llama.cpp`-compatible LLMs. See also the [build section](https://localai.io/basics/build/index.html).
|
||||
- Supports multiple models
|
||||
- 🏃 Once loaded the first time, it keep models loaded in memory for faster inference
|
||||
- ⚡ Doesn't shell-out, but uses C++ bindings for a faster inference and better performance.
|
||||
|
||||
LocalAI was created by [Ettore Di Giacinto](https://github.com/mudler/) and is a community-driven project, focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome!
|
||||
LocalAI was created by [Ettore Di Giacinto](https://github.com/mudler/) and is a community-driven project, focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome!
|
||||
|
||||
Note that this started just as a [fun weekend project](https://localai.io/#backstory) in order to try to create the necessary pieces for a full AI assistant like `ChatGPT`: the community is growing fast and we are working hard to make it better and more stable. If you want to help, please consider contributing (see below)!
|
||||
|
||||
## 🔥🔥 [Hot topics / Roadmap](https://localai.io/#-hot-topics--roadmap)
|
||||
|
||||
## 🚀 [Features](https://localai.io/features/)
|
||||
|
||||
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
|
||||
@@ -104,34 +91,7 @@ Note that this started just as a [fun weekend project](https://localai.io/#backs
|
||||
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
|
||||
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
|
||||
- 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
|
||||
- 🆕 [Vision API](https://localai.io/features/gpt-vision/)
|
||||
|
||||
## 💻 Usage
|
||||
|
||||
Check out the [Getting started](https://localai.io/basics/getting_started/index.html) section in our documentation.
|
||||
|
||||
### 🔗 Community and integrations
|
||||
|
||||
Build and deploy custom containers:
|
||||
- https://github.com/sozercan/aikit
|
||||
|
||||
WebUIs:
|
||||
- https://github.com/Jirubizu/localai-admin
|
||||
- https://github.com/go-skynet/LocalAI-frontend
|
||||
|
||||
Model galleries
|
||||
- https://github.com/go-skynet/model-gallery
|
||||
|
||||
Other:
|
||||
- Helm chart https://github.com/go-skynet/helm-charts
|
||||
|
||||
### 🔗 Resources
|
||||
|
||||
- 🆕 New! [LLM finetuning guide](https://localai.io/advanced/fine-tuning/)
|
||||
- [How to build locally](https://localai.io/basics/build/index.html)
|
||||
- [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes)
|
||||
- [Projects integrating LocalAI](https://localai.io/integrations/)
|
||||
- [How tos section](https://localai.io/howtos/) (curated by our community)
|
||||
|
||||
## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
|
||||
|
||||
@@ -140,6 +100,21 @@ Other:
|
||||
- [Question Answering on Documents locally with LangChain, LocalAI, Chroma, and GPT4All](https://mudler.pm/posts/localai-question-answering/)
|
||||
- [Tutorial to use k8sgpt with LocalAI](https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65)
|
||||
|
||||
## 💻 Usage
|
||||
|
||||
Check out the [Getting started](https://localai.io/basics/getting_started/index.html) section in our documentation.
|
||||
|
||||
### 💡 Example: Use Luna-AI Llama model
|
||||
|
||||
See the [documentation](https://localai.io/basics/getting_started)
|
||||
|
||||
### 🔗 Resources
|
||||
|
||||
- [How to build locally](https://localai.io/basics/build/index.html)
|
||||
- [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes)
|
||||
- [Projects integrating LocalAI](https://localai.io/integrations/)
|
||||
- [How tos section](https://localai.io/howtos/) (curated by our community)
|
||||
|
||||
## Citation
|
||||
|
||||
If you utilize this repository, data in a downstream project, please consider citing it with:
|
||||
@@ -162,12 +137,12 @@ Support the project by becoming [a backer or sponsor](https://github.com/sponsor
|
||||
|
||||
A huge thank you to our generous sponsors who support this project:
|
||||
|
||||
|  |
|
||||
|  |
|
||||
|:-----------------------------------------------:|
|
||||
| [Spectro Cloud](https://www.spectrocloud.com/) |
|
||||
| [Spectro Cloud](https://www.spectrocloud.com/) |
|
||||
| Spectro Cloud kindly supports LocalAI by providing GPU and computing resources to run tests on lamdalabs! |
|
||||
|
||||
And a huge shout-out to individuals sponsoring the project by donating hardware or backing the project.
|
||||
And a huge shout-out to individuals sponsoring the project by donating hardware or backing the project.
|
||||
|
||||
- [Sponsor list](https://github.com/sponsors/mudler)
|
||||
- JDAM00 (donating HW for the CI)
|
||||
|
||||
83
api/api.go
83
api/api.go
@@ -1,10 +1,8 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
config "github.com/go-skynet/LocalAI/api/config"
|
||||
@@ -13,9 +11,7 @@ import (
|
||||
"github.com/go-skynet/LocalAI/api/options"
|
||||
"github.com/go-skynet/LocalAI/api/schema"
|
||||
"github.com/go-skynet/LocalAI/internal"
|
||||
"github.com/go-skynet/LocalAI/metrics"
|
||||
"github.com/go-skynet/LocalAI/pkg/assets"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/gofiber/fiber/v2/middleware/cors"
|
||||
@@ -47,10 +43,6 @@ func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader,
|
||||
}
|
||||
}
|
||||
|
||||
if err := cl.Preload(options.Loader.ModelPath); err != nil {
|
||||
log.Error().Msgf("error downloading models: %s", err.Error())
|
||||
}
|
||||
|
||||
if options.Debug {
|
||||
for _, v := range cl.ListConfigs() {
|
||||
cfg, _ := cl.GetConfig(v)
|
||||
@@ -86,22 +78,6 @@ func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader,
|
||||
options.Loader.StopAllGRPC()
|
||||
}()
|
||||
|
||||
if options.WatchDog {
|
||||
wd := model.NewWatchDog(
|
||||
options.Loader,
|
||||
options.WatchDogBusyTimeout,
|
||||
options.WatchDogIdleTimeout,
|
||||
options.WatchDogBusy,
|
||||
options.WatchDogIdle)
|
||||
options.Loader.SetWatchDog(wd)
|
||||
go wd.Run()
|
||||
go func() {
|
||||
<-options.Context.Done()
|
||||
log.Debug().Msgf("Context canceled, shutting down")
|
||||
wd.Shutdown()
|
||||
}()
|
||||
}
|
||||
|
||||
return options, cl, nil
|
||||
}
|
||||
|
||||
@@ -144,52 +120,31 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
||||
|
||||
// Default middleware config
|
||||
app.Use(recover.New())
|
||||
if options.Metrics != nil {
|
||||
app.Use(metrics.APIMiddleware(options.Metrics))
|
||||
}
|
||||
|
||||
// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
|
||||
auth := func(c *fiber.Ctx) error {
|
||||
if len(options.ApiKeys) == 0 {
|
||||
return c.Next()
|
||||
}
|
||||
|
||||
// Check for api_keys.json file
|
||||
fileContent, err := os.ReadFile("api_keys.json")
|
||||
if err == nil {
|
||||
// Parse JSON content from the file
|
||||
var fileKeys []string
|
||||
err := json.Unmarshal(fileContent, &fileKeys)
|
||||
if err != nil {
|
||||
return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"message": "Error parsing api_keys.json"})
|
||||
if len(options.ApiKeys) > 0 {
|
||||
authHeader := c.Get("Authorization")
|
||||
if authHeader == "" {
|
||||
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
|
||||
}
|
||||
authHeaderParts := strings.Split(authHeader, " ")
|
||||
if len(authHeaderParts) != 2 || authHeaderParts[0] != "Bearer" {
|
||||
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid Authorization header format"})
|
||||
}
|
||||
|
||||
// Add file keys to options.ApiKeys
|
||||
options.ApiKeys = append(options.ApiKeys, fileKeys...)
|
||||
}
|
||||
|
||||
if len(options.ApiKeys) == 0 {
|
||||
return c.Next()
|
||||
}
|
||||
|
||||
authHeader := c.Get("Authorization")
|
||||
if authHeader == "" {
|
||||
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
|
||||
}
|
||||
authHeaderParts := strings.Split(authHeader, " ")
|
||||
if len(authHeaderParts) != 2 || authHeaderParts[0] != "Bearer" {
|
||||
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid Authorization header format"})
|
||||
}
|
||||
|
||||
apiKey := authHeaderParts[1]
|
||||
for _, key := range options.ApiKeys {
|
||||
if apiKey == key {
|
||||
return c.Next()
|
||||
apiKey := authHeaderParts[1]
|
||||
validApiKey := false
|
||||
for _, key := range options.ApiKeys {
|
||||
if apiKey == key {
|
||||
validApiKey = true
|
||||
}
|
||||
}
|
||||
if !validApiKey {
|
||||
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"})
|
||||
}
|
||||
}
|
||||
|
||||
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"})
|
||||
|
||||
return c.Next()
|
||||
}
|
||||
|
||||
if options.CORS {
|
||||
@@ -274,7 +229,5 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
|
||||
app.Get("/v1/models", auth, openai.ListModelsEndpoint(options.Loader, cl))
|
||||
app.Get("/models", auth, openai.ListModelsEndpoint(options.Loader, cl))
|
||||
|
||||
app.Get("/metrics", metrics.MetricsHandler())
|
||||
|
||||
return app, nil
|
||||
}
|
||||
|
||||
@@ -15,7 +15,6 @@ import (
|
||||
|
||||
. "github.com/go-skynet/LocalAI/api"
|
||||
"github.com/go-skynet/LocalAI/api/options"
|
||||
"github.com/go-skynet/LocalAI/metrics"
|
||||
"github.com/go-skynet/LocalAI/pkg/gallery"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
@@ -163,12 +162,8 @@ var _ = Describe("API test", func() {
|
||||
},
|
||||
}
|
||||
|
||||
metricsService, err := metrics.SetupMetrics()
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
app, err = App(
|
||||
append(commonOpts,
|
||||
options.WithMetrics(metricsService),
|
||||
options.WithContext(c),
|
||||
options.WithGalleries(galleries),
|
||||
options.WithModelLoader(modelLoader), options.WithBackendAssets(backendAssets), options.WithBackendAssetsOutput(tmpdir))...)
|
||||
@@ -294,14 +289,14 @@ var _ = Describe("API test", func() {
|
||||
Expect(content["backend"]).To(Equal("bert-embeddings"))
|
||||
})
|
||||
|
||||
It("runs openllama(llama-ggml backend)", Label("llama"), func() {
|
||||
It("runs openllama", Label("llama"), func() {
|
||||
if runtime.GOOS != "linux" {
|
||||
Skip("test supported only on linux")
|
||||
}
|
||||
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||
URL: "github:go-skynet/model-gallery/openllama_3b.yaml",
|
||||
Name: "openllama_3b",
|
||||
Overrides: map[string]interface{}{"backend": "llama-ggml", "mmap": true, "f16": true, "context_size": 128},
|
||||
Overrides: map[string]interface{}{"backend": "llama-stable", "mmap": true, "f16": true, "context_size": 128},
|
||||
})
|
||||
|
||||
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
|
||||
@@ -362,10 +357,9 @@ var _ = Describe("API test", func() {
|
||||
Expect(res["location"]).To(Equal("San Francisco, California, United States"), fmt.Sprint(res))
|
||||
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
|
||||
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
|
||||
|
||||
})
|
||||
|
||||
It("runs openllama gguf(llama-cpp)", Label("llama-gguf"), func() {
|
||||
It("runs openllama gguf", Label("llama-gguf"), func() {
|
||||
if runtime.GOOS != "linux" {
|
||||
Skip("test supported only on linux")
|
||||
}
|
||||
@@ -458,7 +452,7 @@ var _ = Describe("API test", func() {
|
||||
Eventually(func() bool {
|
||||
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
|
||||
return response["processed"].(bool)
|
||||
}, "960s", "10s").Should(Equal(true))
|
||||
}, "360s", "10s").Should(Equal(true))
|
||||
|
||||
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-j", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "How are you?"}}})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
@@ -485,13 +479,9 @@ var _ = Describe("API test", func() {
|
||||
},
|
||||
}
|
||||
|
||||
metricsService, err := metrics.SetupMetrics()
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
app, err = App(
|
||||
append(commonOpts,
|
||||
options.WithContext(c),
|
||||
options.WithMetrics(metricsService),
|
||||
options.WithAudioDir(tmpdir),
|
||||
options.WithImageDir(tmpdir),
|
||||
options.WithGalleries(galleries),
|
||||
@@ -593,15 +583,12 @@ var _ = Describe("API test", func() {
|
||||
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
|
||||
c, cancel = context.WithCancel(context.Background())
|
||||
|
||||
metricsService, err := metrics.SetupMetrics()
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
var err error
|
||||
app, err = App(
|
||||
append(commonOpts,
|
||||
options.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
|
||||
options.WithContext(c),
|
||||
options.WithModelLoader(modelLoader),
|
||||
options.WithMetrics(metricsService),
|
||||
)...)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
go app.Listen("127.0.0.1:9090")
|
||||
@@ -688,7 +675,7 @@ var _ = Describe("API test", func() {
|
||||
Input: []string{"sun", "cat"},
|
||||
},
|
||||
)
|
||||
Expect(err).ToNot(HaveOccurred(), err)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
|
||||
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
|
||||
|
||||
@@ -705,7 +692,7 @@ var _ = Describe("API test", func() {
|
||||
})
|
||||
|
||||
Context("External gRPC calls", func() {
|
||||
It("calculate embeddings with sentencetransformers", func() {
|
||||
It("calculate embeddings with huggingface", func() {
|
||||
if runtime.GOOS != "linux" {
|
||||
Skip("test supported only on linux")
|
||||
}
|
||||
@@ -805,13 +792,10 @@ var _ = Describe("API test", func() {
|
||||
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
|
||||
c, cancel = context.WithCancel(context.Background())
|
||||
|
||||
metricsService, err := metrics.SetupMetrics()
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
var err error
|
||||
app, err = App(
|
||||
append(commonOpts,
|
||||
options.WithContext(c),
|
||||
options.WithMetrics(metricsService),
|
||||
options.WithModelLoader(modelLoader),
|
||||
options.WithConfigFile(os.Getenv("CONFIG_FILE")))...,
|
||||
)
|
||||
|
||||
@@ -16,18 +16,16 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
|
||||
model.WithContext(o.Context),
|
||||
model.WithModel(c.Model),
|
||||
model.WithLoadGRPCLoadModelOpts(&proto.ModelOptions{
|
||||
CUDA: c.CUDA || c.Diffusers.CUDA,
|
||||
CUDA: c.Diffusers.CUDA,
|
||||
SchedulerType: c.Diffusers.SchedulerType,
|
||||
PipelineType: c.Diffusers.PipelineType,
|
||||
CFGScale: c.Diffusers.CFGScale,
|
||||
LoraAdapter: c.LoraAdapter,
|
||||
LoraScale: c.LoraScale,
|
||||
LoraBase: c.LoraBase,
|
||||
IMG2IMG: c.Diffusers.IMG2IMG,
|
||||
CLIPModel: c.Diffusers.ClipModel,
|
||||
CLIPSubfolder: c.Diffusers.ClipSubFolder,
|
||||
CLIPSkip: int32(c.Diffusers.ClipSkip),
|
||||
ControlNet: c.Diffusers.ControlNet,
|
||||
}),
|
||||
})
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ type TokenUsage struct {
|
||||
Completion int
|
||||
}
|
||||
|
||||
func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||
func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||
modelFile := c.Model
|
||||
|
||||
grpcOpts := gRPCModelOpts(c)
|
||||
@@ -72,7 +72,6 @@ func ModelInference(ctx context.Context, s string, images []string, loader *mode
|
||||
fn := func() (LLMResponse, error) {
|
||||
opts := gRPCPredictOpts(c, loader.ModelPath)
|
||||
opts.Prompt = s
|
||||
opts.Images = images
|
||||
|
||||
tokenUsage := TokenUsage{}
|
||||
|
||||
|
||||
@@ -16,10 +16,6 @@ func modelOpts(c config.Config, o *options.Option, opts []model.Option) []model.
|
||||
opts = append(opts, model.WithSingleActiveBackend())
|
||||
}
|
||||
|
||||
if o.ParallelBackendRequests {
|
||||
opts = append(opts, model.EnableParallelRequests)
|
||||
}
|
||||
|
||||
if c.GRPC.Attempts != 0 {
|
||||
opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts))
|
||||
}
|
||||
@@ -42,36 +38,29 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
|
||||
}
|
||||
|
||||
return &pb.ModelOptions{
|
||||
ContextSize: int32(c.ContextSize),
|
||||
Seed: int32(c.Seed),
|
||||
NBatch: int32(b),
|
||||
NoMulMatQ: c.NoMulMatQ,
|
||||
CUDA: c.CUDA, // diffusers, transformers
|
||||
DraftModel: c.DraftModel,
|
||||
AudioPath: c.VallE.AudioPath,
|
||||
Quantization: c.Quantization,
|
||||
MMProj: c.MMProj,
|
||||
YarnExtFactor: c.YarnExtFactor,
|
||||
YarnAttnFactor: c.YarnAttnFactor,
|
||||
YarnBetaFast: c.YarnBetaFast,
|
||||
YarnBetaSlow: c.YarnBetaSlow,
|
||||
LoraAdapter: c.LoraAdapter,
|
||||
LoraBase: c.LoraBase,
|
||||
LoraScale: c.LoraScale,
|
||||
NGQA: c.NGQA,
|
||||
RMSNormEps: c.RMSNormEps,
|
||||
F16Memory: c.F16,
|
||||
MLock: c.MMlock,
|
||||
RopeFreqBase: c.RopeFreqBase,
|
||||
RopeFreqScale: c.RopeFreqScale,
|
||||
NUMA: c.NUMA,
|
||||
Embeddings: c.Embeddings,
|
||||
LowVRAM: c.LowVRAM,
|
||||
NGPULayers: int32(c.NGPULayers),
|
||||
MMap: c.MMap,
|
||||
MainGPU: c.MainGPU,
|
||||
Threads: int32(c.Threads),
|
||||
TensorSplit: c.TensorSplit,
|
||||
ContextSize: int32(c.ContextSize),
|
||||
Seed: int32(c.Seed),
|
||||
NBatch: int32(b),
|
||||
NoMulMatQ: c.NoMulMatQ,
|
||||
DraftModel: c.DraftModel,
|
||||
AudioPath: c.VallE.AudioPath,
|
||||
Quantization: c.Quantization,
|
||||
LoraAdapter: c.LoraAdapter,
|
||||
LoraBase: c.LoraBase,
|
||||
NGQA: c.NGQA,
|
||||
RMSNormEps: c.RMSNormEps,
|
||||
F16Memory: c.F16,
|
||||
MLock: c.MMlock,
|
||||
RopeFreqBase: c.RopeFreqBase,
|
||||
RopeFreqScale: c.RopeFreqScale,
|
||||
NUMA: c.NUMA,
|
||||
Embeddings: c.Embeddings,
|
||||
LowVRAM: c.LowVRAM,
|
||||
NGPULayers: int32(c.NGPULayers),
|
||||
MMap: c.MMap,
|
||||
MainGPU: c.MainGPU,
|
||||
Threads: int32(c.Threads),
|
||||
TensorSplit: c.TensorSplit,
|
||||
// AutoGPTQ
|
||||
ModelBaseName: c.AutoGPTQ.ModelBaseName,
|
||||
Device: c.AutoGPTQ.Device,
|
||||
|
||||
@@ -59,13 +59,9 @@ func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *opt
|
||||
// If the model file is not empty, we pass it joined with the model path
|
||||
modelPath := ""
|
||||
if modelFile != "" {
|
||||
if bb != model.TransformersMusicGen {
|
||||
modelPath = filepath.Join(o.Loader.ModelPath, modelFile)
|
||||
if err := utils.VerifyPath(modelPath, o.Loader.ModelPath); err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
} else {
|
||||
modelPath = modelFile
|
||||
modelPath = filepath.Join(o.Loader.ModelPath, modelFile)
|
||||
if err := utils.VerifyPath(modelPath, o.Loader.ModelPath); err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -8,8 +8,6 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
"github.com/rs/zerolog/log"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
@@ -40,17 +38,14 @@ type Config struct {
|
||||
|
||||
// Diffusers
|
||||
Diffusers Diffusers `yaml:"diffusers"`
|
||||
Step int `yaml:"step"`
|
||||
|
||||
Step int `yaml:"step"`
|
||||
|
||||
// GRPC Options
|
||||
GRPC GRPC `yaml:"grpc"`
|
||||
|
||||
// Vall-e-x
|
||||
VallE VallE `yaml:"vall-e"`
|
||||
|
||||
// CUDA
|
||||
// Explicitly enable CUDA or not (some backends might need it)
|
||||
CUDA bool `yaml:"cuda"`
|
||||
}
|
||||
|
||||
type VallE struct {
|
||||
@@ -70,16 +65,15 @@ type GRPC struct {
|
||||
}
|
||||
|
||||
type Diffusers struct {
|
||||
CUDA bool `yaml:"cuda"`
|
||||
PipelineType string `yaml:"pipeline_type"`
|
||||
SchedulerType string `yaml:"scheduler_type"`
|
||||
CUDA bool `yaml:"cuda"`
|
||||
EnableParameters string `yaml:"enable_parameters"` // A list of comma separated parameters to specify
|
||||
CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale
|
||||
IMG2IMG bool `yaml:"img2img"` // Image to Image Diffuser
|
||||
ClipSkip int `yaml:"clip_skip"` // Skip every N frames
|
||||
ClipModel string `yaml:"clip_model"` // Clip model to use
|
||||
ClipSubFolder string `yaml:"clip_subfolder"` // Subfolder to use for clip model
|
||||
ControlNet string `yaml:"control_net"`
|
||||
}
|
||||
|
||||
type LLMConfig struct {
|
||||
@@ -106,18 +100,10 @@ type LLMConfig struct {
|
||||
NUMA bool `yaml:"numa"`
|
||||
LoraAdapter string `yaml:"lora_adapter"`
|
||||
LoraBase string `yaml:"lora_base"`
|
||||
LoraScale float32 `yaml:"lora_scale"`
|
||||
NoMulMatQ bool `yaml:"no_mulmatq"`
|
||||
DraftModel string `yaml:"draft_model"`
|
||||
NDraft int32 `yaml:"n_draft"`
|
||||
Quantization string `yaml:"quantization"`
|
||||
MMProj string `yaml:"mmproj"`
|
||||
|
||||
RopeScaling string `yaml:"rope_scaling"`
|
||||
YarnExtFactor float32 `yaml:"yarn_ext_factor"`
|
||||
YarnAttnFactor float32 `yaml:"yarn_attn_factor"`
|
||||
YarnBetaFast float32 `yaml:"yarn_beta_fast"`
|
||||
YarnBetaSlow float32 `yaml:"yarn_beta_slow"`
|
||||
}
|
||||
|
||||
type AutoGPTQ struct {
|
||||
@@ -266,36 +252,6 @@ func (cm *ConfigLoader) ListConfigs() []string {
|
||||
return res
|
||||
}
|
||||
|
||||
func (cm *ConfigLoader) Preload(modelPath string) error {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
|
||||
for i, config := range cm.configs {
|
||||
modelURL := config.PredictionOptions.Model
|
||||
modelURL = utils.ConvertURL(modelURL)
|
||||
if strings.HasPrefix(modelURL, "http://") || strings.HasPrefix(modelURL, "https://") {
|
||||
// md5 of model name
|
||||
md5Name := utils.MD5(modelURL)
|
||||
|
||||
// check if file exists
|
||||
if _, err := os.Stat(filepath.Join(modelPath, md5Name)); err == os.ErrNotExist {
|
||||
err := utils.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", func(fileName, current, total string, percent float64) {
|
||||
log.Info().Msgf("Downloading %s: %s/%s (%.2f%%)", fileName, current, total, percent)
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
cc := cm.configs[i]
|
||||
c := &cc
|
||||
c.PredictionOptions.Model = md5Name
|
||||
cm.configs[i] = *c
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *ConfigLoader) LoadConfigs(path string) error {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
@@ -313,7 +269,7 @@ func (cm *ConfigLoader) LoadConfigs(path string) error {
|
||||
}
|
||||
for _, file := range files {
|
||||
// Skip templates, YAML and .keep files
|
||||
if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") {
|
||||
if !strings.Contains(file.Name(), ".yaml") {
|
||||
continue
|
||||
}
|
||||
c, err := ReadConfig(filepath.Join(path, file.Name()))
|
||||
|
||||
@@ -123,12 +123,13 @@ func BackendMonitorEndpoint(bm BackendMonitor) func(c *fiber.Ctx) error {
|
||||
return err
|
||||
}
|
||||
|
||||
model := bm.options.Loader.CheckIsLoaded(backendId)
|
||||
if model == "" {
|
||||
client := bm.options.Loader.CheckIsLoaded(backendId)
|
||||
|
||||
if client == nil {
|
||||
return fmt.Errorf("backend %s is not currently loaded", backendId)
|
||||
}
|
||||
|
||||
status, rpcErr := model.GRPC(false, nil).Status(context.TODO())
|
||||
status, rpcErr := client.Status(context.TODO())
|
||||
if rpcErr != nil {
|
||||
log.Warn().Msgf("backend %s experienced an error retrieving status info: %s", backendId, rpcErr.Error())
|
||||
val, slbErr := bm.SampleLocalBackendProcess(backendId)
|
||||
|
||||
@@ -81,10 +81,6 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||
noActionDescription = config.FunctionsConfig.NoActionDescriptionName
|
||||
}
|
||||
|
||||
if input.ResponseFormat.Type == "json_object" {
|
||||
input.Grammar = grammar.JSONBNF
|
||||
}
|
||||
|
||||
// process functions if we have any defined or if we have a function call string
|
||||
if len(input.Functions) > 0 && config.ShouldUseFunctions() {
|
||||
log.Debug().Msgf("Response needs to process functions")
|
||||
@@ -144,14 +140,14 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||
}
|
||||
}
|
||||
r := config.Roles[role]
|
||||
contentExists := i.Content != nil && i.StringContent != ""
|
||||
contentExists := i.Content != nil && *i.Content != ""
|
||||
// First attempt to populate content via a chat message specific template
|
||||
if config.TemplateConfig.ChatMessage != "" {
|
||||
chatMessageData := model.ChatMessageTemplateData{
|
||||
SystemPrompt: config.SystemPrompt,
|
||||
Role: r,
|
||||
RoleName: role,
|
||||
Content: i.StringContent,
|
||||
Content: *i.Content,
|
||||
MessageIndex: messageIndex,
|
||||
}
|
||||
templatedChatMessage, err := o.Loader.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
|
||||
@@ -170,7 +166,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||
if content == "" {
|
||||
if r != "" {
|
||||
if contentExists {
|
||||
content = fmt.Sprint(r, i.StringContent)
|
||||
content = fmt.Sprint(r, " ", *i.Content)
|
||||
}
|
||||
if i.FunctionCall != nil {
|
||||
j, err := json.Marshal(i.FunctionCall)
|
||||
@@ -184,7 +180,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||
}
|
||||
} else {
|
||||
if contentExists {
|
||||
content = fmt.Sprint(i.StringContent)
|
||||
content = fmt.Sprint(*i.Content)
|
||||
}
|
||||
if i.FunctionCall != nil {
|
||||
j, err := json.Marshal(i.FunctionCall)
|
||||
@@ -219,12 +215,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||
c.Set("Transfer-Encoding", "chunked")
|
||||
}
|
||||
|
||||
templateFile := ""
|
||||
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
if o.Loader.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
|
||||
templateFile = config.Model
|
||||
}
|
||||
templateFile := config.Model
|
||||
|
||||
if config.TemplateConfig.Chat != "" && !processFunctions {
|
||||
templateFile = config.TemplateConfig.Chat
|
||||
@@ -234,19 +225,18 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||
templateFile = config.TemplateConfig.Functions
|
||||
}
|
||||
|
||||
if templateFile != "" {
|
||||
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
|
||||
SystemPrompt: config.SystemPrompt,
|
||||
SuppressSystemPrompt: suppressConfigSystemPrompt,
|
||||
Input: predInput,
|
||||
Functions: funcs,
|
||||
})
|
||||
if err == nil {
|
||||
predInput = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
||||
} else {
|
||||
log.Debug().Msgf("Template failed loading: %s", err.Error())
|
||||
}
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
|
||||
SystemPrompt: config.SystemPrompt,
|
||||
SuppressSystemPrompt: suppressConfigSystemPrompt,
|
||||
Input: predInput,
|
||||
Functions: funcs,
|
||||
})
|
||||
if err == nil {
|
||||
predInput = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
||||
} else {
|
||||
log.Debug().Msgf("Template failed loading: %s", err.Error())
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Prompt (after templating): %s", predInput)
|
||||
@@ -344,11 +334,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||
// Otherwise ask the LLM to understand the JSON output and the context, and return a message
|
||||
// Note: This costs (in term of CPU) another computation
|
||||
config.Grammar = ""
|
||||
images := []string{}
|
||||
for _, m := range input.Messages {
|
||||
images = append(images, m.StringImages...)
|
||||
}
|
||||
predFunc, err := backend.ModelInference(input.Context, predInput, images, o.Loader, *config, o, nil)
|
||||
predFunc, err := backend.ModelInference(input.Context, predInput, o.Loader, *config, o, nil)
|
||||
if err != nil {
|
||||
log.Error().Msgf("inference error: %s", err.Error())
|
||||
return
|
||||
|
||||
@@ -12,7 +12,6 @@ import (
|
||||
config "github.com/go-skynet/LocalAI/api/config"
|
||||
"github.com/go-skynet/LocalAI/api/options"
|
||||
"github.com/go-skynet/LocalAI/api/schema"
|
||||
"github.com/go-skynet/LocalAI/pkg/grammar"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
@@ -65,10 +64,6 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
if input.ResponseFormat.Type == "json_object" {
|
||||
input.Grammar = grammar.JSONBNF
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
|
||||
if input.Stream {
|
||||
@@ -81,12 +76,7 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
||||
c.Set("Transfer-Encoding", "chunked")
|
||||
}
|
||||
|
||||
templateFile := ""
|
||||
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
if o.Loader.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
|
||||
templateFile = config.Model
|
||||
}
|
||||
templateFile := config.Model
|
||||
|
||||
if config.TemplateConfig.Completion != "" {
|
||||
templateFile = config.TemplateConfig.Completion
|
||||
@@ -99,14 +89,13 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
||||
|
||||
predInput := config.PromptStrings[0]
|
||||
|
||||
if templateFile != "" {
|
||||
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
|
||||
Input: predInput,
|
||||
})
|
||||
if err == nil {
|
||||
predInput = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
||||
}
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
|
||||
Input: predInput,
|
||||
})
|
||||
if err == nil {
|
||||
predInput = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
||||
}
|
||||
|
||||
responses := make(chan schema.OpenAIResponse)
|
||||
@@ -151,16 +140,14 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
|
||||
totalTokenUsage := backend.TokenUsage{}
|
||||
|
||||
for k, i := range config.PromptStrings {
|
||||
if templateFile != "" {
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
|
||||
SystemPrompt: config.SystemPrompt,
|
||||
Input: i,
|
||||
})
|
||||
if err == nil {
|
||||
i = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", i)
|
||||
}
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
|
||||
SystemPrompt: config.SystemPrompt,
|
||||
Input: i,
|
||||
})
|
||||
if err == nil {
|
||||
i = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", i)
|
||||
}
|
||||
|
||||
r, tokenUsage, err := ComputeChoices(
|
||||
|
||||
@@ -30,12 +30,7 @@ func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
|
||||
templateFile := ""
|
||||
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
if o.Loader.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
|
||||
templateFile = config.Model
|
||||
}
|
||||
templateFile := config.Model
|
||||
|
||||
if config.TemplateConfig.Edit != "" {
|
||||
templateFile = config.TemplateConfig.Edit
|
||||
@@ -45,16 +40,15 @@ func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
|
||||
totalTokenUsage := backend.TokenUsage{}
|
||||
|
||||
for _, i := range config.InputStrings {
|
||||
if templateFile != "" {
|
||||
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{
|
||||
Input: i,
|
||||
Instruction: input.Instruction,
|
||||
SystemPrompt: config.SystemPrompt,
|
||||
})
|
||||
if err == nil {
|
||||
i = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", i)
|
||||
}
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{
|
||||
Input: i,
|
||||
Instruction: input.Instruction,
|
||||
SystemPrompt: config.SystemPrompt,
|
||||
})
|
||||
if err == nil {
|
||||
i = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", i)
|
||||
}
|
||||
|
||||
r, tokenUsage, err := ComputeChoices(input, i, config, o, o.Loader, func(s string, c *[]schema.Choice) {
|
||||
|
||||
@@ -5,8 +5,6 @@ import (
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
@@ -24,26 +22,6 @@ import (
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
func downloadFile(url string) (string, error) {
|
||||
// Get the data
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Create the file
|
||||
out, err := os.CreateTemp("", "image")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer out.Close()
|
||||
|
||||
// Write the body to file
|
||||
_, err = io.Copy(out, resp.Body)
|
||||
return out.Name(), err
|
||||
}
|
||||
|
||||
// https://platform.openai.com/docs/api-reference/images/create
|
||||
|
||||
/*
|
||||
@@ -78,31 +56,12 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
|
||||
|
||||
src := ""
|
||||
if input.File != "" {
|
||||
|
||||
fileData := []byte{}
|
||||
// check if input.File is an URL, if so download it and save it
|
||||
// to a temporary file
|
||||
if strings.HasPrefix(input.File, "http://") || strings.HasPrefix(input.File, "https://") {
|
||||
out, err := downloadFile(input.File)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed downloading file:%w", err)
|
||||
}
|
||||
defer os.RemoveAll(out)
|
||||
|
||||
fileData, err = os.ReadFile(out)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading file:%w", err)
|
||||
}
|
||||
|
||||
} else {
|
||||
// base 64 decode the file and write it somewhere
|
||||
// that we will cleanup
|
||||
fileData, err = base64.StdEncoding.DecodeString(input.File)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
//base 64 decode the file and write it somewhere
|
||||
// that we will cleanup
|
||||
decoded, err := base64.StdEncoding.DecodeString(input.File)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Create a temporary file
|
||||
outputFile, err := os.CreateTemp(o.ImageDir, "b64")
|
||||
if err != nil {
|
||||
@@ -110,7 +69,7 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
|
||||
}
|
||||
// write the base64 result
|
||||
writer := bufio.NewWriter(outputFile)
|
||||
_, err = writer.Write(fileData)
|
||||
_, err = writer.Write(decoded)
|
||||
if err != nil {
|
||||
outputFile.Close()
|
||||
return err
|
||||
@@ -141,7 +100,7 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
|
||||
}
|
||||
|
||||
b64JSON := false
|
||||
if input.ResponseFormat.Type == "b64_json" {
|
||||
if input.ResponseFormat == "b64_json" {
|
||||
b64JSON = true
|
||||
}
|
||||
// src and clip_skip
|
||||
|
||||
@@ -23,13 +23,8 @@ func ComputeChoices(
|
||||
n = 1
|
||||
}
|
||||
|
||||
images := []string{}
|
||||
for _, m := range req.Messages {
|
||||
images = append(images, m.StringImages...)
|
||||
}
|
||||
|
||||
// get the model function to call for the result
|
||||
predFunc, err := backend.ModelInference(req.Context, predInput, images, loader, *config, o, tokenCallback)
|
||||
predFunc, err := backend.ModelInference(req.Context, predInput, loader, *config, o, tokenCallback)
|
||||
if err != nil {
|
||||
return result, backend.TokenUsage{}, err
|
||||
}
|
||||
|
||||
@@ -2,11 +2,8 @@ package openai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
@@ -27,7 +24,7 @@ func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *sche
|
||||
input.Cancel = cancel
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return "", nil, fmt.Errorf("failed parsing request body: %w", err)
|
||||
return "", nil, err
|
||||
}
|
||||
|
||||
modelFile := input.Model
|
||||
@@ -64,37 +61,6 @@ func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *sche
|
||||
return modelFile, input, nil
|
||||
}
|
||||
|
||||
// this function check if the string is an URL, if it's an URL downloads the image in memory
|
||||
// encodes it in base64 and returns the base64 string
|
||||
func getBase64Image(s string) (string, error) {
|
||||
if strings.HasPrefix(s, "http") {
|
||||
// download the image
|
||||
resp, err := http.Get(s)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// read the image data into memory
|
||||
data, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// encode the image data in base64
|
||||
encoded := base64.StdEncoding.EncodeToString(data)
|
||||
|
||||
// return the base64 string
|
||||
return encoded, nil
|
||||
}
|
||||
|
||||
// if the string instead is prefixed with "data:image/jpeg;base64,", drop it
|
||||
if strings.HasPrefix(s, "data:image/jpeg;base64,") {
|
||||
return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil
|
||||
}
|
||||
return "", fmt.Errorf("not valid string")
|
||||
}
|
||||
|
||||
func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
|
||||
if input.Echo {
|
||||
config.Echo = input.Echo
|
||||
@@ -163,35 +129,6 @@ func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
|
||||
}
|
||||
}
|
||||
|
||||
// Decode each request's message content
|
||||
index := 0
|
||||
for i, m := range input.Messages {
|
||||
switch content := m.Content.(type) {
|
||||
case string:
|
||||
input.Messages[i].StringContent = content
|
||||
case []interface{}:
|
||||
dat, _ := json.Marshal(content)
|
||||
c := []schema.Content{}
|
||||
json.Unmarshal(dat, &c)
|
||||
for _, pp := range c {
|
||||
if pp.Type == "text" {
|
||||
input.Messages[i].StringContent = pp.Text
|
||||
} else if pp.Type == "image_url" {
|
||||
// Detect if pp.ImageURL is an URL, if it is download the image and encode it in base64:
|
||||
base64, err := getBase64Image(pp.ImageURL.URL)
|
||||
if err == nil {
|
||||
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
|
||||
// set a placeholder for each image
|
||||
input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + input.Messages[i].StringContent
|
||||
index++
|
||||
} else {
|
||||
fmt.Print("Failed encoding image", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if input.RepeatPenalty != 0 {
|
||||
config.RepeatPenalty = input.RepeatPenalty
|
||||
}
|
||||
|
||||
@@ -4,9 +4,7 @@ import (
|
||||
"context"
|
||||
"embed"
|
||||
"encoding/json"
|
||||
"time"
|
||||
|
||||
"github.com/go-skynet/LocalAI/metrics"
|
||||
"github.com/go-skynet/LocalAI/pkg/gallery"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/rs/zerolog/log"
|
||||
@@ -26,7 +24,6 @@ type Option struct {
|
||||
PreloadModelsFromPath string
|
||||
CORSAllowOrigins string
|
||||
ApiKeys []string
|
||||
Metrics *metrics.Metrics
|
||||
|
||||
Galleries []gallery.Gallery
|
||||
|
||||
@@ -37,13 +34,7 @@ type Option struct {
|
||||
|
||||
AutoloadGalleries bool
|
||||
|
||||
SingleBackend bool
|
||||
ParallelBackendRequests bool
|
||||
|
||||
WatchDogIdle bool
|
||||
WatchDogBusy bool
|
||||
WatchDog bool
|
||||
WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration
|
||||
SingleBackend bool
|
||||
}
|
||||
|
||||
type AppOption func(*Option)
|
||||
@@ -69,40 +60,10 @@ func WithCors(b bool) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
var EnableWatchDog = func(o *Option) {
|
||||
o.WatchDog = true
|
||||
}
|
||||
|
||||
var EnableWatchDogIdleCheck = func(o *Option) {
|
||||
o.WatchDog = true
|
||||
o.WatchDogIdle = true
|
||||
}
|
||||
|
||||
var EnableWatchDogBusyCheck = func(o *Option) {
|
||||
o.WatchDog = true
|
||||
o.WatchDogBusy = true
|
||||
}
|
||||
|
||||
func SetWatchDogBusyTimeout(t time.Duration) AppOption {
|
||||
return func(o *Option) {
|
||||
o.WatchDogBusyTimeout = t
|
||||
}
|
||||
}
|
||||
|
||||
func SetWatchDogIdleTimeout(t time.Duration) AppOption {
|
||||
return func(o *Option) {
|
||||
o.WatchDogIdleTimeout = t
|
||||
}
|
||||
}
|
||||
|
||||
var EnableSingleBackend = func(o *Option) {
|
||||
o.SingleBackend = true
|
||||
}
|
||||
|
||||
var EnableParallelBackendRequests = func(o *Option) {
|
||||
o.ParallelBackendRequests = true
|
||||
}
|
||||
|
||||
var EnableGalleriesAutoload = func(o *Option) {
|
||||
o.AutoloadGalleries = true
|
||||
}
|
||||
@@ -237,9 +198,3 @@ func WithApiKeys(apiKeys []string) AppOption {
|
||||
o.ApiKeys = apiKeys
|
||||
}
|
||||
}
|
||||
|
||||
func WithMetrics(meter *metrics.Metrics) AppOption {
|
||||
return func(o *Option) {
|
||||
o.Metrics = meter
|
||||
}
|
||||
}
|
||||
|
||||
@@ -55,25 +55,11 @@ type Choice struct {
|
||||
Text string `json:"text,omitempty"`
|
||||
}
|
||||
|
||||
type Content struct {
|
||||
Type string `json:"type" yaml:"type"`
|
||||
Text string `json:"text" yaml:"text"`
|
||||
ImageURL ContentURL `json:"image_url" yaml:"image_url"`
|
||||
}
|
||||
|
||||
type ContentURL struct {
|
||||
URL string `json:"url" yaml:"url"`
|
||||
}
|
||||
|
||||
type Message struct {
|
||||
// The message role
|
||||
Role string `json:"role,omitempty" yaml:"role"`
|
||||
// The message content
|
||||
Content interface{} `json:"content" yaml:"content"`
|
||||
|
||||
StringContent string `json:"string_content,omitempty" yaml:"string_content,omitempty"`
|
||||
StringImages []string `json:"string_images,omitempty" yaml:"string_images,omitempty"`
|
||||
|
||||
Content *string `json:"content" yaml:"content"`
|
||||
// A result of a function call
|
||||
FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
|
||||
}
|
||||
@@ -83,12 +69,6 @@ type OpenAIModel struct {
|
||||
Object string `json:"object"`
|
||||
}
|
||||
|
||||
type ChatCompletionResponseFormatType string
|
||||
|
||||
type ChatCompletionResponseFormat struct {
|
||||
Type ChatCompletionResponseFormatType `json:"type,omitempty"`
|
||||
}
|
||||
|
||||
type OpenAIRequest struct {
|
||||
config.PredictionOptions
|
||||
|
||||
@@ -98,7 +78,7 @@ type OpenAIRequest struct {
|
||||
// whisper
|
||||
File string `json:"file" validate:"required"`
|
||||
//whisper/image
|
||||
ResponseFormat ChatCompletionResponseFormat `json:"response_format"`
|
||||
ResponseFormat string `json:"response_format"`
|
||||
// image
|
||||
Size string `json:"size"`
|
||||
// Prompt is read only by completion/image API calls
|
||||
|
||||
3
backend/cpp/grpc/.gitignore
vendored
3
backend/cpp/grpc/.gitignore
vendored
@@ -1,3 +0,0 @@
|
||||
installed_packages/
|
||||
grpc_build/
|
||||
grpc_repo/
|
||||
@@ -1,81 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Builds locally from sources the packages needed by the llama cpp backend.
|
||||
|
||||
# Makes sure a few base packages exist.
|
||||
# sudo apt-get --no-upgrade -y install g++ gcc binutils cmake git build-essential autoconf libtool pkg-config
|
||||
|
||||
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
||||
echo "Script directory: $SCRIPT_DIR"
|
||||
|
||||
CPP_INSTALLED_PACKAGES_DIR=$1
|
||||
if [ -z ${CPP_INSTALLED_PACKAGES_DIR} ]; then
|
||||
echo "CPP_INSTALLED_PACKAGES_DIR env variable not set. Don't know where to install: failed.";
|
||||
echo
|
||||
exit -1
|
||||
fi
|
||||
|
||||
if [ -d "${CPP_INSTALLED_PACKAGES_DIR}" ]; then
|
||||
echo "gRPC installation directory already exists. Nothing to do."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# The depth when cloning a git repo. 1 speeds up the clone when the repo history is not needed.
|
||||
GIT_CLONE_DEPTH=1
|
||||
|
||||
NUM_BUILD_THREADS=$(nproc --ignore=1)
|
||||
|
||||
# Google gRPC --------------------------------------------------------------------------------------
|
||||
TAG_LIB_GRPC="v1.59.0"
|
||||
GIT_REPO_LIB_GRPC="https://github.com/grpc/grpc.git"
|
||||
GRPC_REPO_DIR="${SCRIPT_DIR}/../grpc_repo"
|
||||
GRPC_BUILD_DIR="${SCRIPT_DIR}/../grpc_build"
|
||||
SRC_DIR_LIB_GRPC="${GRPC_REPO_DIR}/grpc"
|
||||
|
||||
echo "SRC_DIR_LIB_GRPC: ${SRC_DIR_LIB_GRPC}"
|
||||
echo "GRPC_REPO_DIR: ${GRPC_REPO_DIR}"
|
||||
echo "GRPC_BUILD_DIR: ${GRPC_BUILD_DIR}"
|
||||
|
||||
mkdir -pv ${GRPC_REPO_DIR}
|
||||
|
||||
rm -rf ${GRPC_BUILD_DIR}
|
||||
mkdir -pv ${GRPC_BUILD_DIR}
|
||||
|
||||
mkdir -pv ${CPP_INSTALLED_PACKAGES_DIR}
|
||||
|
||||
if [ -d "${SRC_DIR_LIB_GRPC}" ]; then
|
||||
echo "gRPC source already exists locally. Not cloned again."
|
||||
else
|
||||
( cd ${GRPC_REPO_DIR} && \
|
||||
git clone --depth ${GIT_CLONE_DEPTH} -b ${TAG_LIB_GRPC} ${GIT_REPO_LIB_GRPC} && \
|
||||
cd ${SRC_DIR_LIB_GRPC} && \
|
||||
git submodule update --init --recursive --depth ${GIT_CLONE_DEPTH}
|
||||
)
|
||||
fi
|
||||
|
||||
( cd ${GRPC_BUILD_DIR} && \
|
||||
cmake -G "Unix Makefiles" \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DgRPC_INSTALL=ON \
|
||||
-DEXECUTABLE_OUTPUT_PATH=${CPP_INSTALLED_PACKAGES_DIR}/grpc/bin \
|
||||
-DLIBRARY_OUTPUT_PATH=${CPP_INSTALLED_PACKAGES_DIR}/grpc/lib \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
-DgRPC_BUILD_CSHARP_EXT=OFF \
|
||||
-DgRPC_BUILD_GRPC_CPP_PLUGIN=ON \
|
||||
-DgRPC_BUILD_GRPC_CSHARP_PLUGIN=OFF \
|
||||
-DgRPC_BUILD_GRPC_NODE_PLUGIN=OFF \
|
||||
-DgRPC_BUILD_GRPC_OBJECTIVE_C_PLUGIN=OFF \
|
||||
-DgRPC_BUILD_GRPC_PHP_PLUGIN=OFF \
|
||||
-DgRPC_BUILD_GRPC_PYTHON_PLUGIN=ON \
|
||||
-DgRPC_BUILD_GRPC_RUBY_PLUGIN=OFF \
|
||||
-Dprotobuf_WITH_ZLIB=ON \
|
||||
-DRE2_BUILD_TESTING=OFF \
|
||||
-DCMAKE_INSTALL_PREFIX=${CPP_INSTALLED_PACKAGES_DIR}/ \
|
||||
${SRC_DIR_LIB_GRPC} && \
|
||||
cmake --build . -- -j ${NUM_BUILD_THREADS} && \
|
||||
cmake --build . --target install -- -j ${NUM_BUILD_THREADS}
|
||||
)
|
||||
|
||||
rm -rf ${GRPC_BUILD_DIR}
|
||||
rm -rf ${GRPC_REPO_DIR}
|
||||
|
||||
@@ -1,74 +0,0 @@
|
||||
|
||||
## XXX: In some versions of CMake clip wasn't being built before llama.
|
||||
## This is an hack for now, but it should be fixed in the future.
|
||||
set(TARGET myclip)
|
||||
add_library(${TARGET} clip.cpp clip.h)
|
||||
install(TARGETS ${TARGET} LIBRARY)
|
||||
target_link_libraries(${TARGET} PRIVATE common ggml ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||
if (NOT MSVC)
|
||||
target_compile_options(${TARGET} PRIVATE -Wno-cast-qual) # stb_image.h
|
||||
endif()
|
||||
|
||||
set(TARGET grpc-server)
|
||||
# END CLIP hack
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
cmake_minimum_required(VERSION 3.15)
|
||||
set(TARGET grpc-server)
|
||||
set(_PROTOBUF_LIBPROTOBUF libprotobuf)
|
||||
set(_REFLECTION grpc++_reflection)
|
||||
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||
link_directories("/opt/homebrew/lib")
|
||||
include_directories("/opt/homebrew/include")
|
||||
endif()
|
||||
|
||||
find_package(absl CONFIG REQUIRED)
|
||||
find_package(Protobuf CONFIG REQUIRED)
|
||||
find_package(gRPC CONFIG REQUIRED)
|
||||
|
||||
find_program(_PROTOBUF_PROTOC protoc)
|
||||
set(_GRPC_GRPCPP grpc++)
|
||||
find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)
|
||||
|
||||
include_directories(${CMAKE_CURRENT_BINARY_DIR})
|
||||
include_directories(${Protobuf_INCLUDE_DIRS})
|
||||
|
||||
message(STATUS "Using protobuf version ${Protobuf_VERSION} | Protobuf_INCLUDE_DIRS: ${Protobuf_INCLUDE_DIRS} | CMAKE_CURRENT_BINARY_DIR: ${CMAKE_CURRENT_BINARY_DIR}")
|
||||
|
||||
# Proto file
|
||||
get_filename_component(hw_proto "../../../../../../backend/backend.proto" ABSOLUTE)
|
||||
get_filename_component(hw_proto_path "${hw_proto}" PATH)
|
||||
|
||||
# Generated sources
|
||||
set(hw_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/backend.pb.cc")
|
||||
set(hw_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/backend.pb.h")
|
||||
set(hw_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/backend.grpc.pb.cc")
|
||||
set(hw_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/backend.grpc.pb.h")
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT "${hw_proto_srcs}" "${hw_proto_hdrs}" "${hw_grpc_srcs}" "${hw_grpc_hdrs}"
|
||||
COMMAND ${_PROTOBUF_PROTOC}
|
||||
ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}"
|
||||
--cpp_out "${CMAKE_CURRENT_BINARY_DIR}"
|
||||
-I "${hw_proto_path}"
|
||||
--plugin=protoc-gen-grpc="${_GRPC_CPP_PLUGIN_EXECUTABLE}"
|
||||
"${hw_proto}"
|
||||
DEPENDS "${hw_proto}")
|
||||
|
||||
# hw_grpc_proto
|
||||
add_library(hw_grpc_proto
|
||||
${hw_grpc_srcs}
|
||||
${hw_grpc_hdrs}
|
||||
${hw_proto_srcs}
|
||||
${hw_proto_hdrs} )
|
||||
|
||||
add_executable(${TARGET} grpc-server.cpp json.hpp )
|
||||
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
|
||||
absl::flags_parse
|
||||
gRPC::${_REFLECTION}
|
||||
gRPC::${_GRPC_GRPCPP}
|
||||
protobuf::${_PROTOBUF_LIBPROTOBUF})
|
||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||
if(TARGET BUILD_INFO)
|
||||
add_dependencies(${TARGET} BUILD_INFO)
|
||||
endif()
|
||||
@@ -1,53 +0,0 @@
|
||||
|
||||
LLAMA_VERSION?=
|
||||
|
||||
CMAKE_ARGS?=
|
||||
BUILD_TYPE?=
|
||||
|
||||
# If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
|
||||
ifeq ($(BUILD_TYPE),cublas)
|
||||
CMAKE_ARGS+=-DLLAMA_CUBLAS=ON
|
||||
# If build type is openblas then we set -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
|
||||
# to CMAKE_ARGS automatically
|
||||
else ifeq ($(BUILD_TYPE),openblas)
|
||||
CMAKE_ARGS+=-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
|
||||
# If build type is clblast (openCL) we set -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
else ifeq ($(BUILD_TYPE),clblast)
|
||||
CMAKE_ARGS+=-DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
||||
else ifeq ($(BUILD_TYPE),hipblas)
|
||||
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
|
||||
endif
|
||||
|
||||
llama.cpp:
|
||||
git clone --recurse-submodules https://github.com/ggerganov/llama.cpp llama.cpp
|
||||
if [ -z "$(LLAMA_VERSION)" ]; then \
|
||||
exit 1; \
|
||||
fi
|
||||
cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
llama.cpp/examples/grpc-server:
|
||||
mkdir -p llama.cpp/examples/grpc-server
|
||||
cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
|
||||
cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
|
||||
cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
|
||||
echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
|
||||
## XXX: In some versions of CMake clip wasn't being built before llama.
|
||||
## This is an hack for now, but it should be fixed in the future.
|
||||
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
|
||||
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
|
||||
|
||||
rebuild:
|
||||
cp -rfv $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
|
||||
cp -rfv $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
|
||||
cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
|
||||
rm -rf grpc-server
|
||||
$(MAKE) grpc-server
|
||||
|
||||
clean:
|
||||
rm -rf llama.cpp
|
||||
rm -rf grpc-server
|
||||
|
||||
grpc-server: llama.cpp llama.cpp/examples/grpc-server
|
||||
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release
|
||||
cp llama.cpp/build/bin/grpc-server .
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,21 +0,0 @@
|
||||
package main
|
||||
|
||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
||||
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
package main
|
||||
|
||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
||||
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &Whisper{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
package main
|
||||
|
||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
||||
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &Piper{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
@@ -1,38 +0,0 @@
|
||||
# Common commands about conda environment
|
||||
|
||||
## Create a new empty conda environment
|
||||
|
||||
```
|
||||
conda create --name <env-name> python=<your version> -y
|
||||
|
||||
conda create --name autogptq python=3.11 -y
|
||||
```
|
||||
|
||||
## To activate the environment
|
||||
|
||||
As of conda 4.4
|
||||
```
|
||||
conda activate autogptq
|
||||
```
|
||||
|
||||
The conda version older than 4.4
|
||||
|
||||
```
|
||||
source activate autogptq
|
||||
```
|
||||
|
||||
## Install the packages to your environment
|
||||
|
||||
Sometimes you need to install the packages from the conda-forge channel
|
||||
|
||||
By using `conda`
|
||||
```
|
||||
conda install <your-package-name>
|
||||
|
||||
conda install -c conda-forge <your package-name>
|
||||
```
|
||||
|
||||
Or by using `pip`
|
||||
```
|
||||
pip install <your-package-name>
|
||||
```
|
||||
@@ -1,5 +0,0 @@
|
||||
.PHONY: autogptq
|
||||
autogptq:
|
||||
@echo "Creating virtual environment..."
|
||||
@conda env create --name autogptq --file autogptq.yml
|
||||
@echo "Virtual environment created."
|
||||
@@ -1,5 +0,0 @@
|
||||
# Creating a separate environment for the autogptq project
|
||||
|
||||
```
|
||||
make autogptq
|
||||
```
|
||||
@@ -1,86 +0,0 @@
|
||||
name: autogptq
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- accelerate==0.23.0
|
||||
- aiohttp==3.8.5
|
||||
- aiosignal==1.3.1
|
||||
- async-timeout==4.0.3
|
||||
- attrs==23.1.0
|
||||
- auto-gptq==0.4.2
|
||||
- certifi==2023.7.22
|
||||
- charset-normalizer==3.3.0
|
||||
- datasets==2.14.5
|
||||
- dill==0.3.7
|
||||
- filelock==3.12.4
|
||||
- frozenlist==1.4.0
|
||||
- fsspec==2023.6.0
|
||||
- grpcio==1.59.0
|
||||
- huggingface-hub==0.16.4
|
||||
- idna==3.4
|
||||
- jinja2==3.1.2
|
||||
- markupsafe==2.1.3
|
||||
- mpmath==1.3.0
|
||||
- multidict==6.0.4
|
||||
- multiprocess==0.70.15
|
||||
- networkx==3.1
|
||||
- numpy==1.26.0
|
||||
- nvidia-cublas-cu12==12.1.3.1
|
||||
- nvidia-cuda-cupti-cu12==12.1.105
|
||||
- nvidia-cuda-nvrtc-cu12==12.1.105
|
||||
- nvidia-cuda-runtime-cu12==12.1.105
|
||||
- nvidia-cudnn-cu12==8.9.2.26
|
||||
- nvidia-cufft-cu12==11.0.2.54
|
||||
- nvidia-curand-cu12==10.3.2.106
|
||||
- nvidia-cusolver-cu12==11.4.5.107
|
||||
- nvidia-cusparse-cu12==12.1.0.106
|
||||
- nvidia-nccl-cu12==2.18.1
|
||||
- nvidia-nvjitlink-cu12==12.2.140
|
||||
- nvidia-nvtx-cu12==12.1.105
|
||||
- packaging==23.2
|
||||
- pandas==2.1.1
|
||||
- peft==0.5.0
|
||||
- protobuf==4.24.4
|
||||
- psutil==5.9.5
|
||||
- pyarrow==13.0.0
|
||||
- python-dateutil==2.8.2
|
||||
- pytz==2023.3.post1
|
||||
- pyyaml==6.0.1
|
||||
- regex==2023.10.3
|
||||
- requests==2.31.0
|
||||
- rouge==1.0.1
|
||||
- safetensors==0.3.3
|
||||
- six==1.16.0
|
||||
- sympy==1.12
|
||||
- tokenizers==0.14.0
|
||||
- torch==2.1.0
|
||||
- tqdm==4.66.1
|
||||
- transformers==4.34.0
|
||||
- triton==2.1.0
|
||||
- typing-extensions==4.8.0
|
||||
- tzdata==2023.3
|
||||
- urllib3==2.0.6
|
||||
- xxhash==3.4.1
|
||||
- yarl==1.9.2
|
||||
File diff suppressed because one or more lines are too long
@@ -1,14 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the autogptq server with conda
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate autogptq
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python $DIR/autogptq.py $@
|
||||
@@ -1,15 +0,0 @@
|
||||
.PHONY: ttsbark
|
||||
ttsbark:
|
||||
$(MAKE) -C ../common-env/transformers
|
||||
|
||||
.PHONY: run
|
||||
run:
|
||||
@echo "Running bark..."
|
||||
bash run.sh
|
||||
@echo "bark run."
|
||||
|
||||
.PHONY: test
|
||||
test:
|
||||
@echo "Testing bark..."
|
||||
bash test.sh
|
||||
@echo "bark tested."
|
||||
@@ -1,16 +0,0 @@
|
||||
# Creating a separate environment for ttsbark project
|
||||
|
||||
```
|
||||
make ttsbark
|
||||
```
|
||||
|
||||
# Testing the gRPC server
|
||||
|
||||
```
|
||||
<The path of your python interpreter> -m unittest test_ttsbark.py
|
||||
```
|
||||
|
||||
For example
|
||||
```
|
||||
/opt/conda/envs/bark/bin/python -m unittest extra/grpc/bark/test_ttsbark.py
|
||||
``````
|
||||
File diff suppressed because one or more lines are too long
@@ -1,14 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the ttsbark server with conda
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python $DIR/ttsbark.py $@
|
||||
@@ -1,81 +0,0 @@
|
||||
"""
|
||||
A test script to test the gRPC service
|
||||
"""
|
||||
import unittest
|
||||
import subprocess
|
||||
import time
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
|
||||
|
||||
class TestBackendServicer(unittest.TestCase):
|
||||
"""
|
||||
TestBackendServicer is the class that tests the gRPC service
|
||||
"""
|
||||
def setUp(self):
|
||||
"""
|
||||
This method sets up the gRPC service by starting the server
|
||||
"""
|
||||
self.service = subprocess.Popen(["python3", "ttsbark.py", "--addr", "localhost:50051"])
|
||||
time.sleep(10)
|
||||
|
||||
def tearDown(self) -> None:
|
||||
"""
|
||||
This method tears down the gRPC service by terminating the server
|
||||
"""
|
||||
self.service.terminate()
|
||||
self.service.wait()
|
||||
|
||||
def test_server_startup(self):
|
||||
"""
|
||||
This method tests if the server starts up successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.Health(backend_pb2.HealthMessage())
|
||||
self.assertEqual(response.message, b'OK')
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("Server failed to start")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_load_model(self):
|
||||
"""
|
||||
This method tests if the model is loaded successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="v2/en_speaker_4"))
|
||||
self.assertTrue(response.success)
|
||||
self.assertEqual(response.message, "Model loaded successfully")
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("LoadModel service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_tts(self):
|
||||
"""
|
||||
This method tests if the embeddings are generated successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="v2/en_speaker_4"))
|
||||
self.assertTrue(response.success)
|
||||
tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story")
|
||||
tts_response = stub.TTS(tts_request)
|
||||
self.assertIsNotNone(tts_response)
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("TTS service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
@@ -1,11 +0,0 @@
|
||||
#!/bin/bash
|
||||
##
|
||||
## A bash script wrapper that runs the bark server with conda
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python -m unittest $DIR/test.py
|
||||
@@ -1,10 +0,0 @@
|
||||
CONDA_ENV_PATH = "transformers.yml"
|
||||
|
||||
ifeq ($(BUILD_TYPE), cublas)
|
||||
CONDA_ENV_PATH = "transformers-nvidia.yml"
|
||||
endif
|
||||
|
||||
.PHONY: transformers
|
||||
transformers:
|
||||
@echo "Installing $(CONDA_ENV_PATH)..."
|
||||
bash install.sh $(CONDA_ENV_PATH)
|
||||
@@ -1,15 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
|
||||
# Check if environment exist
|
||||
conda_env_exists(){
|
||||
! conda list --name "${@}" >/dev/null 2>/dev/null
|
||||
}
|
||||
|
||||
if conda_env_exists "transformers" ; then
|
||||
echo "Creating virtual environment..."
|
||||
conda env create --name transformers --file $1
|
||||
echo "Virtual environment created."
|
||||
else
|
||||
echo "Virtual environment already exists."
|
||||
fi
|
||||
@@ -1,99 +0,0 @@
|
||||
name: transformers
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- accelerate==0.23.0
|
||||
- aiohttp==3.8.5
|
||||
- aiosignal==1.3.1
|
||||
- async-timeout==4.0.3
|
||||
- attrs==23.1.0
|
||||
- bark==0.1.5
|
||||
- boto3==1.28.61
|
||||
- botocore==1.31.61
|
||||
- certifi==2023.7.22
|
||||
- charset-normalizer==3.3.0
|
||||
- datasets==2.14.5
|
||||
- sentence-transformers==2.2.2
|
||||
- sentencepiece==0.1.99
|
||||
- dill==0.3.7
|
||||
- einops==0.7.0
|
||||
- encodec==0.1.1
|
||||
- filelock==3.12.4
|
||||
- frozenlist==1.4.0
|
||||
- fsspec==2023.6.0
|
||||
- funcy==2.0
|
||||
- grpcio==1.59.0
|
||||
- huggingface-hub==0.16.4
|
||||
- idna==3.4
|
||||
- jinja2==3.1.2
|
||||
- jmespath==1.0.1
|
||||
- markupsafe==2.1.3
|
||||
- mpmath==1.3.0
|
||||
- multidict==6.0.4
|
||||
- multiprocess==0.70.15
|
||||
- networkx==3.1
|
||||
- numpy==1.26.0
|
||||
- nvidia-cublas-cu12==12.1.3.1
|
||||
- nvidia-cuda-cupti-cu12==12.1.105
|
||||
- nvidia-cuda-nvrtc-cu12==12.1.105
|
||||
- nvidia-cuda-runtime-cu12==12.1.105
|
||||
- nvidia-cudnn-cu12==8.9.2.26
|
||||
- nvidia-cufft-cu12==11.0.2.54
|
||||
- nvidia-curand-cu12==10.3.2.106
|
||||
- nvidia-cusolver-cu12==11.4.5.107
|
||||
- nvidia-cusparse-cu12==12.1.0.106
|
||||
- nvidia-nccl-cu12==2.18.1
|
||||
- nvidia-nvjitlink-cu12==12.2.140
|
||||
- nvidia-nvtx-cu12==12.1.105
|
||||
- packaging==23.2
|
||||
- pandas==2.1.1
|
||||
- peft==0.5.0
|
||||
- git+https://github.com/bigscience-workshop/petals
|
||||
- protobuf==4.24.4
|
||||
- psutil==5.9.5
|
||||
- pyarrow==13.0.0
|
||||
- python-dateutil==2.8.2
|
||||
- pytz==2023.3.post1
|
||||
- pyyaml==6.0.1
|
||||
- regex==2023.10.3
|
||||
- requests==2.31.0
|
||||
- rouge==1.0.1
|
||||
- s3transfer==0.7.0
|
||||
- safetensors==0.3.3
|
||||
- scipy==1.11.3
|
||||
- six==1.16.0
|
||||
- sympy==1.12
|
||||
- tokenizers==0.14.0
|
||||
- torch==2.1.0
|
||||
- torchaudio==2.1.0
|
||||
- tqdm==4.66.1
|
||||
- transformers==4.34.0
|
||||
- triton==2.1.0
|
||||
- typing-extensions==4.8.0
|
||||
- tzdata==2023.3
|
||||
- urllib3==1.26.17
|
||||
- xxhash==3.4.1
|
||||
- yarl==1.9.2
|
||||
prefix: /opt/conda/envs/transformers
|
||||
@@ -1,87 +0,0 @@
|
||||
name: transformers
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- accelerate==0.23.0
|
||||
- aiohttp==3.8.5
|
||||
- aiosignal==1.3.1
|
||||
- async-timeout==4.0.3
|
||||
- attrs==23.1.0
|
||||
- bark==0.1.5
|
||||
- boto3==1.28.61
|
||||
- botocore==1.31.61
|
||||
- certifi==2023.7.22
|
||||
- charset-normalizer==3.3.0
|
||||
- datasets==2.14.5
|
||||
- sentence-transformers==2.2.2
|
||||
- sentencepiece==0.1.99
|
||||
- dill==0.3.7
|
||||
- einops==0.7.0
|
||||
- encodec==0.1.1
|
||||
- filelock==3.12.4
|
||||
- frozenlist==1.4.0
|
||||
- fsspec==2023.6.0
|
||||
- funcy==2.0
|
||||
- grpcio==1.59.0
|
||||
- huggingface-hub==0.16.4
|
||||
- idna==3.4
|
||||
- jinja2==3.1.2
|
||||
- jmespath==1.0.1
|
||||
- markupsafe==2.1.3
|
||||
- mpmath==1.3.0
|
||||
- multidict==6.0.4
|
||||
- multiprocess==0.70.15
|
||||
- networkx==3.1
|
||||
- numpy==1.26.0
|
||||
- packaging==23.2
|
||||
- pandas==2.1.1
|
||||
- peft==0.5.0
|
||||
- git+https://github.com/bigscience-workshop/petals
|
||||
- protobuf==4.24.4
|
||||
- psutil==5.9.5
|
||||
- pyarrow==13.0.0
|
||||
- python-dateutil==2.8.2
|
||||
- pytz==2023.3.post1
|
||||
- pyyaml==6.0.1
|
||||
- regex==2023.10.3
|
||||
- requests==2.31.0
|
||||
- rouge==1.0.1
|
||||
- s3transfer==0.7.0
|
||||
- safetensors==0.3.3
|
||||
- scipy==1.11.3
|
||||
- six==1.16.0
|
||||
- sympy==1.12
|
||||
- tokenizers==0.14.0
|
||||
- torch==2.1.0
|
||||
- torchaudio==2.1.0
|
||||
- tqdm==4.66.1
|
||||
- transformers==4.34.0
|
||||
- triton==2.1.0
|
||||
- typing-extensions==4.8.0
|
||||
- tzdata==2023.3
|
||||
- urllib3==1.26.17
|
||||
- xxhash==3.4.1
|
||||
- yarl==1.9.2
|
||||
prefix: /opt/conda/envs/transformers
|
||||
@@ -1,14 +0,0 @@
|
||||
.PHONY: diffusers
|
||||
diffusers:
|
||||
@echo "Creating virtual environment..."
|
||||
@conda env create --name diffusers --file diffusers.yml
|
||||
@echo "Virtual environment created."
|
||||
|
||||
.PHONY: run
|
||||
run:
|
||||
@echo "Running diffusers..."
|
||||
bash run.sh
|
||||
@echo "Diffusers run."
|
||||
|
||||
test:
|
||||
bash test.sh
|
||||
@@ -1,5 +0,0 @@
|
||||
# Creating a separate environment for the diffusers project
|
||||
|
||||
```
|
||||
make diffusers
|
||||
```
|
||||
File diff suppressed because one or more lines are too long
@@ -1,73 +0,0 @@
|
||||
name: diffusers
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- tzdata=2023c=h04d1e81_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- accelerate>=0.11.0
|
||||
- certifi==2023.7.22
|
||||
- charset-normalizer==3.3.0
|
||||
- compel==2.0.2
|
||||
- diffusers==0.24.0
|
||||
- filelock==3.12.4
|
||||
- fsspec==2023.9.2
|
||||
- grpcio==1.59.0
|
||||
- huggingface-hub>=0.19.4
|
||||
- idna==3.4
|
||||
- importlib-metadata==6.8.0
|
||||
- jinja2==3.1.2
|
||||
- markupsafe==2.1.3
|
||||
- mpmath==1.3.0
|
||||
- networkx==3.1
|
||||
- numpy==1.26.0
|
||||
- nvidia-cublas-cu12==12.1.3.1
|
||||
- nvidia-cuda-cupti-cu12==12.1.105
|
||||
- nvidia-cuda-nvrtc-cu12==12.1.105
|
||||
- nvidia-cuda-runtime-cu12==12.1.105
|
||||
- nvidia-cudnn-cu12==8.9.2.26
|
||||
- nvidia-cufft-cu12==11.0.2.54
|
||||
- nvidia-curand-cu12==10.3.2.106
|
||||
- nvidia-cusolver-cu12==11.4.5.107
|
||||
- nvidia-cusparse-cu12==12.1.0.106
|
||||
- nvidia-nccl-cu12==2.18.1
|
||||
- nvidia-nvjitlink-cu12==12.2.140
|
||||
- nvidia-nvtx-cu12==12.1.105
|
||||
- packaging==23.2
|
||||
- pillow==10.0.1
|
||||
- protobuf==4.24.4
|
||||
- psutil==5.9.5
|
||||
- pyparsing==3.1.1
|
||||
- pyyaml==6.0.1
|
||||
- regex==2023.10.3
|
||||
- requests==2.31.0
|
||||
- safetensors==0.4.0
|
||||
- sympy==1.12
|
||||
- torch==2.1.0
|
||||
- tqdm==4.66.1
|
||||
- transformers>=4.25.1
|
||||
- triton==2.1.0
|
||||
- typing-extensions==4.8.0
|
||||
- urllib3==2.0.6
|
||||
- zipp==3.17.0
|
||||
prefix: /opt/conda/envs/diffusers
|
||||
@@ -1,14 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the diffusers server with conda
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate diffusers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python $DIR/backend_diffusers.py $@
|
||||
@@ -1,84 +0,0 @@
|
||||
"""
|
||||
A test script to test the gRPC service
|
||||
"""
|
||||
import unittest
|
||||
import subprocess
|
||||
import time
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
|
||||
|
||||
class TestBackendServicer(unittest.TestCase):
|
||||
"""
|
||||
TestBackendServicer is the class that tests the gRPC service
|
||||
"""
|
||||
def setUp(self):
|
||||
"""
|
||||
This method sets up the gRPC service by starting the server
|
||||
"""
|
||||
self.service = subprocess.Popen(["python3", "backend_diffusers.py", "--addr", "localhost:50051"])
|
||||
|
||||
def tearDown(self) -> None:
|
||||
"""
|
||||
This method tears down the gRPC service by terminating the server
|
||||
"""
|
||||
self.service.kill()
|
||||
self.service.wait()
|
||||
|
||||
def test_server_startup(self):
|
||||
"""
|
||||
This method tests if the server starts up successfully
|
||||
"""
|
||||
time.sleep(10)
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.Health(backend_pb2.HealthMessage())
|
||||
self.assertEqual(response.message, b'OK')
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("Server failed to start")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_load_model(self):
|
||||
"""
|
||||
This method tests if the model is loaded successfully
|
||||
"""
|
||||
time.sleep(10)
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="runwayml/stable-diffusion-v1-5"))
|
||||
self.assertTrue(response.success)
|
||||
self.assertEqual(response.message, "Model loaded successfully")
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("LoadModel service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test(self):
|
||||
"""
|
||||
This method tests if the backend can generate images
|
||||
"""
|
||||
time.sleep(10)
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="runwayml/stable-diffusion-v1-5"))
|
||||
print(response.message)
|
||||
self.assertTrue(response.success)
|
||||
image_req = backend_pb2.GenerateImageRequest(positive_prompt="cat", width=16,height=16, dst="test.jpg")
|
||||
re = stub.GenerateImage(image_req)
|
||||
self.assertTrue(re.success)
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("Image gen service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
@@ -1,14 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the diffusers server with conda
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate diffusers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python -m unittest $DIR/test.py
|
||||
@@ -1,12 +0,0 @@
|
||||
.PHONY: exllama
|
||||
exllama:
|
||||
@echo "Creating virtual environment..."
|
||||
@conda env create --name exllama --file exllama.yml
|
||||
@echo "Virtual environment created."
|
||||
bash install.sh
|
||||
|
||||
.PHONY: run
|
||||
run:
|
||||
@echo "Running exllama..."
|
||||
bash run.sh
|
||||
@echo "exllama run."
|
||||
@@ -1,5 +0,0 @@
|
||||
# Creating a separate environment for the exllama project
|
||||
|
||||
```
|
||||
make exllama
|
||||
```
|
||||
File diff suppressed because one or more lines are too long
@@ -1,56 +0,0 @@
|
||||
name: exllama
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- tzdata=2023c=h04d1e81_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- filelock==3.12.4
|
||||
- fsspec==2023.9.2
|
||||
- grpcio==1.59.0
|
||||
- jinja2==3.1.2
|
||||
- markupsafe==2.1.3
|
||||
- mpmath==1.3.0
|
||||
- networkx==3.1
|
||||
- ninja==1.11.1
|
||||
- protobuf==4.24.4
|
||||
- nvidia-cublas-cu12==12.1.3.1
|
||||
- nvidia-cuda-cupti-cu12==12.1.105
|
||||
- nvidia-cuda-nvrtc-cu12==12.1.105
|
||||
- nvidia-cuda-runtime-cu12==12.1.105
|
||||
- nvidia-cudnn-cu12==8.9.2.26
|
||||
- nvidia-cufft-cu12==11.0.2.54
|
||||
- nvidia-curand-cu12==10.3.2.106
|
||||
- nvidia-cusolver-cu12==11.4.5.107
|
||||
- nvidia-cusparse-cu12==12.1.0.106
|
||||
- nvidia-nccl-cu12==2.18.1
|
||||
- nvidia-nvjitlink-cu12==12.2.140
|
||||
- nvidia-nvtx-cu12==12.1.105
|
||||
- safetensors==0.3.2
|
||||
- sentencepiece==0.1.99
|
||||
- sympy==1.12
|
||||
- torch==2.1.0
|
||||
- triton==2.1.0
|
||||
- typing-extensions==4.8.0
|
||||
- numpy
|
||||
prefix: /opt/conda/envs/exllama
|
||||
@@ -1,15 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
##
|
||||
## A bash script installs the required dependencies of VALL-E-X and prepares the environment
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate exllama
|
||||
|
||||
echo $CONDA_PREFIX
|
||||
|
||||
|
||||
git clone https://github.com/turboderp/exllama $CONDA_PREFIX/exllama && pushd $CONDA_PREFIX/exllama && pip install -r requirements.txt && popd
|
||||
|
||||
cp -rfv $CONDA_PREFIX/exllama/* ./
|
||||
@@ -1,14 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the exllama server with conda
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate exllama
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python $DIR/exllama.py $@
|
||||
@@ -1,12 +0,0 @@
|
||||
.PHONY: exllama2
|
||||
exllama2:
|
||||
@echo "Creating virtual environment..."
|
||||
@conda env create --name exllama2 --file exllama2.yml
|
||||
@echo "Virtual environment created."
|
||||
bash install.sh
|
||||
|
||||
.PHONY: run
|
||||
run:
|
||||
@echo "Running exllama2..."
|
||||
bash run.sh
|
||||
@echo "exllama2 run."
|
||||
File diff suppressed because one or more lines are too long
@@ -1,57 +0,0 @@
|
||||
name: exllama2
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- tzdata=2023c=h04d1e81_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- filelock==3.12.4
|
||||
- fsspec==2023.9.2
|
||||
- grpcio==1.59.0
|
||||
- markupsafe==2.1.3
|
||||
- mpmath==1.3.0
|
||||
- networkx==3.1
|
||||
- protobuf==4.24.4
|
||||
- nvidia-cublas-cu12==12.1.3.1
|
||||
- nvidia-cuda-cupti-cu12==12.1.105
|
||||
- nvidia-cuda-nvrtc-cu12==12.1.105
|
||||
- nvidia-cuda-runtime-cu12==12.1.105
|
||||
- nvidia-cudnn-cu12==8.9.2.26
|
||||
- nvidia-cufft-cu12==11.0.2.54
|
||||
- nvidia-curand-cu12==10.3.2.106
|
||||
- nvidia-cusolver-cu12==11.4.5.107
|
||||
- nvidia-cusparse-cu12==12.1.0.106
|
||||
- nvidia-nccl-cu12==2.18.1
|
||||
- nvidia-nvjitlink-cu12==12.2.140
|
||||
- nvidia-nvtx-cu12==12.1.105
|
||||
- pandas
|
||||
- numpy
|
||||
- ninja
|
||||
- fastparquet
|
||||
- torch>=2.1.0
|
||||
- safetensors>=0.3.2
|
||||
- sentencepiece>=0.1.97
|
||||
- pygments
|
||||
- websockets
|
||||
- regex
|
||||
prefix: /opt/conda/envs/exllama2
|
||||
@@ -1,134 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import grpc
|
||||
from concurrent import futures
|
||||
import time
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
import argparse
|
||||
import signal
|
||||
import sys
|
||||
import os, glob
|
||||
|
||||
from pathlib import Path
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import version as torch_version
|
||||
|
||||
|
||||
from exllamav2.generator import (
|
||||
ExLlamaV2BaseGenerator,
|
||||
ExLlamaV2Sampler
|
||||
)
|
||||
|
||||
|
||||
from exllamav2 import(
|
||||
ExLlamaV2,
|
||||
ExLlamaV2Config,
|
||||
ExLlamaV2Cache,
|
||||
ExLlamaV2Cache_8bit,
|
||||
ExLlamaV2Tokenizer,
|
||||
model_init,
|
||||
)
|
||||
|
||||
|
||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||
|
||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||
|
||||
# Implement the BackendServicer class with the service methods
|
||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
def Health(self, request, context):
|
||||
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
||||
def LoadModel(self, request, context):
|
||||
try:
|
||||
model_directory = request.ModelFile
|
||||
|
||||
config = ExLlamaV2Config()
|
||||
config.model_dir = model_directory
|
||||
config.prepare()
|
||||
|
||||
model = ExLlamaV2(config)
|
||||
|
||||
cache = ExLlamaV2Cache(model, lazy = True)
|
||||
model.load_autosplit(cache)
|
||||
|
||||
tokenizer = ExLlamaV2Tokenizer(config)
|
||||
|
||||
# Initialize generator
|
||||
|
||||
generator = ExLlamaV2BaseGenerator(model, cache, tokenizer)
|
||||
|
||||
self.generator= generator
|
||||
|
||||
generator.warmup()
|
||||
self.model = model
|
||||
self.tokenizer = tokenizer
|
||||
self.cache = cache
|
||||
except Exception as err:
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
||||
|
||||
def Predict(self, request, context):
|
||||
|
||||
penalty = 1.15
|
||||
if request.Penalty != 0.0:
|
||||
penalty = request.Penalty
|
||||
|
||||
settings = ExLlamaV2Sampler.Settings()
|
||||
settings.temperature = request.Temperature
|
||||
settings.top_k = request.TopK
|
||||
settings.top_p = request.TopP
|
||||
settings.token_repetition_penalty = penalty
|
||||
settings.disallow_tokens(self.tokenizer, [self.tokenizer.eos_token_id])
|
||||
tokens = 512
|
||||
|
||||
if request.Tokens != 0:
|
||||
tokens = request.Tokens
|
||||
output = self.generator.generate_simple(request.Prompt, settings, tokens, seed = self.seed)
|
||||
|
||||
# Remove prompt from response if present
|
||||
if request.Prompt in output:
|
||||
output = output.replace(request.Prompt, "")
|
||||
|
||||
return backend_pb2.Result(message=bytes(t, encoding='utf-8'))
|
||||
|
||||
def PredictStream(self, request, context):
|
||||
# Implement PredictStream RPC
|
||||
#for reply in some_data_generator():
|
||||
# yield reply
|
||||
# Not implemented yet
|
||||
return self.Predict(request, context)
|
||||
|
||||
|
||||
def serve(address):
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||
server.add_insecure_port(address)
|
||||
server.start()
|
||||
print("Server started. Listening on: " + address, file=sys.stderr)
|
||||
|
||||
# Define the signal handler function
|
||||
def signal_handler(sig, frame):
|
||||
print("Received termination signal. Shutting down...")
|
||||
server.stop(0)
|
||||
sys.exit(0)
|
||||
|
||||
# Set the signal handlers for SIGINT and SIGTERM
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
signal.signal(signal.SIGTERM, signal_handler)
|
||||
|
||||
try:
|
||||
while True:
|
||||
time.sleep(_ONE_DAY_IN_SECONDS)
|
||||
except KeyboardInterrupt:
|
||||
server.stop(0)
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Run the gRPC server.")
|
||||
parser.add_argument(
|
||||
"--addr", default="localhost:50051", help="The address to bind the server to."
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
serve(args.addr)
|
||||
@@ -1,14 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
##
|
||||
## A bash script installs the required dependencies of VALL-E-X and prepares the environment
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate exllama2
|
||||
|
||||
echo $CONDA_PREFIX
|
||||
|
||||
git clone https://github.com/turboderp/exllamav2 $CONDA_PREFIX/exllamav2 && pushd $CONDA_PREFIX/exllamav2 && pip install -r requirements.txt && popd
|
||||
|
||||
cp -rfv $CONDA_PREFIX/exllamav2/* ./
|
||||
@@ -1,14 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the exllama server with conda
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate exllama2
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python $DIR/exllama2_backend.py $@
|
||||
@@ -1,15 +0,0 @@
|
||||
.PHONY: petals
|
||||
petals:
|
||||
$(MAKE) -C ../common-env/transformers
|
||||
|
||||
.PHONY: run
|
||||
run:
|
||||
@echo "Running petals..."
|
||||
bash run.sh
|
||||
@echo "petals run."
|
||||
|
||||
.PHONY: test
|
||||
test:
|
||||
@echo "Testing petals..."
|
||||
bash test.sh
|
||||
@echo "petals tested."
|
||||
File diff suppressed because one or more lines are too long
@@ -1,140 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
from concurrent import futures
|
||||
import time
|
||||
import argparse
|
||||
import signal
|
||||
import sys
|
||||
import os
|
||||
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
import torch
|
||||
from transformers import AutoTokenizer
|
||||
from petals import AutoDistributedModelForCausalLM
|
||||
|
||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||
|
||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||
|
||||
# Implement the BackendServicer class with the service methods
|
||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
"""
|
||||
A gRPC servicer that implements the Backend service defined in backend.proto.
|
||||
"""
|
||||
def Health(self, request, context):
|
||||
"""
|
||||
Returns a health check message.
|
||||
|
||||
Args:
|
||||
request: The health check request.
|
||||
context: The gRPC context.
|
||||
|
||||
Returns:
|
||||
backend_pb2.Reply: The health check reply.
|
||||
"""
|
||||
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
||||
|
||||
def LoadModel(self, request, context):
|
||||
"""
|
||||
Loads a language model.
|
||||
|
||||
Args:
|
||||
request: The load model request.
|
||||
context: The gRPC context.
|
||||
|
||||
Returns:
|
||||
backend_pb2.Result: The load model result.
|
||||
"""
|
||||
try:
|
||||
self.tokenizer = AutoTokenizer.from_pretrained(request.Model, use_fast=False, add_bos_token=False)
|
||||
self.model = AutoDistributedModelForCausalLM.from_pretrained(request.Model)
|
||||
self.cuda = False
|
||||
if request.CUDA:
|
||||
self.model = self.model.cuda()
|
||||
self.cuda = True
|
||||
|
||||
except Exception as err:
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
||||
|
||||
def Predict(self, request, context):
|
||||
"""
|
||||
Generates text based on the given prompt and sampling parameters.
|
||||
|
||||
Args:
|
||||
request: The predict request.
|
||||
context: The gRPC context.
|
||||
|
||||
Returns:
|
||||
backend_pb2.Result: The predict result.
|
||||
"""
|
||||
|
||||
inputs = self.tokenizer(request.Prompt, return_tensors="pt")["input_ids"]
|
||||
if self.cuda:
|
||||
inputs = inputs.cuda()
|
||||
|
||||
if request.Tokens == 0:
|
||||
# Max to max value if tokens are not specified
|
||||
request.Tokens = 8192
|
||||
|
||||
# TODO: kwargs and map all parameters
|
||||
outputs = self.model.generate(inputs, max_new_tokens=request.Tokens)
|
||||
|
||||
generated_text = self.tokenizer.decode(outputs[0])
|
||||
# Remove prompt from response if present
|
||||
if request.Prompt in generated_text:
|
||||
generated_text = generated_text.replace(request.Prompt, "")
|
||||
|
||||
return backend_pb2.Result(message=bytes(generated_text, encoding='utf-8'))
|
||||
|
||||
def PredictStream(self, request, context):
|
||||
"""
|
||||
Generates text based on the given prompt and sampling parameters, and streams the results.
|
||||
|
||||
Args:
|
||||
request: The predict stream request.
|
||||
context: The gRPC context.
|
||||
|
||||
Returns:
|
||||
backend_pb2.Result: The predict stream result.
|
||||
"""
|
||||
# Implement PredictStream RPC
|
||||
#for reply in some_data_generator():
|
||||
# yield reply
|
||||
# Not implemented yet
|
||||
return self.Predict(request, context)
|
||||
|
||||
def serve(address):
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||
server.add_insecure_port(address)
|
||||
server.start()
|
||||
print("Server started. Listening on: " + address, file=sys.stderr)
|
||||
|
||||
# Define the signal handler function
|
||||
def signal_handler(sig, frame):
|
||||
print("Received termination signal. Shutting down...")
|
||||
server.stop(0)
|
||||
sys.exit(0)
|
||||
|
||||
# Set the signal handlers for SIGINT and SIGTERM
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
signal.signal(signal.SIGTERM, signal_handler)
|
||||
|
||||
try:
|
||||
while True:
|
||||
time.sleep(_ONE_DAY_IN_SECONDS)
|
||||
except KeyboardInterrupt:
|
||||
server.stop(0)
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Run the gRPC server.")
|
||||
parser.add_argument(
|
||||
"--addr", default="localhost:50051", help="The address to bind the server to."
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
serve(args.addr)
|
||||
@@ -1,30 +0,0 @@
|
||||
name: petals
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
# - _libgcc_mutex=0.1=main
|
||||
# - _openmp_mutex=5.1=1_gnu
|
||||
# - bzip2=1.0.8=h7b6447c_0
|
||||
# - ca-certificates=2023.08.22=h06a4308_0
|
||||
# - ld_impl_linux-64=2.38=h1181459_1
|
||||
# - libffi=3.4.4=h6a678d5_0
|
||||
# - libgcc-ng=11.2.0=h1234567_1
|
||||
# - libgomp=11.2.0=h1234567_1
|
||||
# - libstdcxx-ng=11.2.0=h1234567_1
|
||||
# - libuuid=1.41.5=h5eee18b_0
|
||||
# - ncurses=6.4=h6a678d5_0
|
||||
# - openssl=3.0.11=h7f8727e_2
|
||||
# - pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
# - readline=8.2=h5eee18b_0
|
||||
# - setuptools=68.0.0=py311h06a4308_0
|
||||
# - sqlite=3.41.2=h5eee18b_0
|
||||
# - tk=8.6.12=h1ccaba5_0
|
||||
# - tzdata=2023c=h04d1e81_0
|
||||
# - wheel=0.41.2=py311h06a4308_0
|
||||
# - xz=5.4.2=h5eee18b_0
|
||||
# - zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- torch==2.1.0
|
||||
- git+https://github.com/bigscience-workshop/petals
|
||||
prefix: /opt/conda/envs/petals
|
||||
@@ -1,21 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the exllama server with conda
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
# if source is available use it, or use conda
|
||||
#
|
||||
if [ -f /opt/conda/bin/activate ]; then
|
||||
source activate transformers
|
||||
else
|
||||
eval "$(conda shell.bash hook)"
|
||||
conda activate transformers
|
||||
fi
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python $DIR/backend_petals.py $@
|
||||
@@ -1,11 +0,0 @@
|
||||
#!/bin/bash
|
||||
##
|
||||
## A bash script wrapper that runs the transformers server with conda
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python -m unittest $DIR/test_petals.py
|
||||
@@ -1,58 +0,0 @@
|
||||
import unittest
|
||||
import subprocess
|
||||
import time
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
|
||||
import unittest
|
||||
import subprocess
|
||||
import time
|
||||
import grpc
|
||||
import backend_pb2_grpc
|
||||
import backend_pb2
|
||||
|
||||
class TestBackendServicer(unittest.TestCase):
|
||||
"""
|
||||
TestBackendServicer is the class that tests the gRPC service.
|
||||
|
||||
This class contains methods to test the startup and shutdown of the gRPC service.
|
||||
"""
|
||||
def setUp(self):
|
||||
self.service = subprocess.Popen(["python", "backend_petals.py", "--addr", "localhost:50051"])
|
||||
time.sleep(10)
|
||||
|
||||
def tearDown(self) -> None:
|
||||
self.service.terminate()
|
||||
self.service.wait()
|
||||
|
||||
def test_server_startup(self):
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.Health(backend_pb2.HealthMessage())
|
||||
self.assertEqual(response.message, b'OK')
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("Server failed to start")
|
||||
finally:
|
||||
self.tearDown()
|
||||
def test_load_model(self):
|
||||
"""
|
||||
This method tests if the model is loaded successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="bigscience/bloom-560m"))
|
||||
print(response)
|
||||
self.assertTrue(response.success)
|
||||
self.assertEqual(response.message, "Model loaded successfully")
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("LoadModel service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
@@ -1,17 +0,0 @@
|
||||
.PHONY: sentencetransformers
|
||||
sentencetransformers:
|
||||
$(MAKE) -C ../common-env/transformers
|
||||
|
||||
|
||||
.PHONY: run
|
||||
run:
|
||||
@echo "Running sentencetransformers..."
|
||||
bash run.sh
|
||||
@echo "sentencetransformers run."
|
||||
|
||||
# It is not working well by using command line. It only6 works with IDE like VSCode.
|
||||
.PHONY: test
|
||||
test:
|
||||
@echo "Testing sentencetransformers..."
|
||||
bash test.sh
|
||||
@echo "sentencetransformers tested."
|
||||
@@ -1,5 +0,0 @@
|
||||
# Creating a separate environment for the sentencetransformers project
|
||||
|
||||
```
|
||||
make sentencetransformers
|
||||
```
|
||||
File diff suppressed because one or more lines are too long
@@ -1,14 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the sentencetransformers server with conda
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python $DIR/sentencetransformers.py $@
|
||||
@@ -1,11 +0,0 @@
|
||||
#!/bin/bash
|
||||
##
|
||||
## A bash script wrapper that runs the sentencetransformers server with conda
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python -m unittest $DIR/test_sentencetransformers.py
|
||||
@@ -1,81 +0,0 @@
|
||||
"""
|
||||
A test script to test the gRPC service
|
||||
"""
|
||||
import unittest
|
||||
import subprocess
|
||||
import time
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
|
||||
|
||||
class TestBackendServicer(unittest.TestCase):
|
||||
"""
|
||||
TestBackendServicer is the class that tests the gRPC service
|
||||
"""
|
||||
def setUp(self):
|
||||
"""
|
||||
This method sets up the gRPC service by starting the server
|
||||
"""
|
||||
self.service = subprocess.Popen(["python3", "sentencetransformers.py", "--addr", "localhost:50051"])
|
||||
time.sleep(10)
|
||||
|
||||
def tearDown(self) -> None:
|
||||
"""
|
||||
This method tears down the gRPC service by terminating the server
|
||||
"""
|
||||
self.service.kill()
|
||||
self.service.wait()
|
||||
|
||||
def test_server_startup(self):
|
||||
"""
|
||||
This method tests if the server starts up successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.Health(backend_pb2.HealthMessage())
|
||||
self.assertEqual(response.message, b'OK')
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("Server failed to start")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_load_model(self):
|
||||
"""
|
||||
This method tests if the model is loaded successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens"))
|
||||
self.assertTrue(response.success)
|
||||
self.assertEqual(response.message, "Model loaded successfully")
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("LoadModel service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_embedding(self):
|
||||
"""
|
||||
This method tests if the embeddings are generated successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens"))
|
||||
self.assertTrue(response.success)
|
||||
embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.")
|
||||
embedding_response = stub.Embedding(embedding_request)
|
||||
self.assertIsNotNone(embedding_response.embeddings)
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("Embedding service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
@@ -1,16 +0,0 @@
|
||||
|
||||
.PHONY: transformers-musicgen
|
||||
transformers-musicgen:
|
||||
$(MAKE) -C ../common-env/transformers
|
||||
|
||||
.PHONY: run
|
||||
run:
|
||||
@echo "Running transformers..."
|
||||
bash run.sh
|
||||
@echo "transformers run."
|
||||
|
||||
.PHONY: test
|
||||
test:
|
||||
@echo "Testing transformers..."
|
||||
bash test.sh
|
||||
@echo "transformers tested."
|
||||
@@ -1,5 +0,0 @@
|
||||
# Creating a separate environment for the transformers project
|
||||
|
||||
```
|
||||
make transformers-musicgen
|
||||
```
|
||||
File diff suppressed because one or more lines are too long
@@ -1,363 +0,0 @@
|
||||
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
|
||||
"""Client and server classes corresponding to protobuf-defined services."""
|
||||
import grpc
|
||||
|
||||
import backend_pb2 as backend__pb2
|
||||
|
||||
|
||||
class BackendStub(object):
|
||||
"""Missing associated documentation comment in .proto file."""
|
||||
|
||||
def __init__(self, channel):
|
||||
"""Constructor.
|
||||
|
||||
Args:
|
||||
channel: A grpc.Channel.
|
||||
"""
|
||||
self.Health = channel.unary_unary(
|
||||
'/backend.Backend/Health',
|
||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
||||
response_deserializer=backend__pb2.Reply.FromString,
|
||||
)
|
||||
self.Predict = channel.unary_unary(
|
||||
'/backend.Backend/Predict',
|
||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
||||
response_deserializer=backend__pb2.Reply.FromString,
|
||||
)
|
||||
self.LoadModel = channel.unary_unary(
|
||||
'/backend.Backend/LoadModel',
|
||||
request_serializer=backend__pb2.ModelOptions.SerializeToString,
|
||||
response_deserializer=backend__pb2.Result.FromString,
|
||||
)
|
||||
self.PredictStream = channel.unary_stream(
|
||||
'/backend.Backend/PredictStream',
|
||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
||||
response_deserializer=backend__pb2.Reply.FromString,
|
||||
)
|
||||
self.Embedding = channel.unary_unary(
|
||||
'/backend.Backend/Embedding',
|
||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
||||
response_deserializer=backend__pb2.EmbeddingResult.FromString,
|
||||
)
|
||||
self.GenerateImage = channel.unary_unary(
|
||||
'/backend.Backend/GenerateImage',
|
||||
request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
|
||||
response_deserializer=backend__pb2.Result.FromString,
|
||||
)
|
||||
self.AudioTranscription = channel.unary_unary(
|
||||
'/backend.Backend/AudioTranscription',
|
||||
request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
|
||||
response_deserializer=backend__pb2.TranscriptResult.FromString,
|
||||
)
|
||||
self.TTS = channel.unary_unary(
|
||||
'/backend.Backend/TTS',
|
||||
request_serializer=backend__pb2.TTSRequest.SerializeToString,
|
||||
response_deserializer=backend__pb2.Result.FromString,
|
||||
)
|
||||
self.TokenizeString = channel.unary_unary(
|
||||
'/backend.Backend/TokenizeString',
|
||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
||||
response_deserializer=backend__pb2.TokenizationResponse.FromString,
|
||||
)
|
||||
self.Status = channel.unary_unary(
|
||||
'/backend.Backend/Status',
|
||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
||||
response_deserializer=backend__pb2.StatusResponse.FromString,
|
||||
)
|
||||
|
||||
|
||||
class BackendServicer(object):
|
||||
"""Missing associated documentation comment in .proto file."""
|
||||
|
||||
def Health(self, request, context):
|
||||
"""Missing associated documentation comment in .proto file."""
|
||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||
context.set_details('Method not implemented!')
|
||||
raise NotImplementedError('Method not implemented!')
|
||||
|
||||
def Predict(self, request, context):
|
||||
"""Missing associated documentation comment in .proto file."""
|
||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||
context.set_details('Method not implemented!')
|
||||
raise NotImplementedError('Method not implemented!')
|
||||
|
||||
def LoadModel(self, request, context):
|
||||
"""Missing associated documentation comment in .proto file."""
|
||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||
context.set_details('Method not implemented!')
|
||||
raise NotImplementedError('Method not implemented!')
|
||||
|
||||
def PredictStream(self, request, context):
|
||||
"""Missing associated documentation comment in .proto file."""
|
||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||
context.set_details('Method not implemented!')
|
||||
raise NotImplementedError('Method not implemented!')
|
||||
|
||||
def Embedding(self, request, context):
|
||||
"""Missing associated documentation comment in .proto file."""
|
||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||
context.set_details('Method not implemented!')
|
||||
raise NotImplementedError('Method not implemented!')
|
||||
|
||||
def GenerateImage(self, request, context):
|
||||
"""Missing associated documentation comment in .proto file."""
|
||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||
context.set_details('Method not implemented!')
|
||||
raise NotImplementedError('Method not implemented!')
|
||||
|
||||
def AudioTranscription(self, request, context):
|
||||
"""Missing associated documentation comment in .proto file."""
|
||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||
context.set_details('Method not implemented!')
|
||||
raise NotImplementedError('Method not implemented!')
|
||||
|
||||
def TTS(self, request, context):
|
||||
"""Missing associated documentation comment in .proto file."""
|
||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||
context.set_details('Method not implemented!')
|
||||
raise NotImplementedError('Method not implemented!')
|
||||
|
||||
def TokenizeString(self, request, context):
|
||||
"""Missing associated documentation comment in .proto file."""
|
||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||
context.set_details('Method not implemented!')
|
||||
raise NotImplementedError('Method not implemented!')
|
||||
|
||||
def Status(self, request, context):
|
||||
"""Missing associated documentation comment in .proto file."""
|
||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
||||
context.set_details('Method not implemented!')
|
||||
raise NotImplementedError('Method not implemented!')
|
||||
|
||||
|
||||
def add_BackendServicer_to_server(servicer, server):
|
||||
rpc_method_handlers = {
|
||||
'Health': grpc.unary_unary_rpc_method_handler(
|
||||
servicer.Health,
|
||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
||||
),
|
||||
'Predict': grpc.unary_unary_rpc_method_handler(
|
||||
servicer.Predict,
|
||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
||||
),
|
||||
'LoadModel': grpc.unary_unary_rpc_method_handler(
|
||||
servicer.LoadModel,
|
||||
request_deserializer=backend__pb2.ModelOptions.FromString,
|
||||
response_serializer=backend__pb2.Result.SerializeToString,
|
||||
),
|
||||
'PredictStream': grpc.unary_stream_rpc_method_handler(
|
||||
servicer.PredictStream,
|
||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
||||
),
|
||||
'Embedding': grpc.unary_unary_rpc_method_handler(
|
||||
servicer.Embedding,
|
||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
||||
response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
|
||||
),
|
||||
'GenerateImage': grpc.unary_unary_rpc_method_handler(
|
||||
servicer.GenerateImage,
|
||||
request_deserializer=backend__pb2.GenerateImageRequest.FromString,
|
||||
response_serializer=backend__pb2.Result.SerializeToString,
|
||||
),
|
||||
'AudioTranscription': grpc.unary_unary_rpc_method_handler(
|
||||
servicer.AudioTranscription,
|
||||
request_deserializer=backend__pb2.TranscriptRequest.FromString,
|
||||
response_serializer=backend__pb2.TranscriptResult.SerializeToString,
|
||||
),
|
||||
'TTS': grpc.unary_unary_rpc_method_handler(
|
||||
servicer.TTS,
|
||||
request_deserializer=backend__pb2.TTSRequest.FromString,
|
||||
response_serializer=backend__pb2.Result.SerializeToString,
|
||||
),
|
||||
'TokenizeString': grpc.unary_unary_rpc_method_handler(
|
||||
servicer.TokenizeString,
|
||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
||||
response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
|
||||
),
|
||||
'Status': grpc.unary_unary_rpc_method_handler(
|
||||
servicer.Status,
|
||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
||||
response_serializer=backend__pb2.StatusResponse.SerializeToString,
|
||||
),
|
||||
}
|
||||
generic_handler = grpc.method_handlers_generic_handler(
|
||||
'backend.Backend', rpc_method_handlers)
|
||||
server.add_generic_rpc_handlers((generic_handler,))
|
||||
|
||||
|
||||
# This class is part of an EXPERIMENTAL API.
|
||||
class Backend(object):
|
||||
"""Missing associated documentation comment in .proto file."""
|
||||
|
||||
@staticmethod
|
||||
def Health(request,
|
||||
target,
|
||||
options=(),
|
||||
channel_credentials=None,
|
||||
call_credentials=None,
|
||||
insecure=False,
|
||||
compression=None,
|
||||
wait_for_ready=None,
|
||||
timeout=None,
|
||||
metadata=None):
|
||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
|
||||
backend__pb2.HealthMessage.SerializeToString,
|
||||
backend__pb2.Reply.FromString,
|
||||
options, channel_credentials,
|
||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||
|
||||
@staticmethod
|
||||
def Predict(request,
|
||||
target,
|
||||
options=(),
|
||||
channel_credentials=None,
|
||||
call_credentials=None,
|
||||
insecure=False,
|
||||
compression=None,
|
||||
wait_for_ready=None,
|
||||
timeout=None,
|
||||
metadata=None):
|
||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
|
||||
backend__pb2.PredictOptions.SerializeToString,
|
||||
backend__pb2.Reply.FromString,
|
||||
options, channel_credentials,
|
||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||
|
||||
@staticmethod
|
||||
def LoadModel(request,
|
||||
target,
|
||||
options=(),
|
||||
channel_credentials=None,
|
||||
call_credentials=None,
|
||||
insecure=False,
|
||||
compression=None,
|
||||
wait_for_ready=None,
|
||||
timeout=None,
|
||||
metadata=None):
|
||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
|
||||
backend__pb2.ModelOptions.SerializeToString,
|
||||
backend__pb2.Result.FromString,
|
||||
options, channel_credentials,
|
||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||
|
||||
@staticmethod
|
||||
def PredictStream(request,
|
||||
target,
|
||||
options=(),
|
||||
channel_credentials=None,
|
||||
call_credentials=None,
|
||||
insecure=False,
|
||||
compression=None,
|
||||
wait_for_ready=None,
|
||||
timeout=None,
|
||||
metadata=None):
|
||||
return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
|
||||
backend__pb2.PredictOptions.SerializeToString,
|
||||
backend__pb2.Reply.FromString,
|
||||
options, channel_credentials,
|
||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||
|
||||
@staticmethod
|
||||
def Embedding(request,
|
||||
target,
|
||||
options=(),
|
||||
channel_credentials=None,
|
||||
call_credentials=None,
|
||||
insecure=False,
|
||||
compression=None,
|
||||
wait_for_ready=None,
|
||||
timeout=None,
|
||||
metadata=None):
|
||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
|
||||
backend__pb2.PredictOptions.SerializeToString,
|
||||
backend__pb2.EmbeddingResult.FromString,
|
||||
options, channel_credentials,
|
||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||
|
||||
@staticmethod
|
||||
def GenerateImage(request,
|
||||
target,
|
||||
options=(),
|
||||
channel_credentials=None,
|
||||
call_credentials=None,
|
||||
insecure=False,
|
||||
compression=None,
|
||||
wait_for_ready=None,
|
||||
timeout=None,
|
||||
metadata=None):
|
||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
|
||||
backend__pb2.GenerateImageRequest.SerializeToString,
|
||||
backend__pb2.Result.FromString,
|
||||
options, channel_credentials,
|
||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||
|
||||
@staticmethod
|
||||
def AudioTranscription(request,
|
||||
target,
|
||||
options=(),
|
||||
channel_credentials=None,
|
||||
call_credentials=None,
|
||||
insecure=False,
|
||||
compression=None,
|
||||
wait_for_ready=None,
|
||||
timeout=None,
|
||||
metadata=None):
|
||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
|
||||
backend__pb2.TranscriptRequest.SerializeToString,
|
||||
backend__pb2.TranscriptResult.FromString,
|
||||
options, channel_credentials,
|
||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||
|
||||
@staticmethod
|
||||
def TTS(request,
|
||||
target,
|
||||
options=(),
|
||||
channel_credentials=None,
|
||||
call_credentials=None,
|
||||
insecure=False,
|
||||
compression=None,
|
||||
wait_for_ready=None,
|
||||
timeout=None,
|
||||
metadata=None):
|
||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
|
||||
backend__pb2.TTSRequest.SerializeToString,
|
||||
backend__pb2.Result.FromString,
|
||||
options, channel_credentials,
|
||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||
|
||||
@staticmethod
|
||||
def TokenizeString(request,
|
||||
target,
|
||||
options=(),
|
||||
channel_credentials=None,
|
||||
call_credentials=None,
|
||||
insecure=False,
|
||||
compression=None,
|
||||
wait_for_ready=None,
|
||||
timeout=None,
|
||||
metadata=None):
|
||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
|
||||
backend__pb2.PredictOptions.SerializeToString,
|
||||
backend__pb2.TokenizationResponse.FromString,
|
||||
options, channel_credentials,
|
||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||
|
||||
@staticmethod
|
||||
def Status(request,
|
||||
target,
|
||||
options=(),
|
||||
channel_credentials=None,
|
||||
call_credentials=None,
|
||||
insecure=False,
|
||||
compression=None,
|
||||
wait_for_ready=None,
|
||||
timeout=None,
|
||||
metadata=None):
|
||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
|
||||
backend__pb2.HealthMessage.SerializeToString,
|
||||
backend__pb2.StatusResponse.FromString,
|
||||
options, channel_credentials,
|
||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
||||
@@ -1,16 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the transformers-musicgen server with conda
|
||||
|
||||
echo "Launching gRPC server for transformers-musicgen"
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers-musicgen
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python $DIR/transformers_server.py $@
|
||||
@@ -1,11 +0,0 @@
|
||||
#!/bin/bash
|
||||
##
|
||||
## A bash script wrapper that runs the transformers server with conda
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python -m unittest $DIR/test_transformers.py
|
||||
@@ -1,81 +0,0 @@
|
||||
"""
|
||||
A test script to test the gRPC service
|
||||
"""
|
||||
import unittest
|
||||
import subprocess
|
||||
import time
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
|
||||
|
||||
class TestBackendServicer(unittest.TestCase):
|
||||
"""
|
||||
TestBackendServicer is the class that tests the gRPC service
|
||||
"""
|
||||
def setUp(self):
|
||||
"""
|
||||
This method sets up the gRPC service by starting the server
|
||||
"""
|
||||
self.service = subprocess.Popen(["python3", "transformers_server.py", "--addr", "localhost:50051"])
|
||||
time.sleep(10)
|
||||
|
||||
def tearDown(self) -> None:
|
||||
"""
|
||||
This method tears down the gRPC service by terminating the server
|
||||
"""
|
||||
self.service.terminate()
|
||||
self.service.wait()
|
||||
|
||||
def test_server_startup(self):
|
||||
"""
|
||||
This method tests if the server starts up successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.Health(backend_pb2.HealthMessage())
|
||||
self.assertEqual(response.message, b'OK')
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("Server failed to start")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_load_model(self):
|
||||
"""
|
||||
This method tests if the model is loaded successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small"))
|
||||
self.assertTrue(response.success)
|
||||
self.assertEqual(response.message, "Model loaded successfully")
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("LoadModel service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_tts(self):
|
||||
"""
|
||||
This method tests if the embeddings are generated successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small"))
|
||||
self.assertTrue(response.success)
|
||||
tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story")
|
||||
tts_response = stub.TTS(tts_request)
|
||||
self.assertIsNotNone(tts_response)
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("TTS service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
@@ -1,122 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Extra gRPC server for MusicgenForConditionalGeneration models.
|
||||
"""
|
||||
from concurrent import futures
|
||||
|
||||
import argparse
|
||||
import signal
|
||||
import sys
|
||||
import os
|
||||
|
||||
import time
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
|
||||
from scipy.io.wavfile import write as write_wav
|
||||
from transformers import AutoProcessor, MusicgenForConditionalGeneration
|
||||
|
||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||
|
||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||
|
||||
# Implement the BackendServicer class with the service methods
|
||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
"""
|
||||
A gRPC servicer for the backend service.
|
||||
|
||||
This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
|
||||
"""
|
||||
def Health(self, request, context):
|
||||
"""
|
||||
A gRPC method that returns the health status of the backend service.
|
||||
|
||||
Args:
|
||||
request: A HealthRequest object that contains the request parameters.
|
||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
||||
|
||||
Returns:
|
||||
A Reply object that contains the health status of the backend service.
|
||||
"""
|
||||
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
||||
|
||||
def LoadModel(self, request, context):
|
||||
"""
|
||||
A gRPC method that loads a model into memory.
|
||||
|
||||
Args:
|
||||
request: A LoadModelRequest object that contains the request parameters.
|
||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
||||
|
||||
Returns:
|
||||
A Result object that contains the result of the LoadModel operation.
|
||||
"""
|
||||
model_name = request.Model
|
||||
try:
|
||||
self.processor = AutoProcessor.from_pretrained(model_name)
|
||||
self.model = MusicgenForConditionalGeneration.from_pretrained(model_name)
|
||||
except Exception as err:
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
|
||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
||||
|
||||
def TTS(self, request, context):
|
||||
model_name = request.model
|
||||
if model_name == "":
|
||||
return backend_pb2.Result(success=False, message="request.model is required")
|
||||
try:
|
||||
self.processor = AutoProcessor.from_pretrained(model_name)
|
||||
self.model = MusicgenForConditionalGeneration.from_pretrained(model_name)
|
||||
inputs = self.processor(
|
||||
text=[request.text],
|
||||
padding=True,
|
||||
return_tensors="pt",
|
||||
)
|
||||
tokens = 256
|
||||
# TODO get tokens from request?
|
||||
audio_values = self.model.generate(**inputs, max_new_tokens=tokens)
|
||||
print("[transformers-musicgen] TTS generated!", file=sys.stderr)
|
||||
sampling_rate = self.model.config.audio_encoder.sampling_rate
|
||||
write_wav(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy())
|
||||
print("[transformers-musicgen] TTS saved to", request.dst, file=sys.stderr)
|
||||
print("[transformers-musicgen] TTS for", file=sys.stderr)
|
||||
print(request, file=sys.stderr)
|
||||
except Exception as err:
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
return backend_pb2.Result(success=True)
|
||||
|
||||
|
||||
def serve(address):
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||
server.add_insecure_port(address)
|
||||
server.start()
|
||||
print("[transformers-musicgen] Server started. Listening on: " + address, file=sys.stderr)
|
||||
|
||||
# Define the signal handler function
|
||||
def signal_handler(sig, frame):
|
||||
print("[transformers-musicgen] Received termination signal. Shutting down...")
|
||||
server.stop(0)
|
||||
sys.exit(0)
|
||||
|
||||
# Set the signal handlers for SIGINT and SIGTERM
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
signal.signal(signal.SIGTERM, signal_handler)
|
||||
|
||||
try:
|
||||
while True:
|
||||
time.sleep(_ONE_DAY_IN_SECONDS)
|
||||
except KeyboardInterrupt:
|
||||
server.stop(0)
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Run the gRPC server.")
|
||||
parser.add_argument(
|
||||
"--addr", default="localhost:50051", help="The address to bind the server to."
|
||||
)
|
||||
args = parser.parse_args()
|
||||
print(f"[transformers-musicgen] startup: {args}", file=sys.stderr)
|
||||
serve(args.addr)
|
||||
@@ -1,16 +0,0 @@
|
||||
.PHONY: transformers
|
||||
transformers:
|
||||
$(MAKE) -C ../common-env/transformers
|
||||
|
||||
.PHONY: run
|
||||
run:
|
||||
@echo "Running transformers..."
|
||||
bash run.sh
|
||||
@echo "transformers run."
|
||||
|
||||
# It is not working well by using command line. It only6 works with IDE like VSCode.
|
||||
.PHONY: test
|
||||
test:
|
||||
@echo "Testing transformers..."
|
||||
bash test.sh
|
||||
@echo "transformers tested."
|
||||
@@ -1,5 +0,0 @@
|
||||
# Creating a separate environment for the transformers project
|
||||
|
||||
```
|
||||
make transformers
|
||||
```
|
||||
File diff suppressed because one or more lines are too long
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user