Mirror of https://github.com/mudler/LocalAI.git, synced 2026-02-07 05:04:29 -05:00

Compare commits: functions_ ... aio_gpu (2 commits: 69398b6b6d, 96d44013e2)

The diffs below are rendered as unified hunks: '-' lines come from the functions_ side, '+' lines from the aio_gpu side.
.dockerignore (filename inferred from the contents; the file-header row was lost in the capture)

@@ -1,16 +1,6 @@
 .idea
-.github
-.vscode
 models
 examples/chatbot-ui/models
 examples/rwkv/models
 examples/**/models
 Dockerfile*
-__pycache__
-
-# SonarQube
-.scannerwork
-
-# backend virtual environments
-**/venv
-backend/python/**/source
.env (38 changed lines)

@@ -1,33 +1,33 @@
 ## Set number of threads.
 ## Note: prefer the number of physical cores. Overbooking the CPU degrades performance notably.
-# LOCALAI_THREADS=14
+# THREADS=14

 ## Specify a different bind address (defaults to ":8080")
-# LOCALAI_ADDRESS=127.0.0.1:8080
+# ADDRESS=127.0.0.1:8080

 ## Default models context size
-# LOCALAI_CONTEXT_SIZE=512
+# CONTEXT_SIZE=512
 #
 ## Define galleries.
 ## models will to install will be visible in `/models/available`
-# LOCALAI_GALLERIES=[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}]
+# GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}]

 ## CORS settings
-# LOCALAI_CORS=true
-# LOCALAI_CORS_ALLOW_ORIGINS=*
+# CORS=true
+# CORS_ALLOW_ORIGINS=*

 ## Default path for models
 #
-# LOCALAI_MODELS_PATH=/models
+# MODELS_PATH=/models

 ## Enable debug mode
-# LOCALAI_LOG_LEVEL=debug
+# DEBUG=true

 ## Disables COMPEL (Diffusers)
 # COMPEL=0

 ## Enable/Disable single backend (useful if only one GPU is available)
-# LOCALAI_SINGLE_ACTIVE_BACKEND=true
+# SINGLE_ACTIVE_BACKEND=true

 ## Specify a build type. Available: cublas, openblas, clblas.
 ## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
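Both sides expose gallery models through the /models/available endpoint named in the comment above the GALLERIES line. Assuming the default ":8080" bind address mentioned in this same file, a running instance can be checked with:

    curl http://127.0.0.1:8080/models/available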
@@ -46,13 +46,13 @@
 # GO_TAGS=stablediffusion

 ## Path where to store generated images
-# LOCALAI_IMAGE_PATH=/tmp/generated/images
+# IMAGE_PATH=/tmp

 ## Specify a default upload limit in MB (whisper)
-# LOCALAI_UPLOAD_LIMIT=15
+# UPLOAD_LIMIT

 ## List of external GRPC backends (note on the container image this variable is already set to use extra backends available in extra/)
-# LOCALAI_EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py
+# EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py

 ### Advanced settings ###
 ### Those are not really used by LocalAI, but from components in the stack ###
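Both spellings of the external-backends variable take the same value format, a comma-separated list of name:endpoint pairs; judging from the sample value itself, an endpoint may be either the address of an already-running gRPC backend (my-backend:127.0.0.1:9000) or a path to a backend executable (my-backend2:/usr/bin/backend.py).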
@@ -72,18 +72,18 @@
 # LLAMACPP_PARALLEL=1

 ### Enable to run parallel requests
-# LOCALAI_PARALLEL_REQUESTS=true
+# PARALLEL_REQUESTS=true

 ### Watchdog settings
 ###
 # Enables watchdog to kill backends that are inactive for too much time
-# LOCALAI_WATCHDOG_IDLE=true
-#
-# Time in duration format (e.g. 1h30m) after which a backend is considered idle
-# LOCALAI_WATCHDOG_IDLE_TIMEOUT=5m
+# WATCHDOG_IDLE=true
 #
 # Enables watchdog to kill backends that are busy for too much time
-# LOCALAI_WATCHDOG_BUSY=true
+# WATCHDOG_BUSY=true
+#
+# Time in duration format (e.g. 1h30m) after which a backend is considered idle
+# WATCHDOG_IDLE_TIMEOUT=5m
 #
 # Time in duration format (e.g. 1h30m) after which a backend is considered busy
-# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m
+# WATCHDOG_BUSY_TIMEOUT=5m
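Read together, the '+' side of the watchdog hunk amounts to the .env fragment below, a sketch assembled from the hunk with the commented 5m defaults filled in:

    WATCHDOG_IDLE=true
    WATCHDOG_IDLE_TIMEOUT=5m
    WATCHDOG_BUSY=true
    WATCHDOG_BUSY_TIMEOUT=5m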
.github/bump_docs.sh (2 changed lines)

@@ -2,6 +2,6 @@
 set -xe
 REPO=$1

-LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.tag_name')
+LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.name')

 cat <<< $(jq ".version = \"$LATEST_TAG\"" docs/data/version.json) > docs/data/version.json
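The only difference is which field of the GitHub releases API the script reads: tag_name is the release's git tag, while name is its free-form title, so the two can disagree on repositories that title releases differently from their tags. Checking both by hand (the same curl/jq calls the script makes, with mudler/LocalAI as an example $REPO):

    curl -s "https://api.github.com/repos/mudler/LocalAI/releases/latest" | jq -r '.tag_name'
    curl -s "https://api.github.com/repos/mudler/LocalAI/releases/latest" | jq -r '.name'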
.github/checksum_checker.sh (111 lines): the entire file below exists only on the functions_ side; indentation has been restored.

@@ -1,111 +0,0 @@
#!/bin/bash
# This scripts needs yq and huggingface_hub to be installed
# to install hugingface_hub run pip install huggingface_hub

# Path to the input YAML file
input_yaml=$1

# Function to download file and check checksum using Python
function check_and_update_checksum() {
    model_name="$1"
    file_name="$2"
    uri="$3"
    old_checksum="$4"
    idx="$5"

    # Download the file and calculate new checksum using Python
    new_checksum=$(python3 -c "
import hashlib
from huggingface_hub import hf_hub_download
import requests
import sys
import os

uri = '$uri'
file_name = uri.split('/')[-1]

# Function to parse the URI and determine download method
# Function to parse the URI and determine download method
def parse_uri(uri):
    if uri.startswith('huggingface://'):
        repo_id = uri.split('://')[1]
        return 'huggingface', repo_id.rsplit('/', 1)[0]
    elif 'huggingface.co' in uri:
        parts = uri.split('/resolve/')
        if len(parts) > 1:
            repo_path = parts[0].split('https://huggingface.co/')[-1]
            return 'huggingface', repo_path
    return 'direct', uri

def calculate_sha256(file_path):
    sha256_hash = hashlib.sha256()
    with open(file_path, 'rb') as f:
        for byte_block in iter(lambda: f.read(4096), b''):
            sha256_hash.update(byte_block)
    return sha256_hash.hexdigest()

download_type, repo_id_or_url = parse_uri(uri)

# Decide download method based on URI type
if download_type == 'huggingface':
    try:
        file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
    except Exception as e:
        print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
        sys.exit(2)
else:
    response = requests.get(repo_id_or_url)
    if response.status_code == 200:
        with open(file_name, 'wb') as f:
            f.write(response.content)
        file_path = file_name
    elif response.status_code == 404:
        print(f'File not found: {response.status_code}', file=sys.stderr)
        sys.exit(2)
    else:
        print(f'Error downloading file: {response.status_code}', file=sys.stderr)
        sys.exit(1)

print(calculate_sha256(file_path))
# Clean up the downloaded file
os.remove(file_path)
")

    if [[ "$new_checksum" == "" ]]; then
        echo "Error calculating checksum for $file_name. Skipping..."
        return
    fi

    echo "Checksum for $file_name: $new_checksum"

    # Compare and update the YAML file if checksums do not match
    result=$?
    if [[ $result -eq 2 ]]; then
        echo "File not found, deleting entry for $file_name..."
        # yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\"))" "$input_yaml"
    elif [[ "$old_checksum" != "$new_checksum" ]]; then
        echo "Checksum mismatch for $file_name. Updating..."
        yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\").sha256)" "$input_yaml"
        yq eval -i "(.[$idx].files[] | select(.filename == \"$file_name\")).sha256 = \"$new_checksum\"" "$input_yaml"
    elif [[ $result -ne 0 ]]; then
        echo "Error downloading file $file_name. Skipping..."
    else
        echo "Checksum match for $file_name. No update needed."
    fi
}

# Read the YAML and process each file
len=$(yq eval '. | length' "$input_yaml")
for ((i=0; i<$len; i++))
do
    name=$(yq eval ".[$i].name" "$input_yaml")
    files_len=$(yq eval ".[$i].files | length" "$input_yaml")
    for ((j=0; j<$files_len; j++))
    do
        filename=$(yq eval ".[$i].files[$j].filename" "$input_yaml")
        uri=$(yq eval ".[$i].files[$j].uri" "$input_yaml")
        checksum=$(yq eval ".[$i].files[$j].sha256" "$input_yaml")
        echo "Checking model $name, file $filename. URI = $uri, Checksum = $checksum"
        check_and_update_checksum "$name" "$filename" "$uri" "$checksum" "$i"
    done
done
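The checksum_checker.yaml workflow further down runs this script against gallery/index.yaml. Per the dependency notes in the script header, running it by hand would look roughly like:

    pip install huggingface_hub   # yq must also be on the PATH
    bash .github/checksum_checker.sh gallery/index.yaml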
.github/dependabot.yml (25 lines): present only on the functions_ side.

@@ -1,25 +0,0 @@
# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
version: 2
updates:
  - package-ecosystem: "gomod"
    directory: "/"
    schedule:
      interval: "weekly"
  - package-ecosystem: "github-actions"
    # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
    directory: "/"
    schedule:
      # Check for updates to GitHub Actions every weekday
      interval: "weekly"
  - package-ecosystem: "pip"
    # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
    directory: "/"
    schedule:
      # Check for updates to GitHub Actions every weekday
      interval: "weekly"
  - package-ecosystem: "docker"
    # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
    directory: "/"
    schedule:
      # Check for updates to GitHub Actions every weekday
      interval: "weekly"
.github/labeler.yml (24 lines): present only on the functions_ side.

@@ -1,24 +0,0 @@
enhancements:
  - head-branch: ['^feature', 'feature']

kind/documentation:
  - any:
    - changed-files:
      - any-glob-to-any-file: 'docs/*'
    - changed-files:
      - any-glob-to-any-file: '*.md'

area/ai-model:
  - any:
    - changed-files:
      - any-glob-to-any-file: 'gallery/*'

examples:
  - any:
    - changed-files:
      - any-glob-to-any-file: 'examples/*'

ci:
  - any:
    - changed-files:
      - any-glob-to-any-file: '.github/*'
.github/workflows/bump_deps.yaml (2 changed lines)

@@ -49,7 +49,7 @@ jobs:
       run: |
         bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
     - name: Create Pull Request
-      uses: peter-evans/create-pull-request@v6
+      uses: peter-evans/create-pull-request@v5
       with:
         token: ${{ secrets.UPDATE_BOT_TOKEN }}
         push-to-fork: ci-forks/LocalAI
.github/workflows/bump_docs.yaml (2 changed lines)

@@ -17,7 +17,7 @@ jobs:
       run: |
         bash .github/bump_docs.sh ${{ matrix.repository }}
     - name: Create Pull Request
-      uses: peter-evans/create-pull-request@v6
+      uses: peter-evans/create-pull-request@v5
       with:
         token: ${{ secrets.UPDATE_BOT_TOKEN }}
         push-to-fork: ci-forks/LocalAI
.github/workflows/checksum_checker.yaml (47 lines): present only on the functions_ side.

@@ -1,47 +0,0 @@
name: Check if checksums are up-to-date
on:
  schedule:
    - cron: 0 20 * * *
  workflow_dispatch:
jobs:
  checksum_check:
    runs-on: arc-runner-set
    steps:
      - name: Force Install GIT latest
        run: |
          sudo apt-get update \
          && sudo apt-get install -y software-properties-common \
          && sudo apt-get update \
          && sudo add-apt-repository -y ppa:git-core/ppa \
          && sudo apt-get update \
          && sudo apt-get install -y git
      - uses: actions/checkout@v4
      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y pip wget
          sudo pip install --upgrade pip
          pip install huggingface_hub
      - name: 'Setup yq'
        uses: dcarbone/install-yq-action@v1.1.1
        with:
          version: 'v4.43.1'
          download-compressed: true
          force: true

      - name: Checksum checker 🔧
        run: |
          export HF_HOME=/hf_cache
          sudo mkdir /hf_cache
          sudo chmod 777 /hf_cache
          bash .github/checksum_checker.sh gallery/index.yaml
      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v6
        with:
          token: ${{ secrets.UPDATE_BOT_TOKEN }}
          push-to-fork: ci-forks/LocalAI
          commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
          title: 'models(gallery): :arrow_up: update checksum'
          branch: "update/checksum"
          body: Updating checksums in gallery/index.yaml
          signoff: true
.github/workflows/dependabot_auto.yml (43 lines): present only on the functions_ side.

@@ -1,43 +0,0 @@
name: Dependabot auto-merge
on:
- pull_request_target

permissions:
  contents: write
  pull-requests: write
  packages: read

jobs:
  dependabot:
    runs-on: ubuntu-latest
    if: ${{ github.actor == 'dependabot[bot]' }}
    steps:
    - name: Dependabot metadata
      id: metadata
      uses: dependabot/fetch-metadata@v2.1.0
      with:
        github-token: "${{ secrets.GITHUB_TOKEN }}"
        skip-commit-verification: true

    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Approve a PR if not already approved
      run: |
        gh pr checkout "$PR_URL"
        if [ "$(gh pr status --json reviewDecision -q .currentBranch.reviewDecision)" != "APPROVED" ];
        then
          gh pr review --approve "$PR_URL"
        else
          echo "PR already approved.";
        fi
      env:
        PR_URL: ${{github.event.pull_request.html_url}}
        GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}

    - name: Enable auto-merge for Dependabot PRs
      if: ${{ contains(github.event.pull_request.title, 'bump')}}
      run: gh pr merge --auto --squash "$PR_URL"
      env:
        PR_URL: ${{github.event.pull_request.html_url}}
        GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
.github/workflows/generate_grpc_cache.yaml (94 lines): present only on the functions_ side.

@@ -1,94 +0,0 @@
name: 'generate and publish GRPC docker caches'

on:
  workflow_dispatch:
  push:
    branches:
      - master

concurrency:
  group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
  cancel-in-progress: true

jobs:
  generate_caches:
    strategy:
      matrix:
        include:
          - grpc-base-image: ubuntu:22.04
            runs-on: 'ubuntu-latest'
            platforms: 'linux/amd64'
    runs-on: ${{matrix.runs-on}}
    steps:
      - name: Release space from worker
        if: matrix.runs-on == 'ubuntu-latest'
        run: |
          echo "Listing top largest packages"
          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
          head -n 30 <<< "${pkgs}"
          echo
          df -h
          echo
          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
          sudo apt-get remove --auto-remove android-sdk-platform-tools || true
          sudo apt-get purge --auto-remove android-sdk-platform-tools || true
          sudo rm -rf /usr/local/lib/android
          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
          sudo rm -rf /usr/share/dotnet
          sudo apt-get remove -y '^mono-.*' || true
          sudo apt-get remove -y '^ghc-.*' || true
          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
          sudo apt-get remove -y 'php.*' || true
          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
          sudo apt-get remove -y '^google-.*' || true
          sudo apt-get remove -y azure-cli || true
          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
          sudo apt-get remove -y '^gfortran-.*' || true
          sudo apt-get remove -y microsoft-edge-stable || true
          sudo apt-get remove -y firefox || true
          sudo apt-get remove -y powershell || true
          sudo apt-get remove -y r-base-core || true
          sudo apt-get autoremove -y
          sudo apt-get clean
          echo
          echo "Listing top largest packages"
          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
          head -n 30 <<< "${pkgs}"
          echo
          sudo rm -rfv build || true
          sudo rm -rf /usr/share/dotnet || true
          sudo rm -rf /opt/ghc || true
          sudo rm -rf "/usr/local/share/boost" || true
          sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
          df -h

      - name: Set up QEMU
        uses: docker/setup-qemu-action@master
        with:
          platforms: all

      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@master

      - name: Checkout
        uses: actions/checkout@v4

      - name: Cache GRPC
        uses: docker/build-push-action@v5
        with:
          builder: ${{ steps.buildx.outputs.name }}
          # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
          # This means that even the MAKEFLAGS have to be an EXACT match.
          # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
          build-args: |
            GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
            GRPC_VERSION=v1.63.0
          context: .
          file: ./Dockerfile
          cache-to: type=gha,ignore-error=true
          cache-from: type=gha
          target: grpc
          platforms: ${{ matrix.platforms }}
          push: false
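The comment block above the build-args stanza carries the key constraint: the GitHub Actions cache is keyed on the exact build-args, so GRPC_BASE_IMAGE, GRPC_MAKEFLAGS and GRPC_VERSION here must stay byte-for-byte identical to the ones passed by the image build (the functions_ side of image_build.yml below passes the same GRPC_MAKEFLAGS=--jobs=4 --output-sync=target and GRPC_VERSION=v1.63.0), or every image build falls back to compiling gRPC from scratch.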
.github/workflows/image-pr.yml (22 changed lines)

@@ -22,8 +22,7 @@ jobs:
       platforms: ${{ matrix.platforms }}
       runs-on: ${{ matrix.runs-on }}
       base-image: ${{ matrix.base-image }}
-      grpc-base-image: ${{ matrix.grpc-base-image }}
-      makeflags: ${{ matrix.makeflags }}
+      makeflags: "-j3"
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -43,7 +42,6 @@ jobs:
           image-type: 'extras'
           runs-on: 'arc-runner-set'
           base-image: "ubuntu:22.04"
-          makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
           cuda-minor-version: "1"
@@ -54,27 +52,22 @@ jobs:
           image-type: 'extras'
           runs-on: 'arc-runner-set'
           base-image: "ubuntu:22.04"
-          makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'hipblas'
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-hipblas'
           ffmpeg: 'false'
           image-type: 'extras'
-          base-image: "rocm/dev-ubuntu-22.04:6.1"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
           runs-on: 'arc-runner-set'
-          makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'sycl_f16'
           platforms: 'linux/amd64'
           tag-latest: 'false'
-          base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
           tag-suffix: 'sycl-f16-ffmpeg'
           ffmpeg: 'true'
           image-type: 'extras'
           runs-on: 'arc-runner-set'
-          makeflags: "--jobs=3 --output-sync=target"
   core-image-build:
     uses: ./.github/workflows/image_build.yml
     with:
@@ -88,8 +81,7 @@ jobs:
       platforms: ${{ matrix.platforms }}
       runs-on: ${{ matrix.runs-on }}
       base-image: ${{ matrix.base-image }}
-      grpc-base-image: ${{ matrix.grpc-base-image }}
-      makeflags: ${{ matrix.makeflags }}
+      makeflags: "-j3"
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -106,17 +98,14 @@ jobs:
           image-type: 'core'
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
-          makeflags: "--jobs=4 --output-sync=target"
         - build-type: 'sycl_f16'
           platforms: 'linux/amd64'
           tag-latest: 'false'
-          base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
           tag-suffix: 'sycl-f16-ffmpeg-core'
           ffmpeg: 'true'
           image-type: 'core'
           runs-on: 'arc-runner-set'
-          makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
           cuda-minor-version: "1"
@@ -127,4 +116,3 @@ jobs:
           image-type: 'core'
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
-          makeflags: "--jobs=4 --output-sync=target"
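The same pattern runs through both image workflows: the functions_ side adds a grpc-base-image input and a per-entry makeflags field to the matrix (plus latest-image tags in image.yml), while the aio_gpu side hardcodes makeflags: "-j3" at the call site and carries none of those fields.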
.github/workflows/image.yml (74 changed lines)

@@ -26,11 +26,8 @@ jobs:
       platforms: ${{ matrix.platforms }}
       runs-on: ${{ matrix.runs-on }}
       base-image: ${{ matrix.base-image }}
-      grpc-base-image: ${{ matrix.grpc-base-image }}
       aio: ${{ matrix.aio }}
-      makeflags: ${{ matrix.makeflags }}
-      latest-image: ${{ matrix.latest-image }}
-      latest-image-aio: ${{ matrix.latest-image-aio }}
+      makeflags: "-j3"
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -52,7 +49,6 @@ jobs:
           image-type: 'extras'
           runs-on: 'arc-runner-set'
           base-image: "ubuntu:22.04"
-          makeflags: "--jobs=3 --output-sync=target"
         - build-type: ''
           platforms: 'linux/amd64'
           tag-latest: 'auto'
@@ -61,7 +57,6 @@ jobs:
           image-type: 'extras'
           runs-on: 'arc-runner-set'
           base-image: "ubuntu:22.04"
-          makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "11"
           cuda-minor-version: "7"
@@ -72,7 +67,6 @@ jobs:
           image-type: 'extras'
           runs-on: 'arc-runner-set'
           base-image: "ubuntu:22.04"
-          makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
           cuda-minor-version: "1"
@@ -83,7 +77,6 @@ jobs:
           image-type: 'extras'
           runs-on: 'arc-runner-set'
           base-image: "ubuntu:22.04"
-          makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "11"
           cuda-minor-version: "7"
@@ -95,9 +88,6 @@ jobs:
           runs-on: 'arc-runner-set'
           base-image: "ubuntu:22.04"
           aio: "-aio-gpu-nvidia-cuda-11"
-          latest-image: 'latest-gpu-nvidia-cuda-11'
-          latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
-          makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
           cuda-minor-version: "1"
@@ -109,9 +99,6 @@ jobs:
           runs-on: 'arc-runner-set'
           base-image: "ubuntu:22.04"
           aio: "-aio-gpu-nvidia-cuda-12"
-          latest-image: 'latest-gpu-nvidia-cuda-12'
-          latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
-          makeflags: "--jobs=3 --output-sync=target"
         - build-type: ''
           #platforms: 'linux/amd64,linux/arm64'
           platforms: 'linux/amd64'
@@ -121,7 +108,6 @@ jobs:
           image-type: 'extras'
           base-image: "ubuntu:22.04"
           runs-on: 'arc-runner-set'
-          makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'hipblas'
           platforms: 'linux/amd64'
           tag-latest: 'auto'
@@ -129,109 +115,83 @@ jobs:
           ffmpeg: 'true'
           image-type: 'extras'
           aio: "-aio-gpu-hipblas"
-          base-image: "rocm/dev-ubuntu-22.04:6.1"
-          grpc-base-image: "ubuntu:22.04"
-          latest-image: 'latest-gpu-hipblas'
-          latest-image-aio: 'latest-aio-gpu-hipblas'
+          base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
           runs-on: 'arc-runner-set'
-          makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'hipblas'
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-hipblas'
           ffmpeg: 'false'
           image-type: 'extras'
-          base-image: "rocm/dev-ubuntu-22.04:6.1"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
           runs-on: 'arc-runner-set'
-          makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'sycl_f16'
           platforms: 'linux/amd64'
           tag-latest: 'auto'
-          base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
           tag-suffix: '-sycl-f16-ffmpeg'
           ffmpeg: 'true'
           image-type: 'extras'
           runs-on: 'arc-runner-set'
           aio: "-aio-gpu-intel-f16"
-          latest-image: 'latest-gpu-intel-f16'
-          latest-image-aio: 'latest-aio-gpu-intel-f16'
-          makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'sycl_f32'
           platforms: 'linux/amd64'
           tag-latest: 'auto'
-          base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
           tag-suffix: '-sycl-f32-ffmpeg'
           ffmpeg: 'true'
           image-type: 'extras'
           runs-on: 'arc-runner-set'
           aio: "-aio-gpu-intel-f32"
-          latest-image: 'latest-gpu-intel-f32'
-          latest-image-aio: 'latest-aio-gpu-intel-f32'
-          makeflags: "--jobs=3 --output-sync=target"
         # Core images
         - build-type: 'sycl_f16'
           platforms: 'linux/amd64'
           tag-latest: 'false'
-          base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
           tag-suffix: '-sycl-f16-core'
           ffmpeg: 'false'
           image-type: 'core'
           runs-on: 'arc-runner-set'
-          makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'sycl_f32'
           platforms: 'linux/amd64'
           tag-latest: 'false'
-          base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
          tag-suffix: '-sycl-f32-core'
           ffmpeg: 'false'
           image-type: 'core'
           runs-on: 'arc-runner-set'
-          makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'sycl_f16'
           platforms: 'linux/amd64'
           tag-latest: 'false'
-          base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
           tag-suffix: '-sycl-f16-ffmpeg-core'
           ffmpeg: 'true'
           image-type: 'core'
           runs-on: 'arc-runner-set'
-          makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'sycl_f32'
           platforms: 'linux/amd64'
           tag-latest: 'false'
-          base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
           tag-suffix: '-sycl-f32-ffmpeg-core'
           ffmpeg: 'true'
           image-type: 'core'
           runs-on: 'arc-runner-set'
-          makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'hipblas'
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-hipblas-ffmpeg-core'
           ffmpeg: 'true'
           image-type: 'core'
-          base-image: "rocm/dev-ubuntu-22.04:6.1"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
           runs-on: 'arc-runner-set'
-          makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'hipblas'
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-hipblas-core'
           ffmpeg: 'false'
           image-type: 'core'
-          base-image: "rocm/dev-ubuntu-22.04:6.1"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
           runs-on: 'arc-runner-set'
-          makeflags: "--jobs=3 --output-sync=target"

   core-image-build:
     uses: ./.github/workflows/image_build.yml
@@ -247,10 +207,7 @@ jobs:
       runs-on: ${{ matrix.runs-on }}
       aio: ${{ matrix.aio }}
       base-image: ${{ matrix.base-image }}
-      grpc-base-image: ${{ matrix.grpc-base-image }}
-      makeflags: ${{ matrix.makeflags }}
-      latest-image: ${{ matrix.latest-image }}
-      latest-image-aio: ${{ matrix.latest-image-aio }}
+      makeflags: "-j3"
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -268,9 +225,6 @@ jobs:
           base-image: "ubuntu:22.04"
           runs-on: 'ubuntu-latest'
           aio: "-aio-cpu"
-          latest-image: 'latest-cpu'
-          latest-image-aio: 'latest-aio-cpu'
-          makeflags: "--jobs=4 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "11"
           cuda-minor-version: "7"
@@ -281,7 +235,6 @@ jobs:
           image-type: 'core'
           base-image: "ubuntu:22.04"
           runs-on: 'ubuntu-latest'
-          makeflags: "--jobs=4 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
           cuda-minor-version: "1"
@@ -292,7 +245,6 @@ jobs:
           image-type: 'core'
           base-image: "ubuntu:22.04"
           runs-on: 'ubuntu-latest'
-          makeflags: "--jobs=4 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "11"
           cuda-minor-version: "7"
@@ -303,7 +255,6 @@ jobs:
           image-type: 'core'
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
-          makeflags: "--jobs=4 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
           cuda-minor-version: "1"
@@ -314,4 +265,3 @@ jobs:
           image-type: 'core'
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
-          makeflags: "--jobs=4 --output-sync=target"
.github/workflows/image_build.yml (62 changed lines)

@@ -6,10 +6,6 @@ on:
     inputs:
       base-image:
         description: 'Base image'
-        required: true
-        type: string
-      grpc-base-image:
-        description: 'GRPC Base image, must be a compatible image with base-image'
         required: false
         default: ''
         type: string
@@ -33,14 +29,6 @@ on:
         description: 'Tag latest'
         default: ''
         type: string
-      latest-image:
-        description: 'Tag latest'
-        default: ''
-        type: string
-      latest-image-aio:
-        description: 'Tag latest'
-        default: ''
-        type: string
       tag-suffix:
         description: 'Tag suffix'
         default: ''
@@ -61,7 +49,7 @@ on:
       makeflags:
         description: 'Make Flags'
         required: false
-        default: '--jobs=4 --output-sync=target'
+        default: ''
         type: string
       aio:
         description: 'AIO Image Name'
@@ -91,7 +79,6 @@ jobs:
          && sudo apt-get install -y git
       - name: Checkout
         uses: actions/checkout@v4
-
       - name: Release space from worker
         if: inputs.runs-on == 'ubuntu-latest'
         run: |
@@ -133,7 +120,6 @@ jobs:
           sudo rm -rf "/usr/local/share/boost" || true
           sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
           df -h
-
       - name: Docker meta
         id: meta
         uses: docker/metadata-action@v5
@@ -148,7 +134,6 @@ jobs:
           flavor: |
             latest=${{ inputs.tag-latest }}
             suffix=${{ inputs.tag-suffix }}
-
       - name: Docker meta AIO (quay.io)
         if: inputs.aio != ''
         id: meta_aio
@@ -162,7 +147,6 @@ jobs:
           flavor: |
             latest=${{ inputs.tag-latest }}
             suffix=${{ inputs.aio }}
-
       - name: Docker meta AIO (dockerhub)
         if: inputs.aio != ''
         id: meta_aio_dockerhub
@@ -176,7 +160,6 @@ jobs:
           flavor: |
             latest=${{ inputs.tag-latest }}
             suffix=${{ inputs.aio }}
-
       - name: Set up QEMU
         uses: docker/setup-qemu-action@master
         with:
@@ -205,10 +188,6 @@ jobs:
         uses: docker/build-push-action@v5
         with:
           builder: ${{ steps.buildx.outputs.name }}
-          # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
-          # This means that even the MAKEFLAGS have to be an EXACT match.
-          # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
-          # This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded
           build-args: |
             BUILD_TYPE=${{ inputs.build-type }}
             CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
@@ -216,18 +195,21 @@ jobs:
             FFMPEG=${{ inputs.ffmpeg }}
             IMAGE_TYPE=${{ inputs.image-type }}
             BASE_IMAGE=${{ inputs.base-image }}
-            GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
-            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.63.0
             MAKEFLAGS=${{ inputs.makeflags }}
           context: .
           file: ./Dockerfile
-          cache-from: type=gha
           platforms: ${{ inputs.platforms }}
           push: ${{ github.event_name != 'pull_request' }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
-
+      -
+        name: Inspect image
+        if: github.event_name != 'pull_request'
+        run: |
+          docker pull localai/localai:${{ steps.meta.outputs.version }}
+          docker image inspect localai/localai:${{ steps.meta.outputs.version }}
+          docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
+          docker image inspect quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
       - name: Build and push AIO image
         if: inputs.aio != ''
         uses: docker/build-push-action@v5
@@ -235,14 +217,12 @@ jobs:
         builder: ${{ steps.buildx.outputs.name }}
         build-args: |
           BASE_IMAGE=quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
-          MAKEFLAGS=${{ inputs.makeflags }}
         context: .
         file: ./Dockerfile.aio
         platforms: ${{ inputs.platforms }}
         push: ${{ github.event_name != 'pull_request' }}
         tags: ${{ steps.meta_aio.outputs.tags }}
         labels: ${{ steps.meta_aio.outputs.labels }}
-
       - name: Build and push AIO image (dockerhub)
         if: inputs.aio != ''
         uses: docker/build-push-action@v5
@@ -250,39 +230,15 @@ jobs:
         builder: ${{ steps.buildx.outputs.name }}
         build-args: |
           BASE_IMAGE=localai/localai:${{ steps.meta.outputs.version }}
-          MAKEFLAGS=${{ inputs.makeflags }}
         context: .
         file: ./Dockerfile.aio
         platforms: ${{ inputs.platforms }}
         push: ${{ github.event_name != 'pull_request' }}
         tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
         labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
-
-      - name: Latest tag
-        # run this on branches, when it is a tag and there is a latest-image defined
-        if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
-        run: |
-          docker pull localai/localai:${{ steps.meta.outputs.version }}
-          docker tag localai/localai:${{ steps.meta.outputs.version }} localai/localai:${{ inputs.latest-image }}
-          docker push localai/localai:${{ inputs.latest-image }}
-          docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
-          docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
-          docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
-      - name: Latest AIO tag
-        # run this on branches, when it is a tag and there is a latest-image defined
-        if: github.event_name != 'pull_request' && inputs.latest-image-aio != '' && github.ref_type == 'tag'
-        run: |
-          docker pull localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }}
-          docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image-aio }}
-          docker push localai/localai:${{ inputs.latest-image-aio }}
-          docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
-          docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
-          docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
-
       - name: job summary
         run: |
           echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
-
       - name: job summary(AIO)
         if: inputs.aio != ''
         run: |
.github/workflows/labeler.yml (12 lines): present only on the functions_ side.

@@ -1,12 +0,0 @@
name: "Pull Request Labeler"
on:
- pull_request_target

jobs:
  labeler:
    permissions:
      contents: read
      pull-requests: write
    runs-on: ubuntu-latest
    steps:
    - uses: actions/labeler@v5
.github/workflows/localaibot_automerge.yml (35 lines): present only on the functions_ side.

@@ -1,35 +0,0 @@
name: LocalAI-bot auto-merge
on:
- pull_request_target

permissions:
  contents: write
  pull-requests: write
  packages: read

jobs:
  dependabot:
    runs-on: ubuntu-latest
    if: ${{ github.actor == 'localai-bot' }}
    steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Approve a PR if not already approved
      run: |
        gh pr checkout "$PR_URL"
        if [ "$(gh pr status --json reviewDecision -q .currentBranch.reviewDecision)" != "APPROVED" ];
        then
          gh pr review --approve "$PR_URL"
        else
          echo "PR already approved.";
        fi
      env:
        PR_URL: ${{github.event.pull_request.html_url}}
        GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}

    - name: Enable auto-merge for LocalAIBot PRs
      run: gh pr merge --auto --squash "$PR_URL"
      env:
        PR_URL: ${{github.event.pull_request.html_url}}
        GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
.github/workflows/release.yaml (111 changed lines)

@@ -1,11 +1,6 @@
 name: Build and Release

-on:
-- push
-- pull_request
-
-env:
-  GRPC_VERSION: v1.63.0
+on: push

 permissions:
   contents: write
@@ -16,58 +11,79 @@ concurrency:

 jobs:
   build-linux:
+    strategy:
+      matrix:
+        include:
+          - build: 'avx2'
+            defines: ''
+          - build: 'avx'
+            defines: '-DLLAMA_AVX2=OFF'
+          - build: 'avx512'
+            defines: '-DLLAMA_AVX512=ON'
+          - build: 'cuda12'
+            defines: ''
+          - build: 'cuda11'
+            defines: ''
     runs-on: ubuntu-latest
     steps:
       - name: Clone
         uses: actions/checkout@v4
         with:
           submodules: true
-      - uses: actions/setup-go@v5
+      - uses: actions/setup-go@v4
         with:
-          go-version: '1.21.x'
-          cache: false
+          go-version: '>=1.21.0'
       - name: Dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install build-essential ffmpeg protobuf-compiler
+          sudo apt-get install build-essential ffmpeg
       - name: Install CUDA Dependencies
+        if: ${{ matrix.build == 'cuda12' || matrix.build == 'cuda11' }}
         run: |
+          if [ "${{ matrix.build }}" == "cuda12" ]; then
+            export CUDA_VERSION=12-3
+          else
+            export CUDA_VERSION=11-7
+          fi
           curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
           sudo dpkg -i cuda-keyring_1.1-1_all.deb
           sudo apt-get update
           sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
-        env:
-          CUDA_VERSION: 12-3
       - name: Cache grpc
         id: cache-grpc
-        uses: actions/cache@v4
+        uses: actions/cache@v3
         with:
           path: grpc
-          key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
+          key: ${{ runner.os }}-grpc
       - name: Build grpc
         if: steps.cache-grpc.outputs.cache-hit != 'true'
         run: |
-          git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+          git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
            cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
              -DgRPC_BUILD_TESTS=OFF \
-              ../.. && sudo make --jobs 5 --output-sync=target
+              ../.. && sudo make -j12
       - name: Install gRPC
         run: |
-          cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
+          cd grpc && cd cmake/build && sudo make -j12 install
       - name: Build
         id: build
+        env:
+          CMAKE_ARGS: "${{ matrix.defines }}"
+          BUILD_ID: "${{ matrix.build }}"
         run: |
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
-          export PATH=$PATH:$GOPATH/bin
+          if [ "${{ matrix.build }}" == "cuda12" ] || [ "${{ matrix.build }}" == "cuda11" ]; then
+            export BUILD_TYPE=cublas
           export PATH=/usr/local/cuda/bin:$PATH
           make dist
-      - uses: actions/upload-artifact@v4
+          else
+            STATIC=true make dist
+          fi
+      - uses: actions/upload-artifact@v3
         with:
-          name: LocalAI-linux
+          name: ${{ matrix.build }}
           path: release/
       - name: Release
-        uses: softprops/action-gh-release@v2
+        uses: softprops/action-gh-release@v1
         if: startsWith(github.ref, 'refs/tags/')
         with:
           files: |
@@ -80,54 +96,65 @@ jobs:
         uses: actions/checkout@v4
         with:
           submodules: true
-      - uses: actions/setup-go@v5
+      - uses: actions/setup-go@v4
         with:
-          go-version: '1.21.x'
-          cache: false
+          go-version: '>=1.21.0'
       - name: Dependencies
         run: |
-          sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
+          sudo apt-get install -y --no-install-recommends libopencv-dev
+          sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
       - name: Build stablediffusion
         run: |
-          export PATH=$PATH:$GOPATH/bin
           make backend-assets/grpc/stablediffusion
           mkdir -p release && cp backend-assets/grpc/stablediffusion release
-      - uses: actions/upload-artifact@v4
+      - uses: actions/upload-artifact@v3
         with:
           name: stablediffusion
           path: release/
+      - name: Release
+        uses: softprops/action-gh-release@v1
+        if: startsWith(github.ref, 'refs/tags/')
+        with:
+          files: |
+            release/*

-  build-macOS-arm64:
-    runs-on: macos-14
+  build-macOS:
+    strategy:
+      matrix:
+        include:
+          - build: 'avx2'
+            defines: ''
+          - build: 'avx'
+            defines: '-DLLAMA_AVX2=OFF'
+          - build: 'avx512'
+            defines: '-DLLAMA_AVX512=ON'
+    runs-on: macOS-latest
     steps:
       - name: Clone
         uses: actions/checkout@v4
         with:
           submodules: true
-      - uses: actions/setup-go@v5
+      - uses: actions/setup-go@v4
         with:
-          go-version: '1.21.x'
-          cache: false
+          go-version: '>=1.21.0'
       - name: Dependencies
         run: |
           brew install protobuf grpc
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
       - name: Build
         id: build
+        env:
+          CMAKE_ARGS: "${{ matrix.defines }}"
+          BUILD_ID: "${{ matrix.build }}"
         run: |
           export C_INCLUDE_PATH=/usr/local/include
           export CPLUS_INCLUDE_PATH=/usr/local/include
-          export PATH=$PATH:$GOPATH/bin
           make dist
-      - uses: actions/upload-artifact@v4
+      - uses: actions/upload-artifact@v3
         with:
-          name: LocalAI-MacOS-arm64
+          name: ${{ matrix.build }}
           path: release/
|
||||||
- name: Release
|
- name: Release
|
||||||
uses: softprops/action-gh-release@v2
|
uses: softprops/action-gh-release@v1
|
||||||
if: startsWith(github.ref, 'refs/tags/')
|
if: startsWith(github.ref, 'refs/tags/')
|
||||||
with:
|
with:
|
||||||
files: |
|
files: |
|
||||||
|
|||||||
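On the head side, the release job derives the CUDA package suffix from the build matrix instead of a fixed `env:` entry. A minimal standalone sketch of that selection logic, assuming an Ubuntu 22.04 x86_64 host and using `BUILD` as a stand-in for `${{ matrix.build }}`:

```bash
#!/bin/bash
set -e
# Stand-in for ${{ matrix.build }}; 'cuda12' and 'cuda11' are the matrix entries.
BUILD="${BUILD:-cuda12}"
if [ "$BUILD" == "cuda12" ]; then
  CUDA_VERSION=12-3
else
  CUDA_VERSION=11-7
fi
# Register NVIDIA's apt repository, then install only nvcc and the cuBLAS dev packages.
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get install -y "cuda-nvcc-${CUDA_VERSION}" "libcublas-dev-${CUDA_VERSION}"
```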

.github/workflows/secscan.yaml
@@ -1,30 +0,0 @@
-name: "Security Scan"
-
-# Run workflow each time code is pushed to your repository and on a schedule.
-# The scheduled workflow runs every at 00:00 on Sunday UTC time.
-on:
-  push:
-  schedule:
-    - cron: '0 0 * * 0'
-
-jobs:
-  tests:
-    runs-on: ubuntu-latest
-    env:
-      GO111MODULE: on
-    steps:
-      - name: Checkout Source
-        uses: actions/checkout@v4
-        if: ${{ github.actor != 'dependabot[bot]' }}
-      - name: Run Gosec Security Scanner
-        if: ${{ github.actor != 'dependabot[bot]' }}
-        uses: securego/gosec@master
-        with:
-          # we let the report trigger content trigger a failure using the GitHub Security features.
-          args: '-no-fail -fmt sarif -out results.sarif ./...'
-      - name: Upload SARIF file
-        if: ${{ github.actor != 'dependabot[bot]' }}
-        uses: github/codeql-action/upload-sarif@v3
-        with:
-          # Path to SARIF file relative to the root of the repository
-          sarif_file: results.sarif
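The scan this workflow performed can also be reproduced outside CI. A sketch, assuming a Go toolchain is installed; the module path below is the one securego publishes for the gosec CLI:

```bash
# Install the gosec CLI and emit the same SARIF report the workflow uploaded.
go install github.com/securego/gosec/v2/cmd/gosec@latest
gosec -no-fail -fmt sarif -out results.sarif ./...
```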

.github/workflows/test-extra.yml
@@ -25,16 +25,23 @@ jobs:
         run: |
           sudo apt-get update
           sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools==1.63.0
+          curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
+          sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
+          gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
+          sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
+          sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
+          sudo apt-get update && \
+          sudo apt-get install -y conda
+          sudo apt-get install -y ca-certificates cmake curl patch
+          sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+
+          sudo rm -rfv /usr/bin/conda || true
+
       - name: Test transformers
         run: |
-          make --jobs=5 --output-sync=target -C backend/python/transformers
-          make --jobs=5 --output-sync=target -C backend/python/transformers test
+          export PATH=$PATH:/opt/conda/bin
+          make -C backend/python/transformers
+          make -C backend/python/transformers test

   tests-sentencetransformers:
     runs-on: ubuntu-latest
@@ -47,39 +54,23 @@ jobs:
         run: |
           sudo apt-get update
           sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools==1.63.0
+          curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
+          sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
+          gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
+          sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
+          sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
+          sudo apt-get update && \
+          sudo apt-get install -y conda
+          sudo apt-get install -y ca-certificates cmake curl patch
+          sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+
+          sudo rm -rfv /usr/bin/conda || true
+
       - name: Test sentencetransformers
         run: |
-          make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
-          make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
+          export PATH=$PATH:/opt/conda/bin
+          make -C backend/python/sentencetransformers
+          make -C backend/python/sentencetransformers test

-  tests-rerankers:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools==1.63.0
-
-      - name: Test rerankers
-        run: |
-          make --jobs=5 --output-sync=target -C backend/python/rerankers
-          make --jobs=5 --output-sync=target -C backend/python/rerankers test
-
   tests-diffusers:
     runs-on: ubuntu-latest
@@ -91,38 +82,25 @@ jobs:
       - name: Dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install -y build-essential ffmpeg
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          pip install --user grpcio-tools==1.63.0
+          sudo apt-get install build-essential ffmpeg
+          curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
+          sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
+          gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
+          sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
+          sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
+          sudo apt-get update && \
+          sudo apt-get install -y conda
+          sudo apt-get install -y ca-certificates cmake curl patch
+          sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+
+          sudo rm -rfv /usr/bin/conda || true
+
       - name: Test diffusers
         run: |
-          make --jobs=5 --output-sync=target -C backend/python/diffusers
-          make --jobs=5 --output-sync=target -C backend/python/diffusers test
+          export PATH=$PATH:/opt/conda/bin
+          make -C backend/python/diffusers
+          make -C backend/python/diffusers test

-  tests-parler-tts:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools==1.63.0
-
-      - name: Test parler-tts
-        run: |
-          make --jobs=5 --output-sync=target -C backend/python/parler-tts
-          make --jobs=5 --output-sync=target -C backend/python/parler-tts test
-
   tests-transformers-musicgen:
     runs-on: ubuntu-latest
@@ -135,40 +113,54 @@ jobs:
         run: |
           sudo apt-get update
           sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools==1.63.0
+          curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
+          sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
+          gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
+          sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
+          sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
+          sudo apt-get update && \
+          sudo apt-get install -y conda
+          sudo apt-get install -y ca-certificates cmake curl patch
+          sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+
+          sudo rm -rfv /usr/bin/conda || true
+
       - name: Test transformers-musicgen
         run: |
-          make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
-          make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
+          export PATH=$PATH:/opt/conda/bin
+          make -C backend/python/transformers-musicgen
+          make -C backend/python/transformers-musicgen test


-  # tests-petals:
-  #   runs-on: ubuntu-latest
-  #   steps:
-  #     - name: Clone
-  #       uses: actions/checkout@v4
-  #       with:
-  #         submodules: true
-  #     - name: Dependencies
-  #       run: |
-  #         sudo apt-get update
-  #         sudo apt-get install build-essential ffmpeg
-  #         # Install UV
-  #         curl -LsSf https://astral.sh/uv/install.sh | sh
-  #         sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-  #         sudo apt-get install -y libopencv-dev
-  #         pip install --user grpcio-tools==1.63.0
-  #     - name: Test petals
-  #       run: |
-  #         make --jobs=5 --output-sync=target -C backend/python/petals
-  #         make --jobs=5 --output-sync=target -C backend/python/petals test
+  tests-petals:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential ffmpeg
+          curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
+          sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
+          gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
+          sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
+          sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
+          sudo apt-get update && \
+          sudo apt-get install -y conda
+          sudo apt-get install -y ca-certificates cmake curl patch
+          sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+
+          sudo rm -rfv /usr/bin/conda || true
+
+      - name: Test petals
+        run: |
+          export PATH=$PATH:/opt/conda/bin
+          make -C backend/python/petals
+          make -C backend/python/petals test


@@ -223,16 +215,23 @@ jobs:
   #       run: |
   #         sudo apt-get update
   #         sudo apt-get install build-essential ffmpeg
-  #         # Install UV
-  #         curl -LsSf https://astral.sh/uv/install.sh | sh
-  #         sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-  #         sudo apt-get install -y libopencv-dev
-  #         pip install --user grpcio-tools==1.63.0
+  #         curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
+  #         sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
+  #         gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
+  #         sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
+  #         sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
+  #         sudo apt-get update && \
+  #         sudo apt-get install -y conda
+  #         sudo apt-get install -y ca-certificates cmake curl patch
+  #         sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+
+  #         sudo rm -rfv /usr/bin/conda || true
+
   # - name: Test bark
   #   run: |
-  #     make --jobs=5 --output-sync=target -C backend/python/bark
-  #     make --jobs=5 --output-sync=target -C backend/python/bark test
+  #     export PATH=$PATH:/opt/conda/bin
+  #     make -C backend/python/bark
+  #     make -C backend/python/bark test

   # Below tests needs GPU. Commented out for now
@@ -248,15 +247,21 @@ jobs:
   #       run: |
   #         sudo apt-get update
   #         sudo apt-get install build-essential ffmpeg
-  #         # Install UV
-  #         curl -LsSf https://astral.sh/uv/install.sh | sh
-  #         sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-  #         sudo apt-get install -y libopencv-dev
-  #         pip install --user grpcio-tools==1.63.0
+  #         curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
+  #         sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
+  #         gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
+  #         sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
+  #         sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
+  #         sudo apt-get update && \
+  #         sudo apt-get install -y conda
+  #         sudo apt-get install -y ca-certificates cmake curl patch
+  #         sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+  #         sudo rm -rfv /usr/bin/conda || true
   # - name: Test vllm
   #   run: |
-  #     make --jobs=5 --output-sync=target -C backend/python/vllm
-  #     make --jobs=5 --output-sync=target -C backend/python/vllm test
+  #     export PATH=$PATH:/opt/conda/bin
+  #     make -C backend/python/vllm
+  #     make -C backend/python/vllm test
   tests-vallex:
     runs-on: ubuntu-latest
     steps:
@@ -268,15 +273,21 @@ jobs:
         run: |
           sudo apt-get update
           sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools==1.63.0
+          curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
+          sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
+          gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
+          sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
+          sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
+          sudo apt-get update && \
+          sudo apt-get install -y conda
+          sudo apt-get install -y ca-certificates cmake curl patch
+          sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+          sudo rm -rfv /usr/bin/conda || true
       - name: Test vall-e-x
         run: |
-          make --jobs=5 --output-sync=target -C backend/python/vall-e-x
-          make --jobs=5 --output-sync=target -C backend/python/vall-e-x test
+          export PATH=$PATH:/opt/conda/bin
+          make -C backend/python/vall-e-x
+          make -C backend/python/vall-e-x test

   tests-coqui:
     runs-on: ubuntu-latest
@@ -289,11 +300,18 @@ jobs:
         run: |
           sudo apt-get update
           sudo apt-get install build-essential ffmpeg
-          sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          pip install --user grpcio-tools==1.63.0
+          curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
+          sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
+          gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
+          sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
+          sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
+          sudo apt-get update && \
+          sudo apt-get install -y conda
+          sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng
+          sudo rm -rfv /usr/bin/conda || true
+
       - name: Test coqui
         run: |
-          make --jobs=5 --output-sync=target -C backend/python/coqui
-          make --jobs=5 --output-sync=target -C backend/python/coqui test
+          export PATH=$PATH:/opt/conda/bin
+          make -C backend/python/coqui
+          make -C backend/python/coqui test
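The head side repeats the same conda bootstrap in every job above. Collapsed into a single script it amounts to the following sketch (not a file that exists in the repository, just the repeated `run:` block consolidated):

```bash
#!/bin/bash
set -e
# Import Anaconda's signing key into a dedicated keyring.
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring \
    --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806
# Register the conda Debian repository and install conda system-wide.
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" \
    | sudo tee /etc/apt/sources.list.d/conda.list
sudo apt-get update
sudo apt-get install -y conda
# The backend Makefiles expect conda on the PATH.
export PATH=$PATH:/opt/conda/bin
```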

.github/workflows/test.yml
@@ -9,9 +9,6 @@ on:
     tags:
       - '*'

-env:
-  GRPC_VERSION: v1.63.0
-
 concurrency:
   group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
   cancel-in-progress: true
@@ -60,17 +57,16 @@ jobs:
         with:
           submodules: true
       - name: Setup Go ${{ matrix.go-version }}
-        uses: actions/setup-go@v5
+        uses: actions/setup-go@v4
         with:
           go-version: ${{ matrix.go-version }}
-          cache: false
       # You can test your matrix by printing the current Go version
       - name: Display Go version
         run: go version
       - name: Dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install build-essential curl ffmpeg
+          sudo apt-get install build-essential ffmpeg
           curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
           sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
           gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
@@ -78,26 +74,8 @@ jobs:
           sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
           sudo apt-get update && \
           sudo apt-get install -y conda
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake patch python3-pip unzip
-          sudo apt-get install -y libopencv-dev
-
-          curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
-          unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
-          rm protoc.zip
-
-          curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
-          sudo dpkg -i cuda-keyring_1.1-1_all.deb
-          sudo apt-get update
-          sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
-          export CUDACXX=/usr/local/cuda/bin/nvcc
-
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
-
-          # The python3-grpc-tools package in 22.04 is too old
-          pip install --user grpcio-tools
-
+          sudo apt-get install -y ca-certificates cmake curl patch
+          sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+
           sudo rm -rfv /usr/bin/conda || true
           PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
@@ -107,35 +85,30 @@ jobs:
           GO_TAGS="tts" make -C sources/go-piper piper.o && \
           sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
           # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
-          PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
-        env:
-          CUDA_VERSION: 12-3
+          GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
       - name: Cache grpc
         id: cache-grpc
-        uses: actions/cache@v4
+        uses: actions/cache@v3
         with:
           path: grpc
-          key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
+          key: ${{ runner.os }}-grpc
       - name: Build grpc
         if: steps.cache-grpc.outputs.cache-hit != 'true'
         run: |
-          git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --jobs 5 --shallow-submodules https://github.com/grpc/grpc && \
+          git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
            cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
              -DgRPC_BUILD_TESTS=OFF \
-             ../.. && sudo make --jobs 5
+             ../.. && sudo make -j12
       - name: Install gRPC
         run: |
-          cd grpc && cd cmake/build && sudo make --jobs 5 install
+          cd grpc && cd cmake/build && sudo make -j12 install
       - name: Test
         run: |
-          PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
+          GO_TAGS="stablediffusion tts" make test
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
+        uses: mxschmitt/action-tmate@v3
+        timeout-minutes: 5

   tests-aio-container:
     runs-on: ubuntu-latest
@@ -178,7 +151,7 @@ jobs:
         submodules: true
       - name: Build images
         run: |
-          docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=core --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
+          docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=core -t local-ai:tests -f Dockerfile .
           BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
       - name: Test
         run: |
@@ -186,11 +159,8 @@ jobs:
           make run-e2e-aio
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
+        uses: mxschmitt/action-tmate@v3
+        timeout-minutes: 5

   tests-apple:
     runs-on: macOS-14
@@ -203,28 +173,21 @@ jobs:
         with:
           submodules: true
       - name: Setup Go ${{ matrix.go-version }}
-        uses: actions/setup-go@v5
+        uses: actions/setup-go@v4
         with:
           go-version: ${{ matrix.go-version }}
-          cache: false
       # You can test your matrix by printing the current Go version
       - name: Display Go version
         run: go version
       - name: Dependencies
         run: |
-          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
-          pip install --user grpcio-tools==1.63.0
+          brew install protobuf grpc
       - name: Test
         run: |
           export C_INCLUDE_PATH=/usr/local/include
           export CPLUS_INCLUDE_PATH=/usr/local/include
-          # Used to run the newer GNUMake version from brew that supports --output-sync
-          export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
-          BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
+          BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
+        uses: mxschmitt/action-tmate@v3
+        timeout-minutes: 5
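The `tests-aio-container` job boils down to three commands that also work locally, assuming Docker and GNU make are available:

```bash
# Build the core LocalAI image the same way the workflow does.
docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=core -t local-ai:tests -f Dockerfile .
# Derive the all-in-one (AIO) image from it.
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
# Run the end-to-end AIO suite against the freshly built image.
make run-e2e-aio
```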

.github/workflows/update_swagger.yaml
@@ -1,31 +0,0 @@
-name: Update swagger
-on:
-  schedule:
-    - cron: 0 20 * * *
-  workflow_dispatch:
-jobs:
-  swagger:
-    strategy:
-      fail-fast: false
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v5
-        with:
-          go-version: 'stable'
-      - run: |
-          go install github.com/swaggo/swag/cmd/swag@latest
-      - name: Bump swagger 🔧
-        run: |
-          make swagger
-      - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v6
-        with:
-          token: ${{ secrets.UPDATE_BOT_TOKEN }}
-          push-to-fork: ci-forks/LocalAI
-          commit-message: 'feat(swagger): update swagger'
-          title: 'feat(swagger): update swagger'
-          branch: "update/swagger"
-          body: Update swagger
-          signoff: true
-
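The swagger bump this workflow automated can be run by hand with the same swag CLI it installed; a sketch, assuming a Go toolchain is present:

```bash
# Install the swag generator and regenerate the API definitions.
go install github.com/swaggo/swag/cmd/swag@latest
make swagger
```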

.github/workflows/yaml-check.yml
@@ -1,18 +0,0 @@
-name: 'Yamllint GitHub Actions'
-on:
-  - pull_request
-jobs:
-  yamllint:
-    name: 'Yamllint'
-    runs-on: ubuntu-latest
-    steps:
-      - name: 'Checkout'
-        uses: actions/checkout@master
-      - name: 'Yamllint'
-        uses: karancode/yamllint-github-action@master
-        with:
-          yamllint_file_or_dir: 'gallery'
-          yamllint_strict: false
-          yamllint_comment: true
-        env:
-          GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}

.gitignore
@@ -39,14 +39,3 @@ backend-assets/*
 !backend-assets/.keep
 prepare
 /ggml-metal.metal
-
-# Protobuf generated files
-*.pb.go
-*pb2.py
-*pb2_grpc.py
-
-# SonarQube
-.scannerwork
-
-# backend virtual environments
-**/venv

.vscode/extensions.json
@@ -1,5 +0,0 @@
-{
-    "recommendations": [
-        "golang.go"
-    ]
-}

CONTRIBUTING.md
@@ -1,4 +1,4 @@
-# Contributing to LocalAI
+# Contributing to localAI

 Thank you for your interest in contributing to LocalAI! We appreciate your time and effort in helping to improve our project. Before you get started, please take a moment to review these guidelines.

@@ -29,9 +29,8 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time

 1. Clone the repository: `git clone https://github.com/go-skynet/LocalAI.git`
 2. Navigate to the project directory: `cd LocalAI`
-3. Install the required dependencies ( see https://localai.io/basics/build/#build-localai-locally )
-4. Build LocalAI: `make build`
-5. Run LocalAI: `./local-ai`
+3. Install the required dependencies: `make prepare`
+4. Run LocalAI: `make run`

 ## Contributing

@@ -60,24 +59,9 @@ If you find a bug, have a feature request, or encounter any issues, please check

 `make test` cannot handle all the model now. Please be sure to add a test case for the new features or the part was changed.

-### Running AIO tests
-
-All-In-One images has a set of tests that automatically verifies that most of the endpoints works correctly, a flow can be :
-
-```bash
-# Build the LocalAI docker image
-make DOCKER_IMAGE=local-ai docker
-
-# Build the corresponding AIO image
-BASE_IMAGE=local-ai DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
-
-# Run the AIO e2e tests
-LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio make run-e2e-aio
-```
-
 ## Documentation

-We are welcome the contribution of the documents, please open new PR or create a new issue. The documentation is available under `docs/` https://github.com/mudler/LocalAI/tree/master/docs
+- We are welcome the contribution of the documents, please open new PR in the official document repo [localai-website](https://github.com/go-skynet/localai-website)

 ## Community and Communication

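Taken together, the build steps on the base side of this file correspond to the following sequence, assuming the dependencies described at https://localai.io/basics/build/#build-localai-locally are installed:

```bash
git clone https://github.com/go-skynet/LocalAI.git
cd LocalAI
make build      # compile the local-ai binary
./local-ai      # start the server from the repository root
```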

Dockerfile
@@ -1,45 +1,30 @@
 ARG IMAGE_TYPE=extras
 ARG BASE_IMAGE=ubuntu:22.04
-ARG GRPC_BASE_IMAGE=${BASE_IMAGE}

-# The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it.
-FROM ${BASE_IMAGE} AS requirements-core
+# extras or core
+FROM ${BASE_IMAGE} as requirements-core

 USER root

 ARG GO_VERSION=1.21.7
+ARG BUILD_TYPE
+ARG CUDA_MAJOR_VERSION=11
+ARG CUDA_MINOR_VERSION=7
 ARG TARGETARCH
 ARG TARGETVARIANT

+ENV BUILD_TYPE=${BUILD_TYPE}
 ENV DEBIAN_FRONTEND=noninteractive
-ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
+ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh"

 ARG GO_TAGS="stablediffusion tinydream tts"

 RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        build-essential \
-        ca-certificates \
-        cmake \
-        curl \
-        git \
-        python3-pip \
-        python-is-python3 \
-        unzip && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/* && \
-    pip install --upgrade pip
+    apt-get install -y ca-certificates curl patch pip cmake git && apt-get clean

 # Install Go
-RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
-ENV PATH $PATH:/root/go/bin:/usr/local/go/bin
-
-# Install grpc compilers
-RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@latest && \
-    go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
-
-# Install grpcio-tools (the version in 22.04 is too old)
-RUN pip install --user grpcio-tools
+RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -C /usr/local -xz
+ENV PATH $PATH:/usr/local/go/bin

 COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
 RUN update-ca-certificates
@@ -48,6 +33,16 @@ RUN update-ca-certificates
 RUN echo "Target Architecture: $TARGETARCH"
 RUN echo "Target Variant: $TARGETVARIANT"

+# CuBLAS requirements
+RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
+    apt-get install -y software-properties-common && \
+    curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
+    dpkg -i cuda-keyring_1.1-1_all.deb && \
+    rm -f cuda-keyring_1.1-1_all.deb && \
+    apt-get update && \
+    apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
+    ; fi
+
 # Cuda
 ENV PATH /usr/local/cuda/bin:${PATH}

@@ -55,12 +50,10 @@ ENV PATH /usr/local/cuda/bin:${PATH}
 ENV PATH /opt/rocm/bin:${PATH}

 # OpenBLAS requirements and stable diffusion
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
+RUN apt-get install -y \
     libopenblas-dev \
-    libopencv-dev && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
+    libopencv-dev \
+    && apt-get clean

 # Set up OpenCV
 RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
@@ -73,119 +66,35 @@ RUN test -n "$TARGETARCH" \
 ###################################
 ###################################

-# The requirements-extras target is for any builds with IMAGE_TYPE=extras. It should not be placed in this target unless every IMAGE_TYPE=extras build will use it
-FROM requirements-core AS requirements-extras
-
-RUN curl -LsSf https://astral.sh/uv/install.sh | sh
+FROM requirements-core as requirements-extras
+
+RUN curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
+    install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
+    gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
+    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \
+    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list && \
+    apt-get update && \
+    apt-get install -y conda && apt-get clean

 ENV PATH="/root/.cargo/bin:${PATH}"

+RUN apt-get install -y python3-pip && apt-get clean
+RUN pip install --upgrade pip
+
 RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        espeak-ng \
-        espeak \
-        python3-dev \
-        python3-venv && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
-###################################
-###################################
-
-# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
-# This target will be built on top of requirements-core or requirements-extras as retermined by the IMAGE_TYPE build-arg
-FROM requirements-${IMAGE_TYPE} AS requirements-drivers
-
-ARG BUILD_TYPE
-ARG CUDA_MAJOR_VERSION=11
-ARG CUDA_MINOR_VERSION=7
-
-ENV BUILD_TYPE=${BUILD_TYPE}
-
-# CuBLAS requirements
-RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
-    apt-get update && \
-    apt-get install -y --no-install-recommends \
-        software-properties-common && \
-    curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
-    dpkg -i cuda-keyring_1.1-1_all.deb && \
-    rm -f cuda-keyring_1.1-1_all.deb && \
-    apt-get update && \
-    apt-get install -y --no-install-recommends \
-        cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-        libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-        libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-        libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-        libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/* \
-    ; fi
-
-# If we are building with clblas support, we need the libraries for the builds
-RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
-    apt-get update && \
-    apt-get install -y --no-install-recommends \
-        libclblast-dev && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/* \
-    ; fi
-
-RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
-    apt-get update && \
-    apt-get install -y --no-install-recommends \
-        hipblas-dev \
-        rocblas-dev && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/* && \
-    # I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
-    # to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
-    ldconfig \
+RUN apt-get install -y espeak-ng espeak && apt-get clean
+
+RUN if [ ! -e /usr/bin/python ]; then \
+    ln -s /usr/bin/python3 /usr/bin/python \
     ; fi

 ###################################
 ###################################

-# The grpc target does one thing, it builds and installs GRPC. This is in it's own layer so that it can be effectively cached by CI.
-# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
-FROM ${GRPC_BASE_IMAGE} AS grpc
-
-# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
-ARG GRPC_MAKEFLAGS="-j4 -Otarget"
-ARG GRPC_VERSION=v1.58.0
-
-ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
-
-WORKDIR /build
-
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        ca-certificates \
-        build-essential \
-        cmake \
-        git && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
-# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
-# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
-# and running make install in the target container
-RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
-    mkdir -p /build/grpc/cmake/build && \
-    cd /build/grpc/cmake/build && \
-    cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
-    make && \
-    make install && \
-    rm -rf /build
-
-###################################
-###################################
-
-# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
-# Adjustments to the build process should likely be made here.
-FROM requirements-drivers AS builder
+FROM requirements-${IMAGE_TYPE} as builder

 ARG GO_TAGS="stablediffusion tts"
 ARG GRPC_BACKENDS
+ARG BUILD_GRPC=true
 ARG MAKEFLAGS

 ENV GRPC_BACKENDS=${GRPC_BACKENDS}
@@ -200,24 +109,26 @@ WORKDIR /build
 COPY . .
 COPY .git .
 RUN echo "GO_TAGS: $GO_TAGS"

 RUN make prepare

-# We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below
-# but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
-# here so that we can generate the grpc code for the stablediffusion build
-RUN curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
-    unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
-    rm protoc.zip
+# If we are building with clblas support, we need the libraries for the builds
+RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
+    apt-get update && \
+    apt-get install -y libclblast-dev && \
+    apt-get clean \
+    ; fi

 # stablediffusion does not tolerate a newer version of abseil, build it first
 RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build

-# Install the pre-built GRPC
-COPY --from=grpc /opt/grpc /usr/local
+RUN if [ "${BUILD_GRPC}" = "true" ]; then \
+    git clone --recurse-submodules --jobs 4 -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+    cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
+    -DgRPC_BUILD_TESTS=OFF \
+    ../.. && make install \
+    ; fi

 # Rebuild with defaults backends
-WORKDIR /build
 RUN make build

 RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
@@ -228,15 +139,12 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
 ###################################
 ###################################

-# This is the final target. The result of this target will be the image uploaded to the registry.
-# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
-FROM requirements-drivers
+FROM requirements-${IMAGE_TYPE}

 ARG FFMPEG
 ARG BUILD_TYPE
 ARG TARGETARCH
 ARG IMAGE_TYPE=extras
-ARG EXTRA_BACKENDS
 ARG MAKEFLAGS

 ENV BUILD_TYPE=${BUILD_TYPE}
@@ -248,14 +156,18 @@ ARG CUDA_MAJOR_VERSION=11
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
 ENV NVIDIA_VISIBLE_DEVICES=all
+ENV PIP_CACHE_PURGE=true

 # Add FFmpeg
 RUN if [ "${FFMPEG}" = "true" ]; then \
+    apt-get install -y ffmpeg && apt-get clean \
+    ; fi
+
+# Add OpenCL
+RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
     apt-get update && \
-    apt-get install -y --no-install-recommends \
-        ffmpeg && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/* \
+    apt-get install -y libclblast1 && \
+    apt-get clean \
     ; fi

 WORKDIR /build
@@ -267,9 +179,9 @@ WORKDIR /build
 COPY . .

 COPY --from=builder /build/sources ./sources/
-COPY --from=grpc /opt/grpc /usr/local
+COPY --from=builder /build/grpc ./grpc/

-RUN make prepare-sources
+RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf grpc

 # Copy the binary
 COPY --from=builder /build/local-ai ./
@@ -280,58 +192,45 @@ COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
 # do not let stablediffusion rebuild (requires an older version of absl)
 COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion

-# Change the shell to bash so we can use [[ tests below
-SHELL ["/bin/bash", "-c"]
-# We try to strike a balance between individual layer size (as that affects total push time) and total image size
-# Splitting the backends into more groups with fewer items results in a larger image, but a smaller size for the largest layer
-# Splitting the backends into fewer groups with more items results in a smaller image, but a larger size for the largest layer
-
-RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-    make -C backend/python/coqui \
-    ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "parler-tts" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-    make -C backend/python/parler-tts \
-    ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-    make -C backend/python/diffusers \
-    ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-    make -C backend/python/transformers-musicgen \
-    ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "exllama1" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-    make -C backend/python/exllama \
+## Duplicated from Makefile to avoid having a big layer that's hard to push
+RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
+    make -C backend/python/autogptq \
     ; fi
-RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
+    make -C backend/python/bark \
|
||||||
make -C backend/python/vall-e-x \
|
; fi
|
||||||
; fi && \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "petals" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
make -C backend/python/diffusers \
|
||||||
make -C backend/python/petals \
|
; fi
|
||||||
; fi && \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
make -C backend/python/vllm \
|
||||||
|
; fi
|
||||||
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
|
make -C backend/python/mamba \
|
||||||
|
; fi
|
||||||
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
make -C backend/python/sentencetransformers \
|
make -C backend/python/sentencetransformers \
|
||||||
; fi && \
|
; fi
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
make -C backend/python/exllama2 \
|
|
||||||
; fi && \
|
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
|
||||||
make -C backend/python/transformers \
|
make -C backend/python/transformers \
|
||||||
; fi
|
; fi
|
||||||
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
make -C backend/python/vall-e-x \
|
||||||
make -C backend/python/vllm \
|
; fi
|
||||||
; fi && \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "autogptq" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
make -C backend/python/exllama \
|
||||||
make -C backend/python/autogptq \
|
; fi
|
||||||
; fi && \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "bark" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
make -C backend/python/exllama2 \
|
||||||
make -C backend/python/bark \
|
; fi
|
||||||
; fi && \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
make -C backend/python/petals \
|
||||||
make -C backend/python/rerankers \
|
; fi
|
||||||
; fi && \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "mamba" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
make -C backend/python/transformers-musicgen \
|
||||||
make -C backend/python/mamba \
|
; fi
|
||||||
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
|
make -C backend/python/coqui \
|
||||||
; fi
|
; fi
|
||||||
|
|
||||||
# Make sure the models directory exists
|
# Make sure the models directory exists
|
||||||
@@ -339,8 +238,7 @@ RUN mkdir -p /build/models
|
|||||||
|
|
||||||
# Define the health check command
|
# Define the health check command
|
||||||
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
|
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
|
||||||
CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1
|
CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
|
||||||
|
|
||||||
VOLUME /build/models
|
|
||||||
EXPOSE 8080
|
EXPOSE 8080
|
||||||
ENTRYPOINT [ "/build/entrypoint.sh" ]
|
ENTRYPOINT [ "/build/entrypoint.sh" ]
|
||||||
|
|||||||
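Worth noting about the deleted `EXTRA_BACKENDS` block above: each `[[ "${EXTRA_BACKENDS}" =~ "name" || -z "${EXTRA_BACKENDS}" ]]` test builds a backend either when it is listed or when the variable is empty, so an empty build argument means "build all backends". A minimal sketch of driving that switch at build time (the image tag is illustrative, not taken from this diff):

```bash
# Build an extras image with only the coqui and diffusers Python backends baked in.
# Leaving EXTRA_BACKENDS empty (the default) would build every backend instead.
docker build \
  --build-arg IMAGE_TYPE=extras \
  --build-arg EXTRA_BACKENDS="coqui diffusers" \
  -t localai:extras-slim .
```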
Makefile (325 changes)

@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
 
 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=dc685be46622a8fabfd57cfa804237c8f15679b8
+CPPLLAMA_VERSION?=56a00f0a2f48a85376f48b5ce77699df781631ae
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
 
 # whisper.cpp version
-WHISPER_CPP_VERSION?=4ef8d9f44eb402c528ab6d990ab50a9f4f666347
+WHISPER_CPP_VERSION?=fff24a0148fe194df4997a738eeceddd724959c3
 
 # bert.cpp version
 BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
@@ -25,10 +25,10 @@ BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
 PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759
 
 # stablediffusion version
-STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
+STABLEDIFFUSION_VERSION?=362df9da29f882dbf09ade61972d16a1f53c3485
 
 # tinydream version
-TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
+TINYDREAM_VERSION?=772a9c0d9aaf768290e63cca3c904fe69faf677a
 
 export BUILD_TYPE?=
 export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
@@ -38,7 +38,7 @@ CGO_LDFLAGS?=
 CGO_LDFLAGS_WHISPER?=
 CUDA_LIBPATH?=/usr/local/cuda/lib64/
 GO_TAGS?=
-BUILD_ID?=
+BUILD_ID?=git
 
 TEST_DIR=/tmp/test
 
@@ -99,7 +99,7 @@ endif
 ifeq ($(BUILD_TYPE),cublas)
 CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
 export LLAMA_CUBLAS=1
-export WHISPER_CUDA=1
+export WHISPER_CUBLAS=1
 CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda
 endif
 
@@ -152,11 +152,9 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
 OPTIONAL_GRPC+=backend-assets/grpc/piper
 endif
 
-ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
+ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface
 ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
-ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
-ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
-ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
 ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
 ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
@@ -181,20 +179,20 @@ endif
 all: help
 
 ## BERT embeddings
-sources/go-bert.cpp:
-git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert.cpp
-cd sources/go-bert.cpp && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
+sources/go-bert:
+git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert
+cd sources/go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
 
-sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
-$(MAKE) -C sources/go-bert.cpp libgobert.a
+sources/go-bert/libgobert.a: sources/go-bert
+$(MAKE) -C sources/go-bert libgobert.a
 
-## go-llama.cpp
-sources/go-llama.cpp:
-git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama.cpp
-cd sources/go-llama.cpp && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
+## go-llama-ggml
+sources/go-llama-ggml:
+git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama-ggml
+cd sources/go-llama-ggml && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
 
-sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
-$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
+sources/go-llama-ggml/libbinding.a: sources/go-llama-ggml
+$(MAKE) -C sources/go-llama-ggml BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
 
 ## go-piper
 sources/go-piper:
@@ -213,12 +211,12 @@ sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
 $(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
 
 ## RWKV
-sources/go-rwkv.cpp:
-git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv.cpp
-cd sources/go-rwkv.cpp && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
+sources/go-rwkv:
+git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv
+cd sources/go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
 
-sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
-cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
+sources/go-rwkv/librwkv.a: sources/go-rwkv
+cd sources/go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
 
 ## stable diffusion
 sources/go-stable-diffusion:
@@ -226,7 +224,7 @@ sources/go-stable-diffusion:
 cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
 
 sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
-CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
+$(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
 
 ## tiny-dream
 sources/go-tiny-dream:
@@ -238,24 +236,23 @@ sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
 
 ## whisper
 sources/whisper.cpp:
-git clone https://github.com/ggerganov/whisper.cpp sources/whisper.cpp
+git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp
 cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
 
 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
-cd sources/whisper.cpp && $(MAKE) libwhisper.a
+cd sources/whisper.cpp && make libwhisper.a
 
-get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream
+get-sources: sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream
 
 replace:
-$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
+$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv
 $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
 $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
-$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
+$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert
 $(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
 $(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
 $(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
 $(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
-$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
 
 dropreplace:
 $(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp
@@ -266,7 +263,6 @@ dropreplace:
 $(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
 $(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
 $(GOCMD) mod edit -dropreplace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang
-$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
 
 prepare-sources: get-sources replace
 $(GOCMD) mod download
@@ -274,12 +270,12 @@ prepare-sources: get-sources replace
 ## GENERIC
 rebuild: ## Rebuilds the project
 $(GOCMD) clean -cache
-$(MAKE) -C sources/go-llama.cpp clean
+$(MAKE) -C sources/go-llama-ggml clean
 $(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
-$(MAKE) -C sources/go-rwkv.cpp clean
+$(MAKE) -C sources/go-rwkv clean
 $(MAKE) -C sources/whisper.cpp clean
 $(MAKE) -C sources/go-stable-diffusion clean
-$(MAKE) -C sources/go-bert.cpp clean
+$(MAKE) -C sources/go-bert clean
 $(MAKE) -C sources/go-piper clean
 $(MAKE) -C sources/go-tiny-dream clean
 $(MAKE) build
@@ -292,13 +288,10 @@ clean: ## Remove build related file
 rm -rf ./sources
 rm -rf $(BINARY_NAME)
 rm -rf release/
-rm -rf backend-assets/*
+rm -rf backend-assets
 $(MAKE) -C backend/cpp/grpc clean
 $(MAKE) -C backend/cpp/llama clean
-rm -rf backend/cpp/llama-* || true
 $(MAKE) dropreplace
-$(MAKE) protogen-clean
-rmdir pkg/grpc/proto || true
 
 clean-tests:
 rm -rf test-models
@@ -313,27 +306,9 @@ build: prepare backend-assets grpcs ## Build the project
 $(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
 CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
 
-build-minimal:
-BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp" GO_TAGS=none $(MAKE) build
-
-build-api:
-BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
-
-dist:
-STATIC=true $(MAKE) backend-assets/grpc/llama-cpp-avx2
-ifeq ($(OS),Darwin)
-$(info ${GREEN}I Skip CUDA build on MacOS${RESET})
-else
-$(MAKE) backend-assets/grpc/llama-cpp-cuda
-endif
-$(MAKE) build
+dist: build
 mkdir -p release
-# if BUILD_ID is empty, then we don't append it to the binary name
-ifeq ($(BUILD_ID),)
-cp $(BINARY_NAME) release/$(BINARY_NAME)-$(OS)-$(ARCH)
-else
 cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)
-endif
 
 osx-signed: build
 codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"
@@ -373,7 +348,7 @@ prepare-e2e:
 mkdir -p $(TEST_DIR)
 cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
 test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
+docker build --build-arg BUILD_GRPC=true --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
 
 run-e2e-image:
 ls -liah $(abspath ./tests/e2e-fixtures)
@@ -434,152 +409,30 @@ help: ## Show this help.
 else if (/^## .*$$/) {printf " ${CYAN}%s${RESET}\n", substr($$1,4)} \
 }' $(MAKEFILE_LIST)
 
-.PHONY: protogen
 protogen: protogen-go protogen-python
 
-.PHONY: protogen-clean
-protogen-clean: protogen-go-clean protogen-python-clean
-
-.PHONY: protogen-go
 protogen-go:
-mkdir -p pkg/grpc/proto
 protoc -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
 backend/backend.proto
 
-.PHONY: protogen-go-clean
-protogen-go-clean:
-$(RM) pkg/grpc/proto/backend.pb.go pkg/grpc/proto/backend_grpc.pb.go
-$(RM) bin/*
-
-.PHONY: protogen-python
-protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen
-
-.PHONY: protogen-python-clean
-protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean
-
-.PHONY: autogptq-protogen
-autogptq-protogen:
-$(MAKE) -C backend/python/autogptq protogen
+protogen-python:
+python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/sentencetransformers/ --grpc_python_out=backend/python/sentencetransformers/ backend/backend.proto
+python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/transformers/ --grpc_python_out=backend/python/transformers/ backend/backend.proto
+python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/transformers-musicgen/ --grpc_python_out=backend/python/transformers-musicgen/ backend/backend.proto
+python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/autogptq/ --grpc_python_out=backend/python/autogptq/ backend/backend.proto
+python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/exllama/ --grpc_python_out=backend/python/exllama/ backend/backend.proto
+python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/bark/ --grpc_python_out=backend/python/bark/ backend/backend.proto
+python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/diffusers/ --grpc_python_out=backend/python/diffusers/ backend/backend.proto
+python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/coqui/ --grpc_python_out=backend/python/coqui/ backend/backend.proto
+python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vall-e-x/ --grpc_python_out=backend/python/vall-e-x/ backend/backend.proto
+python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vllm/ --grpc_python_out=backend/python/vllm/ backend/backend.proto
+python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/petals/ --grpc_python_out=backend/python/petals/ backend/backend.proto
+python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/mamba/ --grpc_python_out=backend/python/mamba/ backend/backend.proto
+python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/exllama2/ --grpc_python_out=backend/python/exllama2/ backend/backend.proto
 
-.PHONY: autogptq-protogen-clean
-autogptq-protogen-clean:
-$(MAKE) -C backend/python/autogptq protogen-clean
-
-.PHONY: bark-protogen
-bark-protogen:
-$(MAKE) -C backend/python/bark protogen
-
-.PHONY: bark-protogen-clean
-bark-protogen-clean:
-$(MAKE) -C backend/python/bark protogen-clean
-
-.PHONY: coqui-protogen
-coqui-protogen:
-$(MAKE) -C backend/python/coqui protogen
-
-.PHONY: coqui-protogen-clean
-coqui-protogen-clean:
-$(MAKE) -C backend/python/coqui protogen-clean
-
-.PHONY: diffusers-protogen
-diffusers-protogen:
-$(MAKE) -C backend/python/diffusers protogen
-
-.PHONY: diffusers-protogen-clean
-diffusers-protogen-clean:
-$(MAKE) -C backend/python/diffusers protogen-clean
-
-.PHONY: exllama-protogen
-exllama-protogen:
-$(MAKE) -C backend/python/exllama protogen
-
-.PHONY: exllama-protogen-clean
-exllama-protogen-clean:
-$(MAKE) -C backend/python/exllama protogen-clean
-
-.PHONY: exllama2-protogen
-exllama2-protogen:
-$(MAKE) -C backend/python/exllama2 protogen
-
-.PHONY: exllama2-protogen-clean
-exllama2-protogen-clean:
-$(MAKE) -C backend/python/exllama2 protogen-clean
-
-.PHONY: mamba-protogen
-mamba-protogen:
-$(MAKE) -C backend/python/mamba protogen
-
-.PHONY: mamba-protogen-clean
-mamba-protogen-clean:
-$(MAKE) -C backend/python/mamba protogen-clean
-
-.PHONY: petals-protogen
-petals-protogen:
-$(MAKE) -C backend/python/petals protogen
-
-.PHONY: petals-protogen-clean
-petals-protogen-clean:
-$(MAKE) -C backend/python/petals protogen-clean
-
-.PHONY: rerankers-protogen
-rerankers-protogen:
-$(MAKE) -C backend/python/rerankers protogen
-
-.PHONY: rerankers-protogen-clean
-rerankers-protogen-clean:
-$(MAKE) -C backend/python/rerankers protogen-clean
-
-.PHONY: sentencetransformers-protogen
-sentencetransformers-protogen:
-$(MAKE) -C backend/python/sentencetransformers protogen
-
-.PHONY: sentencetransformers-protogen-clean
-sentencetransformers-protogen-clean:
-$(MAKE) -C backend/python/sentencetransformers protogen-clean
-
-.PHONY: transformers-protogen
-transformers-protogen:
-$(MAKE) -C backend/python/transformers protogen
-
-.PHONY: transformers-protogen-clean
-transformers-protogen-clean:
-$(MAKE) -C backend/python/transformers protogen-clean
-
-.PHONY: parler-tts-protogen
-parler-tts-protogen:
-$(MAKE) -C backend/python/parler-tts protogen
-
-.PHONY: parler-tts-protogen-clean
-parler-tts-protogen-clean:
-$(MAKE) -C backend/python/parler-tts protogen-clean
-
-.PHONY: transformers-musicgen-protogen
-transformers-musicgen-protogen:
-$(MAKE) -C backend/python/transformers-musicgen protogen
-
-.PHONY: transformers-musicgen-protogen-clean
-transformers-musicgen-protogen-clean:
-$(MAKE) -C backend/python/transformers-musicgen protogen-clean
-
-.PHONY: vall-e-x-protogen
-vall-e-x-protogen:
-$(MAKE) -C backend/python/vall-e-x protogen
-
-.PHONY: vall-e-x-protogen-clean
-vall-e-x-protogen-clean:
-$(MAKE) -C backend/python/vall-e-x protogen-clean
-
-.PHONY: vllm-protogen
-vllm-protogen:
-$(MAKE) -C backend/python/vllm protogen
-
-.PHONY: vllm-protogen-clean
-vllm-protogen-clean:
-$(MAKE) -C backend/python/vllm protogen-clean
-
 ## GRPC
 # Note: it is duplicated in the Dockerfile
-prepare-extra-conda-environments: protogen-python
+prepare-extra-conda-environments:
 $(MAKE) -C backend/python/autogptq
 $(MAKE) -C backend/python/bark
 $(MAKE) -C backend/python/coqui
@@ -587,16 +440,14 @@ prepare-extra-conda-environments: protogen-python
 $(MAKE) -C backend/python/vllm
 $(MAKE) -C backend/python/mamba
 $(MAKE) -C backend/python/sentencetransformers
-$(MAKE) -C backend/python/rerankers
 $(MAKE) -C backend/python/transformers
 $(MAKE) -C backend/python/transformers-musicgen
-$(MAKE) -C backend/python/parler-tts
 $(MAKE) -C backend/python/vall-e-x
 $(MAKE) -C backend/python/exllama
 $(MAKE) -C backend/python/petals
 $(MAKE) -C backend/python/exllama2
 
-prepare-test-extra: protogen-python
+prepare-test-extra:
 $(MAKE) -C backend/python/transformers
 $(MAKE) -C backend/python/diffusers
 
@@ -620,19 +471,19 @@ backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/
 @cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
 @cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
 
-backend-assets/grpc: protogen-go replace
+backend-assets/grpc: replace
 mkdir -p backend-assets/grpc
 
-backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
-CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
+backend-assets/grpc/bert-embeddings: sources/go-bert sources/go-bert/libgobert.a backend-assets/grpc
+CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert LIBRARY_PATH=$(CURDIR)/sources/go-bert \
 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
 
 backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
 CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
 
-backend-assets/grpc/huggingface: backend-assets/grpc
-$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
+backend-assets/grpc/langchain-huggingface: backend-assets/grpc
+$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./backend/go/llm/langchain/
 
 backend/cpp/llama/llama.cpp:
 LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
@@ -644,7 +495,7 @@ ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
 -Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
 -DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
 -DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
-build-llama-cpp-grpc-server:
+backend/cpp/llama/grpc-server:
 # Conditionally build grpc for the llama backend to use if needed
 ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
 $(MAKE) -C backend/cpp/grpc build
@@ -653,58 +504,34 @@ ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
 PATH="${INSTALLED_PACKAGES}/bin:${PATH}" \
 CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" \
 LLAMA_VERSION=$(CPPLLAMA_VERSION) \
-$(MAKE) -C backend/cpp/${VARIANT} grpc-server
+$(MAKE) -C backend/cpp/llama grpc-server
 else
 echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
-LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
+LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
 endif
 
-backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc
-cp -rf backend/cpp/llama backend/cpp/llama-avx2
-$(MAKE) -C backend/cpp/llama-avx2 purge
-$(info ${GREEN}I llama-cpp build info:avx2${RESET})
-CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
-cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
-
-backend-assets/grpc/llama-cpp-avx: backend-assets/grpc
-cp -rf backend/cpp/llama backend/cpp/llama-avx
-$(MAKE) -C backend/cpp/llama-avx purge
-$(info ${GREEN}I llama-cpp build info:avx${RESET})
-CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
-cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
-
-backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
-cp -rf backend/cpp/llama backend/cpp/llama-fallback
-$(MAKE) -C backend/cpp/llama-fallback purge
-$(info ${GREEN}I llama-cpp build info:fallback${RESET})
-CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
-cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
+backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server
+cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp
 # TODO: every binary should have its own folder instead, so can have different metal implementations
 ifeq ($(BUILD_TYPE),metal)
-cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
+cp backend/cpp/llama/llama.cpp/build/bin/default.metallib backend-assets/grpc/
 endif
 
-backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
-cp -rf backend/cpp/llama backend/cpp/llama-cuda
-$(MAKE) -C backend/cpp/llama-cuda purge
-$(info ${GREEN}I llama-cpp build info:cuda${RESET})
-CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
-cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
-
-backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
-CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
+backend-assets/grpc/llama-ggml: sources/go-llama-ggml sources/go-llama-ggml/libbinding.a backend-assets/grpc
+$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml
+CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \
 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
 
 backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
 CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
 
-backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
-CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
+backend-assets/grpc/rwkv: sources/go-rwkv sources/go-rwkv/librwkv.a backend-assets/grpc
+CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \
 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
 
 backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
-CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
+CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-stable-diffusion/ LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
 
 backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
@@ -729,8 +556,7 @@ docker:
 docker build \
 --build-arg BASE_IMAGE=$(BASE_IMAGE) \
 --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
---build-arg GO_TAGS="$(GO_TAGS)" \
---build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
+--build-arg GO_TAGS=$(GO_TAGS) \
 --build-arg BUILD_TYPE=$(BUILD_TYPE) \
 -t $(DOCKER_IMAGE) .
 
@@ -738,7 +564,6 @@ docker-aio:
 @echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
 docker build \
 --build-arg BASE_IMAGE=$(BASE_IMAGE) \
---build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
 -t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio .
 
 docker-aio-all:
@@ -747,20 +572,14 @@ docker-aio-all:
 
 docker-image-intel:
 docker build \
---build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
+--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \
 --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 --build-arg GO_TAGS="none" \
---build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
 --build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
 
 docker-image-intel-xpu:
 docker build \
---build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
+--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \
 --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 --build-arg GO_TAGS="none" \
---build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
 --build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
 
-.PHONY: swagger
-swagger:
-swag init -g core/http/app.go --output swagger
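On the base side of this Makefile, the per-variant llama.cpp targets (`llama-cpp-avx`, `llama-cpp-avx2`, `llama-cpp-fallback`, `llama-cpp-cuda`) each copy `backend/cpp/llama` into their own directory, pass variant-specific `CMAKE_ARGS`, and reuse the shared `build-llama-cpp-grpc-server` rule via `VARIANT`. They are plain Make goals, so they can also be driven directly; a sketch using only commands that appear verbatim in the diff, assuming a checkout of the branch that has them:

```bash
# Build just the AVX2 variant of the llama.cpp gRPC server:
make backend-assets/grpc/llama-cpp-avx2

# Or produce a minimal local-ai binary with only the llama-cpp backend,
# mirroring the build-minimal target:
BUILD_GRPC_FOR_BACKEND_LLAMA=true \
GRPC_BACKENDS="backend-assets/grpc/llama-cpp" \
GO_TAGS=none make build
```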
README.md (59 changes)

@@ -20,14 +20,14 @@
 </a>
 </p>
 
-<p align="center">
-<a href="https://hub.docker.com/r/localai/localai" target="blank">
-<img src="https://img.shields.io/badge/dockerhub-images-important.svg?logo=Docker" alt="LocalAI Docker hub"/>
-</a>
-<a href="https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest" target="blank">
-<img src="https://img.shields.io/badge/quay.io-images-important.svg?" alt="LocalAI Quay.io"/>
-</a>
-</p>
+[<img src="https://img.shields.io/badge/dockerhub-images-important.svg?logo=Docker">](https://hub.docker.com/r/localai/localai)
+[<img src="https://img.shields.io/badge/quay.io-images-important.svg?">](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest)
+
+> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
+>
+> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
+
+[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
 
 <p align="center">
 <a href="https://twitter.com/LocalAI_API" target="blank">
@@ -36,32 +36,27 @@
 <a href="https://discord.gg/uJAeKSAGDy" target="blank">
 <img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
 </a>
-</p>
 
-> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
->
-> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
+**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU.
 
-[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
 
-**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
 
 ## 🔥🔥 Hot topics / Roadmap
 
 [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
 
-- Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
-- Reranker API: https://github.com/mudler/LocalAI/pull/2121
-- Gallery WebUI: https://github.com/mudler/LocalAI/pull/2104
-- llama3: https://github.com/mudler/LocalAI/discussions/2076
-- Parler-TTS: https://github.com/mudler/LocalAI/pull/2027
-- Openvino support: https://github.com/mudler/LocalAI/pull/1892
 - Vector store: https://github.com/mudler/LocalAI/pull/1795
 - All-in-one container image: https://github.com/mudler/LocalAI/issues/1855
+- Parallel function calling: https://github.com/mudler/LocalAI/pull/1726
+- Upload file API: https://github.com/mudler/LocalAI/pull/1703
+- Tools API support: https://github.com/mudler/LocalAI/pull/1715
+- LLaVa 1.6: https://github.com/mudler/LocalAI/pull/1714
+- ROCm container images: https://github.com/mudler/LocalAI/pull/1595
+- Intel GPU support (sycl, transformers, diffusers): https://github.com/mudler/LocalAI/issues/1653
+- Mamba support: https://github.com/mudler/LocalAI/pull/1589
+- Start and share models with config file: https://github.com/mudler/LocalAI/pull/1522
+- 🐸 Coqui: https://github.com/mudler/LocalAI/pull/1489
+- Img2vid https://github.com/mudler/LocalAI/pull/1442
 
 Hot topics (looking for contributors):
 
-- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
 - Backends v2: https://github.com/mudler/LocalAI/issues/1126
 - Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
 - Assistant API: https://github.com/mudler/LocalAI/issues/1273
@@ -72,14 +67,10 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
 
 ## 💻 [Getting started](https://localai.io/basics/getting_started/index.html)
 
-For a detailed step-by-step introduction, refer to the [Getting Started](https://localai.io/basics/getting_started/index.html) guide.
+For a detailed step-by-step introduction, refer to the [Getting Started](https://localai.io/basics/getting_started/index.html) guide. For those in a hurry, here's a straightforward one-liner to launch a LocalAI instance with [phi-2](https://huggingface.co/microsoft/phi-2) using `docker`:
 
-For those in a hurry, here's a straightforward one-liner to launch a LocalAI AIO(All-in-one) Image using `docker`:
-
-```bash
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
-# or, if you have an Nvidia GPU:
-# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
+```
+docker run -ti -p 8080:8080 localai/localai:v2.9.0-ffmpeg-core phi-2
 ```
 
 ## 🚀 [Features](https://localai.io/features/)
@@ -92,8 +83,7 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
 - 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
 - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
 - 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
-- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
-- 🆕 [Reranker API](https://localai.io/features/reranker/)
+- 🆕 [Vision API](https://localai.io/features/gpt-vision/)
 
 ## 💻 Usage
 
@@ -114,7 +104,6 @@ Model galleries
 Other:
 - Helm chart https://github.com/go-skynet/helm-charts
 - VSCode extension https://github.com/badgooooor/localai-vscode-plugin
-- Terminal utility https://github.com/djcopley/ShellOracle
 - Local Smart assistant https://github.com/mudler/LocalAGI
 - Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation
 - Discord bot https://github.com/mudler/LocalAGI/tree/main/examples/discord
@@ -133,7 +122,7 @@ Other:
 
 ## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
 
-- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
+- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/ai/answers/tiZMDoZzZV6TLxgDXNBnFE/deploying-helm-charts-on-aws-eks)
 - [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)
 - [Create a slackbot for teams and OSS projects that answer to documentation](https://mudler.pm/posts/smart-slackbot-for-teams/)
 - [LocalAI meets k8sgpt](https://www.youtube.com/watch?v=PKrDNuJ_dfE)
@@ -1,5 +1,11 @@
-name: text-embedding-ada-002
 backend: bert-embeddings
+embeddings: true
+f16: true
 
+gpu_layers: 90
+mmap: true
+name: text-embedding-ada-002
 
 parameters:
   model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
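A config like the one above is exercised through the embeddings endpoint. A minimal sketch, mirroring the usage snippet that the intel profile's embeddings config (removed later in this diff) carries:

```bash
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
  "input": "Your text string goes here",
  "model": "text-embedding-ada-002"
}'
```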
@@ -51,12 +51,3 @@ download_files:
 - filename: "stablediffusion_assets/vocab.txt"
   sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
   uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
-
-usage: |
-    curl http://localhost:8080/v1/images/generations \
-        -H "Content-Type: application/json" \
-        -d '{
-            "prompt": "<positive prompt>|<negative prompt>",
-            "step": 25,
-            "size": "512x512"
-        }'
@@ -1,27 +0,0 @@
-name: jina-reranker-v1-base-en
-backend: rerankers
-parameters:
-  model: cross-encoder
-
-usage: |
-    You can test this model with curl like this:
-
-    curl http://localhost:8080/v1/rerank \
-      -H "Content-Type: application/json" \
-      -d '{
-      "model": "jina-reranker-v1-base-en",
-      "query": "Organic skincare products for sensitive skin",
-      "documents": [
-        "Eco-friendly kitchenware for modern homes",
-        "Biodegradable cleaning supplies for eco-conscious consumers",
-        "Organic cotton baby clothes for sensitive skin",
-        "Natural organic skincare range for sensitive skin",
-        "Tech gadgets for smart homes: 2024 edition",
-        "Sustainable gardening tools and compost solutions",
-        "Sensitive skin-friendly facial cleansers and toners",
-        "Organic food wraps and storage solutions",
-        "All-natural pet food for dogs with allergies",
-        "Yoga mats made from recycled materials"
-      ],
-      "top_n": 3
-    }'
@@ -1,59 +1,25 @@
 name: gpt-4
 mmap: true
 parameters:
-  model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
+  model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf
 
 template:
   chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
-    {{- if .FunctionCall }}
-    <tool_call>
-    {{- else if eq .RoleName "tool" }}
-    <tool_response>
-    {{- end }}
-    {{- if .Content}}
-    {{.Content }}
-    {{- end }}
-    {{- if .FunctionCall}}
-    {{toJson .FunctionCall}}
-    {{- end }}
-    {{- if .FunctionCall }}
-    </tool_call>
-    {{- else if eq .RoleName "tool" }}
-    </tool_response>
-    {{- end }}<|im_end|>
-  # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
-  function: |
-    <|im_start|>system
-    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
-    <tools>
-    {{range .Functions}}
-    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
-    {{end}}
-    </tools>
-    Use the following pydantic model json schema for each tool call you will make:
-    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
-    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
-    <tool_call>
-    {'arguments': <args-dict>, 'name': <function-name>}
-    </tool_call><|im_end|>
-    {{.Input -}}
-    <|im_start|>assistant
-    <tool_call>
+    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
+    {{if .Content}}{{.Content}}{{end}}
+    <|im_end|>
   chat: |
-    {{.Input -}}
+    {{.Input}}
     <|im_start|>assistant
   completion: |
     {{.Input}}
-context_size: 4096
+context_size: 2048
 f16: true
 stopwords:
 - <|im_end|>
 - <dummy32000>
-- "\n</tool_call>"
-- "\n\n\n"
 usage: |
   curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-    "model": "gpt-4",
+    "model": "phi-2-chat",
     "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
   }'
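The Hermes-2-Pro template on the removed side drives LocalAI's OpenAI-compatible function calling: the `function` template advertises the available tools and the model replies inside `<tool_call>` tags. A hedged sketch of how such a config is typically exercised — the `get_weather` tool is a made-up illustration, not something this diff defines:

```bash
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
  "model": "gpt-4",
  "messages": [{"role": "user", "content": "What is the weather in Rome?"}],
  "tools": [{
    "type": "function",
    "function": {
      "name": "get_weather",
      "description": "Get the current weather for a city",
      "parameters": {
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"]
      }
    }
  }]
}'
```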
@@ -1,6 +1,8 @@
 backend: llama-cpp
 context_size: 4096
 f16: true
+gpu_layers: 90
 mmap: true
 name: gpt-4-vision-preview
@@ -12,6 +14,13 @@ roles:
 mmproj: bakllava-mmproj.gguf
 parameters:
   model: bakllava.gguf
+  temperature: 0.2
+  top_k: 40
+  top_p: 0.95
+  seed: -1
+mirostat: 2
+mirostat_eta: 1.0
+mirostat_tau: 1.0
 
 template:
   chat: |
@@ -5,47 +5,9 @@ echo "===> LocalAI All-in-One (AIO) container starting..."
 GPU_ACCELERATION=false
 GPU_VENDOR=""
 
-function check_intel() {
-  if lspci | grep -E 'VGA|3D' | grep -iq intel; then
-    echo "Intel GPU detected"
-    if [ -d /opt/intel ]; then
-      GPU_ACCELERATION=true
-      GPU_VENDOR=intel
-    else
-      echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
-    fi
-  fi
-}
-
-function check_nvidia_wsl() {
-  if lspci | grep -E 'VGA|3D' | grep -iq "Microsoft Corporation Device 008e"; then
-    # We make the assumption this WSL2 card is NVIDIA, then check for nvidia-smi
-    # Make sure the container was run with `--gpus all` as the only required parameter
-    echo "NVIDIA GPU detected via WSL2"
-    # nvidia-smi should be installed in the container
-    if nvidia-smi; then
-      GPU_ACCELERATION=true
-      GPU_VENDOR=nvidia
-    else
-      echo "NVIDIA GPU detected via WSL2, but nvidia-smi is not installed. GPU acceleration will not be available."
-    fi
-  fi
-}
-
-function check_amd() {
-  if lspci | grep -E 'VGA|3D' | grep -iq amd; then
-    echo "AMD GPU detected"
-    # Check if ROCm is installed
-    if [ -d /opt/rocm ]; then
-      GPU_ACCELERATION=true
-      GPU_VENDOR=amd
-    else
-      echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available."
-    fi
-  fi
-}
-
-function check_nvidia() {
+function detect_gpu() {
+  case "$(uname -s)" in
+    Linux)
       if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then
         echo "NVIDIA GPU detected"
         # nvidia-smi should be installed in the container
@@ -55,60 +17,58 @@ function check_nvidia() {
       else
         echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available."
       fi
+      elif lspci | grep -E 'VGA|3D' | grep -iq amd; then
+        echo "AMD GPU detected"
+        # Check if ROCm is installed
+        if [ -d /opt/rocm ]; then
+          GPU_ACCELERATION=true
+          GPU_VENDOR=amd
+        else
+          echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available."
+        fi
       fi
-}
-
-function check_metal() {
+      elif lspci | grep -E 'VGA|3D' | grep -iq intel; then
+        echo "Intel GPU detected"
+        if [ -d /opt/intel ]; then
+          GPU_ACCELERATION=true
+        else
+          echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
+        fi
+      fi
+      ;;
+    Darwin)
       if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then
         echo "Apple Metal supported GPU detected"
         GPU_ACCELERATION=true
         GPU_VENDOR=apple
       fi
-}
-
-function detect_gpu() {
-  case "$(uname -s)" in
-    Linux)
-      check_nvidia
-      check_amd
-      check_intel
-      check_nvidia_wsl
-      ;;
-    Darwin)
-      check_metal
       ;;
   esac
 }
 
 function detect_gpu_size() {
+  if [ "$GPU_ACCELERATION" = true ]; then
+    GPU_SIZE=gpu-8g
+  fi
+
   # Attempting to find GPU memory size for NVIDIA GPUs
-  if [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "nvidia" ]; then
+  if echo "$gpu_model" | grep -iq nvidia; then
     echo "NVIDIA GPU detected. Attempting to find memory size..."
-    # Using head -n 1 to get the total memory of the 1st NVIDIA GPU detected.
-    # If handling multiple GPUs is required in the future, this is the place to do it
-    nvidia_sm=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits | head -n 1)
+    nvidia_sm=($(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits))
     if [ ! -z "$nvidia_sm" ]; then
-      echo "Total GPU Memory: $nvidia_sm MiB"
+      echo "Total GPU Memory: ${nvidia_sm[0]} MiB"
+    else
+      echo "Unable to determine NVIDIA GPU memory size."
+    fi
     # if bigger than 8GB, use 16GB
     #if [ "$nvidia_sm" -gt 8192 ]; then
     #  GPU_SIZE=gpu-16g
-    #else
-      GPU_SIZE=gpu-8g
     #fi
-    else
-      echo "Unable to determine NVIDIA GPU memory size. Falling back to CPU."
-      GPU_SIZE=gpu-8g
-    fi
-  elif [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "intel" ]; then
-    GPU_SIZE=intel
-  # Default to a generic GPU size until we implement GPU size detection for non NVIDIA GPUs
-  elif [ "$GPU_ACCELERATION" = true ]; then
-    echo "Non-NVIDIA GPU detected. Specific GPU memory size detection is not implemented."
-    GPU_SIZE=gpu-8g
+  else
+    echo "Non-NVIDIA GPU detected. GPU memory size detection for non-NVIDIA GPUs is not supported in this script."
+  fi
 
   # default to cpu if GPU_SIZE is not set
-  else
-    echo "GPU acceleration is not enabled or supported. Defaulting to CPU."
+  if [ -z "$GPU_SIZE" ]; then
     GPU_SIZE=cpu
   fi
 }
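The two sides parse the `nvidia-smi` output differently: the older side captures every GPU's total memory into a bash array and reports element 0, while the newer side pipes through `head -n 1` to keep only the first GPU. Both behave the same on a single-GPU host. A sketch of the underlying query, with illustrative output values:

```bash
nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits
# e.g. on a hypothetical 2-GPU host:
#   8192
#   24576
# head -n 1 (newer side) and ${nvidia_sm[0]} (older side) both select 8192 here.
```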
@@ -119,8 +79,8 @@ function check_vars() {
     exit 1
   fi
 
-  if [ -z "$PROFILE" ]; then
-    echo "PROFILE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple"
+  if [ -z "$SIZE" ]; then
+    echo "SIZE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple"
     exit 1
   fi
 }
@@ -128,11 +88,11 @@ function check_vars() {
 detect_gpu
 detect_gpu_size
 
-PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
-export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
+SIZE="${SIZE:-$GPU_SIZE}" # default to cpu
+export MODELS="${MODELS:-/aio/${SIZE}/embeddings.yaml,/aio/${SIZE}/text-to-speech.yaml,/aio/${SIZE}/image-gen.yaml,/aio/${SIZE}/text-to-text.yaml,/aio/${SIZE}/speech-to-text.yaml,/aio/${SIZE}/vision.yaml}"
 
 check_vars
 
-echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS"
+echo "Starting LocalAI with the following models: $MODELS"
 
-exec /build/entrypoint.sh "$@"
+/build/entrypoint.sh "$@"
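In short, the functions_ side renames `SIZE` to `PROFILE`, adds the rerank model to the default list, and `exec`s the real entrypoint so signals reach the server process directly. Either variable can be overridden at run time; a sketch using the `PROFILE` naming from that side:

```bash
# Force the CPU profile even on a GPU host (use SIZE instead on the older side):
docker run -ti --name local-ai -p 8080:8080 -e PROFILE=cpu localai/localai:latest-aio-cpu

# Or load only a custom subset of the preconfigured models:
docker run -ti --name local-ai -p 8080:8080 \
  -e MODELS=/aio/cpu/text-to-text.yaml,/aio/cpu/embeddings.yaml \
  localai/localai:latest-aio-cpu
```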
@@ -1,5 +1,6 @@
 name: text-embedding-ada-002
 backend: sentencetransformers
+embeddings: true
 parameters:
   model: all-MiniLM-L6-v2
@@ -1,6 +1,6 @@
 name: stablediffusion
 parameters:
-  model: DreamShaper_8_pruned.safetensors
+  model: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
 backend: diffusers
 step: 25
 f16: true
@@ -11,15 +11,12 @@ diffusers:
   enable_parameters: "negative_prompt,num_inference_steps"
   scheduler_type: "k_dpmpp_2m"
 
-download_files:
-- filename: DreamShaper_8_pruned.safetensors
-  uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
-
 usage: |
     curl http://localhost:8080/v1/images/generations \
         -H "Content-Type: application/json" \
         -d '{
             "prompt": "<positive prompt>|<negative prompt>",
+            "model": "dreamshaper",
             "step": 25,
             "size": "512x512"
         }'
@@ -1,27 +0,0 @@
-name: jina-reranker-v1-base-en
-backend: rerankers
-parameters:
-  model: cross-encoder
-
-usage: |
-    You can test this model with curl like this:
-
-    curl http://localhost:8080/v1/rerank \
-      -H "Content-Type: application/json" \
-      -d '{
-      "model": "jina-reranker-v1-base-en",
-      "query": "Organic skincare products for sensitive skin",
-      "documents": [
-        "Eco-friendly kitchenware for modern homes",
-        "Biodegradable cleaning supplies for eco-conscious consumers",
-        "Organic cotton baby clothes for sensitive skin",
-        "Natural organic skincare range for sensitive skin",
-        "Tech gadgets for smart homes: 2024 edition",
-        "Sustainable gardening tools and compost solutions",
-        "Sensitive skin-friendly facial cleansers and toners",
-        "Organic food wraps and storage solutions",
-        "All-natural pet food for dogs with allergies",
-        "Yoga mats made from recycled materials"
-      ],
-      "top_n": 3
-    }'
@@ -1,27 +1,21 @@
 name: gpt-4
 mmap: true
 parameters:
-  model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
+  model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
 
+roles:
+  assistant_function_call: assistant
+  function: tool
+
 template:
   chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
-    {{- if .FunctionCall }}
-    <tool_call>
-    {{- else if eq .RoleName "tool" }}
-    <tool_response>
-    {{- end }}
-    {{- if .Content}}
-    {{.Content }}
-    {{- end }}
-    {{- if .FunctionCall}}
-    {{toJson .FunctionCall}}
-    {{- end }}
-    {{- if .FunctionCall }}
-    </tool_call>
-    {{- else if eq .RoleName "tool" }}
-    </tool_response>
-    {{- end }}<|im_end|>
+    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}}
+    {{ if eq .RoleName "assistant_function_call" }}<tool_call>{{end}}
+    {{ if eq .RoleName "function" }}<tool_result>{{end}}
+    {{if .Content}}{{.Content}}{{end}}
+    {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
+    {{ if eq .RoleName "assistant_function_call" }}</tool_call>{{end}}
+    {{ if eq .RoleName "function" }}</tool_result>{{end}}
+    <|im_end|>
   # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
   function: |
     <|im_start|>system
@@ -37,11 +31,11 @@ template:
     <tool_call>
     {'arguments': <args-dict>, 'name': <function-name>}
     </tool_call><|im_end|>
-    {{.Input -}}
+    {{.Input}}
     <|im_start|>assistant
     <tool_call>
   chat: |
-    {{.Input -}}
+    {{.Input}}
     <|im_start|>assistant
   completion: |
     {{.Input}}
@@ -50,8 +44,6 @@ f16: true
 stopwords:
 - <|im_end|>
 - <dummy32000>
-- "\n</tool_call>"
-- "\n\n\n"
 usage: |
   curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
     "model": "gpt-4",
@@ -1,6 +1,8 @@
 backend: llama-cpp
 context_size: 4096
 f16: true
+gpu_layers: 90
 mmap: true
 name: gpt-4-vision-preview
@@ -1,12 +0,0 @@
-name: text-embedding-ada-002
-backend: sentencetransformers
-parameters:
-  model: all-MiniLM-L6-v2
-
-usage: |
-    You can test this model with curl like this:
-
-    curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
-      "input": "Your text string goes here",
-      "model": "text-embedding-ada-002"
-    }'
@@ -1,20 +0,0 @@
-name: stablediffusion
-parameters:
-  model: runwayml/stable-diffusion-v1-5
-backend: diffusers
-step: 25
-f16: true
-diffusers:
-  pipeline_type: StableDiffusionPipeline
-  cuda: true
-  enable_parameters: "negative_prompt,num_inference_steps"
-  scheduler_type: "k_dpmpp_2m"
-
-usage: |
-    curl http://localhost:8080/v1/images/generations \
-        -H "Content-Type: application/json" \
-        -d '{
-            "prompt": "<positive prompt>|<negative prompt>",
-            "step": 25,
-            "size": "512x512"
-        }'
@@ -1,27 +0,0 @@
-name: jina-reranker-v1-base-en
-backend: rerankers
-parameters:
-  model: cross-encoder
-
-usage: |
-    You can test this model with curl like this:
-
-    curl http://localhost:8080/v1/rerank \
-      -H "Content-Type: application/json" \
-      -d '{
-      "model": "jina-reranker-v1-base-en",
-      "query": "Organic skincare products for sensitive skin",
-      "documents": [
-        "Eco-friendly kitchenware for modern homes",
-        "Biodegradable cleaning supplies for eco-conscious consumers",
-        "Organic cotton baby clothes for sensitive skin",
-        "Natural organic skincare range for sensitive skin",
-        "Tech gadgets for smart homes: 2024 edition",
-        "Sustainable gardening tools and compost solutions",
-        "Sensitive skin-friendly facial cleansers and toners",
-        "Organic food wraps and storage solutions",
-        "All-natural pet food for dogs with allergies",
-        "Yoga mats made from recycled materials"
-      ],
-      "top_n": 3
-    }'
@@ -1,18 +0,0 @@
-name: whisper-1
-backend: whisper
-parameters:
-  model: ggml-whisper-base.bin
-
-usage: |
-    ## example audio file
-    wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
-
-    ## Send the example audio file to the transcriptions endpoint
-    curl http://localhost:8080/v1/audio/transcriptions \
-      -H "Content-Type: multipart/form-data" \
-      -F file="@$PWD/gb1.ogg" -F model="whisper-1"
-
-download_files:
-- filename: "ggml-whisper-base.bin"
-  sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
-  uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
@@ -1,15 +0,0 @@
-name: tts-1
-download_files:
-- filename: voice-en-us-amy-low.tar.gz
-  uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
-
-parameters:
-  model: en-us-amy-low.onnx
-
-usage: |
-    To test if this model works as expected, you can use the following curl command:
-
-    curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
-      "model":"tts-1",
-      "input": "Hi, this is a test."
-    }'
@@ -1,59 +0,0 @@
-name: gpt-4
-mmap: false
-f16: false
-parameters:
-  model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
-
-template:
-  chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
-    {{- if .FunctionCall }}
-    <tool_call>
-    {{- else if eq .RoleName "tool" }}
-    <tool_response>
-    {{- end }}
-    {{- if .Content}}
-    {{.Content }}
-    {{- end }}
-    {{- if .FunctionCall}}
-    {{toJson .FunctionCall}}
-    {{- end }}
-    {{- if .FunctionCall }}
-    </tool_call>
-    {{- else if eq .RoleName "tool" }}
-    </tool_response>
-    {{- end }}<|im_end|>
-  # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
-  function: |
-    <|im_start|>system
-    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
-    <tools>
-    {{range .Functions}}
-    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
-    {{end}}
-    </tools>
-    Use the following pydantic model json schema for each tool call you will make:
-    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
-    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
-    <tool_call>
-    {'arguments': <args-dict>, 'name': <function-name>}
-    </tool_call><|im_end|>
-    {{.Input -}}
-    <|im_start|>assistant
-    <tool_call>
-  chat: |
-    {{.Input -}}
-    <|im_start|>assistant
-  completion: |
-    {{.Input}}
-context_size: 4096
-stopwords:
-- <|im_end|>
-- "\n</tool_call>"
-- <dummy32000>
-- "\n\n\n"
-usage: |
-  curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-    "model": "gpt-4",
-    "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
-  }'
@@ -1,35 +0,0 @@
-backend: llama-cpp
-context_size: 4096
-mmap: false
-f16: false
-name: gpt-4-vision-preview
-
-roles:
-  user: "USER:"
-  assistant: "ASSISTANT:"
-  system: "SYSTEM:"
-
-mmproj: llava-v1.6-7b-mmproj-f16.gguf
-parameters:
-  model: llava-v1.6-mistral-7b.Q5_K_M.gguf
-  temperature: 0.2
-  top_k: 40
-  top_p: 0.95
-  seed: -1
-
-template:
-  chat: |
-    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
-    {{.Input}}
-    ASSISTANT:
-
-download_files:
-- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf
-  uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf
-- filename: llava-v1.6-7b-mmproj-f16.gguf
-  uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
-
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-      "model": "gpt-4-vision-preview",
-      "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
@@ -23,30 +23,6 @@ service Backend {
   rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
   rpc StoresGet(StoresGetOptions) returns (StoresGetResult) {}
   rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
-
-  rpc Rerank(RerankRequest) returns (RerankResult) {}
 }
-
-message RerankRequest {
-  string query = 1;
-  repeated string documents = 2;
-  int32 top_n = 3;
-}
-
-message RerankResult {
-  Usage usage = 1;
-  repeated DocumentResult results = 2;
-}
-
-message Usage {
-  int32 total_tokens = 1;
-  int32 prompt_tokens = 2;
-}
-
-message DocumentResult {
-  int32 index = 1;
-  string text = 2;
-  float relevance_score = 3;
-}
 
 message StoresKey {
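The `Rerank` RPC (present only on the functions_ side) takes a query plus candidate documents and returns them scored by relevance. If a backend serving this proto also enabled gRPC reflection or you pointed a client at the proto file — both assumptions, not something this diff shows — it could be exercised roughly like this; the address and port are hypothetical, since LocalAI assigns backend ports dynamically:

```bash
grpcurl -plaintext -import-path backend -proto backend.proto -d '{
  "query": "Organic skincare products for sensitive skin",
  "documents": ["Eco-friendly kitchenware for modern homes", "Natural organic skincare range for sensitive skin"],
  "top_n": 1
}' localhost:50051 backend.Backend/Rerank
```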
@@ -131,15 +107,11 @@ message PredictOptions {
   string NegativePrompt = 40;
   int32 NDraft = 41;
   repeated string Images = 42;
-  bool UseTokenizerTemplate = 43;
-  repeated Message Messages = 44;
 }
 
 // The response message containing the result
 message Reply {
   bytes message = 1;
-  int32 tokens = 2;
-  int32 prompt_tokens = 3;
 }
 
 message ModelOptions {
@@ -201,7 +173,6 @@ message ModelOptions {
   bool EnforceEager = 52;
   int32 SwapSpace = 53;
   int32 MaxModelLen = 54;
-  int32 TensorParallelSize = 55;
 
   string MMProj = 41;
@@ -212,9 +183,6 @@ message ModelOptions {
   float YarnBetaSlow = 47;
 
   string Type = 49;
-
-  bool FlashAttention = 56;
-  bool NoKVOffload = 57;
 }
 
 message Result {
@@ -288,8 +256,3 @@ message StatusResponse {
   State state = 1;
   MemoryUsageData memory = 2;
 }
-
-message Message {
-  string role = 1;
-  string content = 2;
-}
457 backend/backend_grpc.pb.go (new file)
@@ -0,0 +1,457 @@
+// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
+// versions:
+// - protoc-gen-go-grpc v1.2.0
+// - protoc             v4.23.4
+// source: backend/backend.proto
+
+package proto
+
+import (
+	context "context"
+	grpc "google.golang.org/grpc"
+	codes "google.golang.org/grpc/codes"
+	status "google.golang.org/grpc/status"
+)
+
+// This is a compile-time assertion to ensure that this generated file
+// is compatible with the grpc package it is being compiled against.
+// Requires gRPC-Go v1.32.0 or later.
+const _ = grpc.SupportPackageIsVersion7
+
+// BackendClient is the client API for Backend service.
+//
+// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
+type BackendClient interface {
+	Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error)
+	Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error)
+	LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error)
+	PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error)
+	Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error)
+	GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error)
+	AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error)
+	TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error)
+	TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error)
+	Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error)
+}
+
+type backendClient struct {
+	cc grpc.ClientConnInterface
+}
+
+func NewBackendClient(cc grpc.ClientConnInterface) BackendClient {
+	return &backendClient{cc}
+}
+
+func (c *backendClient) Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) {
+	out := new(Reply)
+	err := c.cc.Invoke(ctx, "/backend.Backend/Health", in, out, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+func (c *backendClient) Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) {
+	out := new(Reply)
+	err := c.cc.Invoke(ctx, "/backend.Backend/Predict", in, out, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+func (c *backendClient) LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) {
+	out := new(Result)
+	err := c.cc.Invoke(ctx, "/backend.Backend/LoadModel", in, out, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+func (c *backendClient) PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) {
+	stream, err := c.cc.NewStream(ctx, &Backend_ServiceDesc.Streams[0], "/backend.Backend/PredictStream", opts...)
+	if err != nil {
+		return nil, err
+	}
+	x := &backendPredictStreamClient{stream}
+	if err := x.ClientStream.SendMsg(in); err != nil {
+		return nil, err
+	}
+	if err := x.ClientStream.CloseSend(); err != nil {
+		return nil, err
+	}
+	return x, nil
+}
+
+type Backend_PredictStreamClient interface {
+	Recv() (*Reply, error)
+	grpc.ClientStream
+}
+
+type backendPredictStreamClient struct {
+	grpc.ClientStream
+}
+
+func (x *backendPredictStreamClient) Recv() (*Reply, error) {
+	m := new(Reply)
+	if err := x.ClientStream.RecvMsg(m); err != nil {
+		return nil, err
+	}
+	return m, nil
+}
+
+func (c *backendClient) Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) {
+	out := new(EmbeddingResult)
+	err := c.cc.Invoke(ctx, "/backend.Backend/Embedding", in, out, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+func (c *backendClient) GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) {
+	out := new(Result)
+	err := c.cc.Invoke(ctx, "/backend.Backend/GenerateImage", in, out, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+func (c *backendClient) AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) {
+	out := new(TranscriptResult)
+	err := c.cc.Invoke(ctx, "/backend.Backend/AudioTranscription", in, out, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+func (c *backendClient) TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) {
+	out := new(Result)
+	err := c.cc.Invoke(ctx, "/backend.Backend/TTS", in, out, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+func (c *backendClient) TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) {
+	out := new(TokenizationResponse)
+	err := c.cc.Invoke(ctx, "/backend.Backend/TokenizeString", in, out, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+func (c *backendClient) Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) {
+	out := new(StatusResponse)
+	err := c.cc.Invoke(ctx, "/backend.Backend/Status", in, out, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+// BackendServer is the server API for Backend service.
+// All implementations must embed UnimplementedBackendServer
+// for forward compatibility
+type BackendServer interface {
+	Health(context.Context, *HealthMessage) (*Reply, error)
+	Predict(context.Context, *PredictOptions) (*Reply, error)
+	LoadModel(context.Context, *ModelOptions) (*Result, error)
+	PredictStream(*PredictOptions, Backend_PredictStreamServer) error
+	Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error)
+	GenerateImage(context.Context, *GenerateImageRequest) (*Result, error)
+	AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error)
+	TTS(context.Context, *TTSRequest) (*Result, error)
+	TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error)
+	Status(context.Context, *HealthMessage) (*StatusResponse, error)
+	mustEmbedUnimplementedBackendServer()
+}
+
+// UnimplementedBackendServer must be embedded to have forward compatible implementations.
+type UnimplementedBackendServer struct {
+}
+
+func (UnimplementedBackendServer) Health(context.Context, *HealthMessage) (*Reply, error) {
+	return nil, status.Errorf(codes.Unimplemented, "method Health not implemented")
+}
+func (UnimplementedBackendServer) Predict(context.Context, *PredictOptions) (*Reply, error) {
+	return nil, status.Errorf(codes.Unimplemented, "method Predict not implemented")
+}
+func (UnimplementedBackendServer) LoadModel(context.Context, *ModelOptions) (*Result, error) {
+	return nil, status.Errorf(codes.Unimplemented, "method LoadModel not implemented")
+}
+func (UnimplementedBackendServer) PredictStream(*PredictOptions, Backend_PredictStreamServer) error {
+	return status.Errorf(codes.Unimplemented, "method PredictStream not implemented")
+}
+func (UnimplementedBackendServer) Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error) {
+	return nil, status.Errorf(codes.Unimplemented, "method Embedding not implemented")
+}
+func (UnimplementedBackendServer) GenerateImage(context.Context, *GenerateImageRequest) (*Result, error) {
+	return nil, status.Errorf(codes.Unimplemented, "method GenerateImage not implemented")
+}
+func (UnimplementedBackendServer) AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error) {
+	return nil, status.Errorf(codes.Unimplemented, "method AudioTranscription not implemented")
+}
+func (UnimplementedBackendServer) TTS(context.Context, *TTSRequest) (*Result, error) {
+	return nil, status.Errorf(codes.Unimplemented, "method TTS not implemented")
+}
+func (UnimplementedBackendServer) TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error) {
+	return nil, status.Errorf(codes.Unimplemented, "method TokenizeString not implemented")
+}
+func (UnimplementedBackendServer) Status(context.Context, *HealthMessage) (*StatusResponse, error) {
+	return nil, status.Errorf(codes.Unimplemented, "method Status not implemented")
+}
+func (UnimplementedBackendServer) mustEmbedUnimplementedBackendServer() {}
+
+// UnsafeBackendServer may be embedded to opt out of forward compatibility for this service.
+// Use of this interface is not recommended, as added methods to BackendServer will
+// result in compilation errors.
+type UnsafeBackendServer interface {
+	mustEmbedUnimplementedBackendServer()
+}
+
+func RegisterBackendServer(s grpc.ServiceRegistrar, srv BackendServer) {
+	s.RegisterService(&Backend_ServiceDesc, srv)
+}
+
+func _Backend_Health_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
+	in := new(HealthMessage)
+	if err := dec(in); err != nil {
+		return nil, err
+	}
+	if interceptor == nil {
+		return srv.(BackendServer).Health(ctx, in)
+	}
+	info := &grpc.UnaryServerInfo{
+		Server:     srv,
+		FullMethod: "/backend.Backend/Health",
+	}
+	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
+		return srv.(BackendServer).Health(ctx, req.(*HealthMessage))
+	}
+	return interceptor(ctx, in, info, handler)
+}
+
+func _Backend_Predict_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
+	in := new(PredictOptions)
+	if err := dec(in); err != nil {
+		return nil, err
+	}
+	if interceptor == nil {
+		return srv.(BackendServer).Predict(ctx, in)
+	}
+	info := &grpc.UnaryServerInfo{
+		Server:     srv,
+		FullMethod: "/backend.Backend/Predict",
+	}
+	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
+		return srv.(BackendServer).Predict(ctx, req.(*PredictOptions))
+	}
+	return interceptor(ctx, in, info, handler)
+}
+
+func _Backend_LoadModel_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
+	in := new(ModelOptions)
+	if err := dec(in); err != nil {
+		return nil, err
+	}
+	if interceptor == nil {
+		return srv.(BackendServer).LoadModel(ctx, in)
+	}
+	info := &grpc.UnaryServerInfo{
+		Server:     srv,
+		FullMethod: "/backend.Backend/LoadModel",
+	}
+	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
+		return srv.(BackendServer).LoadModel(ctx, req.(*ModelOptions))
+	}
+	return interceptor(ctx, in, info, handler)
+}
+
+func _Backend_PredictStream_Handler(srv interface{}, stream grpc.ServerStream) error {
+	m := new(PredictOptions)
+	if err := stream.RecvMsg(m); err != nil {
+		return err
+	}
+	return srv.(BackendServer).PredictStream(m, &backendPredictStreamServer{stream})
+}
+
+type Backend_PredictStreamServer interface {
+	Send(*Reply) error
+	grpc.ServerStream
+}
+
+type backendPredictStreamServer struct {
+	grpc.ServerStream
+}
+
+func (x *backendPredictStreamServer) Send(m *Reply) error {
+	return x.ServerStream.SendMsg(m)
+}
+
+func _Backend_Embedding_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
+	in := new(PredictOptions)
+	if err := dec(in); err != nil {
+		return nil, err
+	}
+	if interceptor == nil {
+		return srv.(BackendServer).Embedding(ctx, in)
+	}
+	info := &grpc.UnaryServerInfo{
+		Server:     srv,
+		FullMethod: "/backend.Backend/Embedding",
+	}
+	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
+		return srv.(BackendServer).Embedding(ctx, req.(*PredictOptions))
+	}
+	return interceptor(ctx, in, info, handler)
+}
+
+func _Backend_GenerateImage_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
+	in := new(GenerateImageRequest)
+	if err := dec(in); err != nil {
+		return nil, err
+	}
+	if interceptor == nil {
+		return srv.(BackendServer).GenerateImage(ctx, in)
+	}
+	info := &grpc.UnaryServerInfo{
+		Server:     srv,
+		FullMethod: "/backend.Backend/GenerateImage",
+	}
+	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
+		return srv.(BackendServer).GenerateImage(ctx, req.(*GenerateImageRequest))
+	}
+	return interceptor(ctx, in, info, handler)
+}
+
+func _Backend_AudioTranscription_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
+	in := new(TranscriptRequest)
+	if err := dec(in); err != nil {
+		return nil, err
+	}
+	if interceptor == nil {
+		return srv.(BackendServer).AudioTranscription(ctx, in)
+	}
+	info := &grpc.UnaryServerInfo{
+		Server:     srv,
+		FullMethod: "/backend.Backend/AudioTranscription",
+	}
+	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
+		return srv.(BackendServer).AudioTranscription(ctx, req.(*TranscriptRequest))
+	}
+	return interceptor(ctx, in, info, handler)
+}
+
+func _Backend_TTS_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
+	in := new(TTSRequest)
+	if err := dec(in); err != nil {
+		return nil, err
+	}
+	if interceptor == nil {
+		return srv.(BackendServer).TTS(ctx, in)
+	}
+	info := &grpc.UnaryServerInfo{
+		Server:     srv,
+		FullMethod: "/backend.Backend/TTS",
+	}
+	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
+		return srv.(BackendServer).TTS(ctx, req.(*TTSRequest))
+	}
+	return interceptor(ctx, in, info, handler)
+}
+
+func _Backend_TokenizeString_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
+	in := new(PredictOptions)
+	if err := dec(in); err != nil {
+		return nil, err
+	}
+	if interceptor == nil {
+		return srv.(BackendServer).TokenizeString(ctx, in)
+	}
+	info := &grpc.UnaryServerInfo{
+		Server:     srv,
+		FullMethod: "/backend.Backend/TokenizeString",
+	}
+	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
+		return srv.(BackendServer).TokenizeString(ctx, req.(*PredictOptions))
+	}
+	return interceptor(ctx, in, info, handler)
+}
+
+func _Backend_Status_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
+	in := new(HealthMessage)
+	if err := dec(in); err != nil {
+		return nil, err
+	}
+	if interceptor == nil {
+		return srv.(BackendServer).Status(ctx, in)
+	}
+	info := &grpc.UnaryServerInfo{
+		Server:     srv,
+		FullMethod: "/backend.Backend/Status",
+	}
+	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
+		return srv.(BackendServer).Status(ctx, req.(*HealthMessage))
+	}
+	return interceptor(ctx, in, info, handler)
+}
+
+// Backend_ServiceDesc is the grpc.ServiceDesc for Backend service.
+// It's only intended for direct use with grpc.RegisterService,
+// and not to be introspected or modified (even as a copy)
+var Backend_ServiceDesc = grpc.ServiceDesc{
+	ServiceName: "backend.Backend",
+	HandlerType: (*BackendServer)(nil),
+	Methods: []grpc.MethodDesc{
+		{
+			MethodName: "Health",
+			Handler:    _Backend_Health_Handler,
+		},
+		{
+			MethodName: "Predict",
+			Handler:    _Backend_Predict_Handler,
+		},
+		{
+			MethodName: "LoadModel",
+			Handler:    _Backend_LoadModel_Handler,
+		},
+		{
+			MethodName: "Embedding",
+			Handler:    _Backend_Embedding_Handler,
+		},
+		{
+			MethodName: "GenerateImage",
+			Handler:    _Backend_GenerateImage_Handler,
+		},
+		{
+			MethodName: "AudioTranscription",
+			Handler:    _Backend_AudioTranscription_Handler,
+		},
+		{
+			MethodName: "TTS",
+			Handler:    _Backend_TTS_Handler,
+		},
+		{
+			MethodName: "TokenizeString",
+			Handler:    _Backend_TokenizeString_Handler,
+		},
+		{
+			MethodName: "Status",
+			Handler:    _Backend_Status_Handler,
+		},
+	},
+	Streams: []grpc.StreamDesc{
+		{
+			StreamName:    "PredictStream",
+			Handler:       _Backend_PredictStream_Handler,
+			ServerStreams: true,
+		},
+	},
+	Metadata: "backend/backend.proto",
+}
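This file is checked-in protoc output; its header records the generator versions. Regenerating it after a `.proto` change would look roughly like this — a sketch assuming the plugin versions from the header are installed on the build machine:

```bash
# Install the plugin version recorded in the generated header:
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@v1.2.0
# Regenerate both message and service stubs from the proto definition:
protoc --go_out=. --go-grpc_out=. backend/backend.proto
```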
@@ -5,6 +5,7 @@ SYSTEM ?= $(HOST_SYSTEM)
 TAG_LIB_GRPC?=v1.59.0
 GIT_REPO_LIB_GRPC?=https://github.com/grpc/grpc.git
 GIT_CLONE_DEPTH?=1
+NUM_BUILD_THREADS?=$(shell nproc --ignore=1)
 
 INSTALLED_PACKAGES=installed_packages
 GRPC_REPO=grpc_repo
@@ -51,7 +52,7 @@ $(GRPC_REPO):
 
 $(GRPC_BUILD): $(GRPC_REPO)
 	mkdir -p $(GRPC_BUILD)
-	cd $(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . && cmake --build . --target install
+	cd $(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . -- -j ${NUM_BUILD_THREADS} && cmake --build . --target install -- -j ${NUM_BUILD_THREADS}
 
 build: $(INSTALLED_PACKAGES)
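`NUM_BUILD_THREADS` defaults to one less than the machine's core count, so the gRPC build parallelizes without saturating the host. A quick way to see what the default resolves to and to override it (output values are illustrative):

```bash
nproc              # total cores, e.g. 16
nproc --ignore=1   # value picked up by NUM_BUILD_THREADS, e.g. 15
# Throttle the build explicitly if needed:
make build NUM_BUILD_THREADS=4
```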
@@ -43,27 +43,35 @@ llama.cpp:
 
 llama.cpp/examples/grpc-server: llama.cpp
 	mkdir -p llama.cpp/examples/grpc-server
-	bash prepare.sh
+	cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
+	cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
+	cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
+	cp -rfv $(abspath ./)/utils.hpp llama.cpp/examples/grpc-server/
+	echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
+	## XXX: In some versions of CMake clip wasn't being built before llama.
+	## This is an hack for now, but it should be fixed in the future.
+	cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
+	cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
+	echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
+	cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
+	cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
 
 rebuild:
-	bash prepare.sh
+	cp -rfv $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
+	cp -rfv $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
+	cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
 	rm -rf grpc-server
 	$(MAKE) grpc-server
 
-purge:
-	rm -rf llama.cpp/build
-	rm -rf llama.cpp/examples/grpc-server
-	rm -rf grpc-server
-
-clean: purge
+clean:
 	rm -rf llama.cpp
+	rm -rf grpc-server
 
 grpc-server: llama.cpp llama.cpp/examples/grpc-server
-	@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
 ifneq (,$(findstring sycl,$(BUILD_TYPE)))
 	bash -c "source $(ONEAPI_VARS); \
-		cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)"
+		cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release"
 else
-	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)
+	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release
 endif
 	cp llama.cpp/build/bin/grpc-server .
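From the caller's perspective the targets are unchanged by this refactor. A sketch of a typical invocation, assuming the `BUILD_TYPE` values documented in the repository's `.env` (cublas, openblas, clblas, and so on):

```bash
# Build the llama.cpp gRPC server with CUDA acceleration:
BUILD_TYPE=cublas make grpc-server
# Start from scratch if the tree is in a bad state:
make clean && make grpc-server
```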
@@ -2254,9 +2254,6 @@ static void params_parse(const backend::ModelOptions* request,
     }
     params.use_mlock = request->mlock();
     params.use_mmap = request->mmap();
-    params.flash_attn = request->flashattention();
-    params.no_kv_offload = request->nokvoffload();
-
     params.embedding = request->embeddings();

    if (request->ropescaling() == "none")   { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; }
@@ -2335,10 +2332,6 @@ public:
            std::string completion_text = result.result_json.value("content", "");

            reply.set_message(completion_text);
-           int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
-           reply.set_tokens(tokens_predicted);
-           int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
-           reply.set_prompt_tokens(tokens_evaluated);

            // Send the reply
            writer->Write(reply);
@@ -2364,10 +2357,6 @@ public:
        task_result result = llama.queue_results.recv(task_id);
        if (!result.error && result.stop) {
            completion_text = result.result_json.value("content", "");
-           int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
-           int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
-           reply->set_prompt_tokens(tokens_evaluated);
-           reply->set_tokens(tokens_predicted);
            reply->set_message(completion_text);
        }
        else
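On the right-hand side of the two hunks above, set_tokens()/set_prompt_tokens() are no longer called, so a client of this backend only gets the generated text back. A minimal client sketch in Python against the generated stubs that appear later in this comparison (backend_pb2 / backend_pb2_grpc); the address and prompt are illustrative, and the field names (Prompt, message) are taken from code elsewhere in this diff rather than from the proto definition itself:

import grpc
import backend_pb2
import backend_pb2_grpc

with grpc.insecure_channel("localhost:50051") as channel:
    stub = backend_pb2_grpc.BackendStub(channel)
    # Predict is unary-unary: PredictOptions in, Reply out.
    reply = stub.Predict(backend_pb2.PredictOptions(Prompt="Hello"))
    # Without set_tokens()/set_prompt_tokens() on the server, the token
    # counters in Reply stay at their protobuf default of zero.
    print(reply.message)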
@@ -1,20 +0,0 @@
-#!/bin/bash
-
-cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
-cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
-cp -rfv json.hpp llama.cpp/examples/grpc-server/
-cp -rfv utils.hpp llama.cpp/examples/grpc-server/
-
-if grep -q "grpc-server" llama.cpp/examples/CMakeLists.txt; then
-    echo "grpc-server already added"
-else
-    echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
-fi
-
-## XXX: In some versions of CMake clip wasn't being built before llama.
-## This is an hack for now, but it should be fixed in the future.
-cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
-cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
-echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
-cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
-cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
@@ -4,7 +4,6 @@ package main
 // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
 import (
 	"fmt"
-	"os"

 	"github.com/go-skynet/LocalAI/pkg/grpc/base"
 	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
@@ -19,14 +18,9 @@ type LLM struct {
 }

 func (llm *LLM) Load(opts *pb.ModelOptions) error {
-	var err error
-	hfToken := os.Getenv("HUGGINGFACEHUB_API_TOKEN")
-	if hfToken == "" {
-		return fmt.Errorf("no huggingface token provided")
-	}
-	llm.langchain, err = langchain.NewHuggingFace(opts.Model, hfToken)
+	llm.langchain, _ = langchain.NewHuggingFace(opts.Model)
 	llm.model = opts.Model
-	return err
+	return nil
 }

 func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
@@ -11,8 +11,8 @@ import (
 	"github.com/go-skynet/LocalAI/core/schema"
 )

-func ffmpegCommand(args []string) (string, error) {
-	cmd := exec.Command("ffmpeg", args...) // Constrain this to ffmpeg to permit security scanner to see that the command is safe.
+func runCommand(command []string) (string, error) {
+	cmd := exec.Command(command[0], command[1:]...)
 	cmd.Env = os.Environ()
 	out, err := cmd.CombinedOutput()
 	return string(out), err
@@ -21,16 +21,16 @@ func ffmpegCommand(args []string) (string, error) {
 // AudioToWav converts audio to wav for transcribe.
 // TODO: use https://github.com/mccoyst/ogg?
 func audioToWav(src, dst string) error {
-	commandArgs := []string{"-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
-	out, err := ffmpegCommand(commandArgs)
+	command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
+	out, err := runCommand(command)
 	if err != nil {
 		return fmt.Errorf("error: %w out: %s", err, out)
 	}
 	return nil
 }

-func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.TranscriptionResult, error) {
-	res := schema.TranscriptionResult{}
+func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.Result, error) {
+	res := schema.Result{}

 	dir, err := os.MkdirTemp("", "whisper")
 	if err != nil {
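For reference, the ffmpeg invocation above resamples any input to 16 kHz mono signed 16-bit PCM, which is the format the whisper transcription path expects. A rough Python equivalent of the audioToWav helper, assuming only that an ffmpeg binary is on PATH (paths are illustrative):

import subprocess

def audio_to_wav(src: str, dst: str) -> None:
    # 16 kHz, 1 channel, signed 16-bit little-endian PCM, as in the Go helper.
    cmd = ["ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000",
           "-ac", "1", "-acodec", "pcm_s16le", dst]
    proc = subprocess.run(cmd, capture_output=True, text=True)
    if proc.returncode != 0:
        raise RuntimeError(f"error: {proc.returncode} out: {proc.stdout}{proc.stderr}")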
@@ -21,6 +21,6 @@ func (sd *Whisper) Load(opts *pb.ModelOptions) error {
 	return err
 }

-func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) {
+func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.Result, error) {
 	return Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads))
 }
@@ -1,17 +1,4 @@
 .PHONY: autogptq
-autogptq: protogen
-	bash install.sh
+autogptq:
+	$(MAKE) -C ../common-env/transformers

-.PHONY: protogen
-protogen: backend_pb2_grpc.py backend_pb2.py
-
-.PHONY: protogen-clean
-protogen-clean:
-	$(RM) backend_pb2_grpc.py backend_pb2.py
-
-backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
-
-.PHONY: clean
-clean: protogen-clean
-	rm -rf venv __pycache__
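The removed protogen target regenerates the Python stubs from backend.proto via grpc_tools. The same generation can also be driven programmatically; a sketch, assuming backend.proto resolves through the Makefile's -I../.. include path:

from grpc_tools import protoc

protoc.main([
    "grpc_tools.protoc",    # argv[0] placeholder expected by protoc.main
    "-I../..",
    "--python_out=.",       # writes backend_pb2.py
    "--grpc_python_out=.",  # writes backend_pb2_grpc.py
    "backend.proto",
])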
@@ -5,14 +5,12 @@ import signal
 import sys
 import os
 import time
-import base64

 import grpc
 import backend_pb2
 import backend_pb2_grpc

 from auto_gptq import AutoGPTQForCausalLM
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoTokenizer
 from transformers import TextGenerationPipeline

 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
@@ -30,18 +28,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         if request.Device != "":
             device = request.Device

-        # support loading local model files
-        model_path = os.path.join(os.environ.get('MODELS_PATH', './'), request.Model)
-        tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True, trust_remote_code=request.TrustRemoteCode)
-
-        # support model `Qwen/Qwen-VL-Chat-Int4`
-        if "qwen-vl" in request.Model.lower():
-            self.model_name = "Qwen-VL-Chat"
-            model = AutoModelForCausalLM.from_pretrained(model_path,
-                trust_remote_code=request.TrustRemoteCode,
-                device_map="auto").eval()
-        else:
-            model = AutoGPTQForCausalLM.from_quantized(model_path,
+        tokenizer = AutoTokenizer.from_pretrained(request.Model, use_fast=request.UseFastTokenizer)
+
+        model = AutoGPTQForCausalLM.from_quantized(request.Model,
                 model_basename=request.ModelBaseName,
                 use_safetensors=True,
                 trust_remote_code=request.TrustRemoteCode,
@@ -66,11 +55,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         if request.TopP != 0.0:
             top_p = request.TopP

-
-        prompt_images = self.recompile_vl_prompt(request)
-        compiled_prompt = prompt_images[0]
-        print(f"Prompt: {compiled_prompt}", file=sys.stderr)
-
         # Implement Predict RPC
         pipeline = TextGenerationPipeline(
             model=self.model,
@@ -80,17 +64,10 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
             top_p=top_p,
             repetition_penalty=penalty,
         )
-        t = pipeline(compiled_prompt)[0]["generated_text"]
-        print(f"generated_text: {t}", file=sys.stderr)
-
-        if compiled_prompt in t:
-            t = t.replace(compiled_prompt, "")
-        # house keeping. Remove the image files from /tmp folder
-        for img_path in prompt_images[1]:
-            try:
-                os.remove(img_path)
-            except Exception as e:
-                print(f"Error removing image file: {img_path}, {e}", file=sys.stderr)
+        t = pipeline(request.Prompt)[0]["generated_text"]
+        # Remove prompt from response if present
+        if request.Prompt in t:
+            t = t.replace(request.Prompt, "")

         return backend_pb2.Result(message=bytes(t, encoding='utf-8'))

@@ -101,24 +78,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         # Not implemented yet
         return self.Predict(request, context)

-    def recompile_vl_prompt(self, request):
-        prompt = request.Prompt
-        image_paths = []
-
-        if "qwen-vl" in self.model_name.lower():
-            # request.Images is an array which contains base64 encoded images. Iterate the request.Images array, decode and save each image to /tmp folder with a random filename.
-            # Then, save the image file paths to an array "image_paths".
-            # read "request.Prompt", replace "[img-%d]" with the image file paths in the order they appear in "image_paths". Save the new prompt to "prompt".
-            for i, img in enumerate(request.Images):
-                timestamp = str(int(time.time() * 1000)) # Generate timestamp
-                img_path = f"/tmp/vl-{timestamp}.jpg" # Use timestamp in filename
-                with open(img_path, "wb") as f:
-                    f.write(base64.b64decode(img))
-                image_paths.append(img_path)
-                prompt = prompt.replace(f"[img-{i}]", "<img>" + img_path + "</img>,")
-        else:
-            prompt = request.Prompt
-        return (prompt, image_paths)
-
 def serve(address):
     server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
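Both sides of the LoadModel hunk go through AutoGPTQForCausalLM.from_quantized; they differ only in whether the model id is used as-is or joined with MODELS_PATH, and in the extra qwen-vl branch. A minimal standalone sketch of that load path, with a placeholder model id and the same keyword arguments that appear above:

from auto_gptq import AutoGPTQForCausalLM
from transformers import AutoTokenizer, TextGenerationPipeline

model_id = "some-org/some-gptq-model"  # placeholder, not from this diff
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
model = AutoGPTQForCausalLM.from_quantized(
    model_id,
    model_basename=None,    # or the checkpoint's basename, as above
    use_safetensors=True,
    trust_remote_code=False,
)
pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer)
print(pipeline("Hello")[0]["generated_text"])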
backend/python/autogptq/autogptq.yml (new file, 86 lines)
@@ -0,0 +1,86 @@
+name: autogptq
+channels:
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=5.1=1_gnu
+  - bzip2=1.0.8=h7b6447c_0
+  - ca-certificates=2023.08.22=h06a4308_0
+  - ld_impl_linux-64=2.38=h1181459_1
+  - libffi=3.4.4=h6a678d5_0
+  - libgcc-ng=11.2.0=h1234567_1
+  - libgomp=11.2.0=h1234567_1
+  - libstdcxx-ng=11.2.0=h1234567_1
+  - libuuid=1.41.5=h5eee18b_0
+  - ncurses=6.4=h6a678d5_0
+  - openssl=3.0.11=h7f8727e_2
+  - pip=23.2.1=py311h06a4308_0
+  - python=3.11.5=h955ad1f_0
+  - readline=8.2=h5eee18b_0
+  - setuptools=68.0.0=py311h06a4308_0
+  - sqlite=3.41.2=h5eee18b_0
+  - tk=8.6.12=h1ccaba5_0
+  - wheel=0.41.2=py311h06a4308_0
+  - xz=5.4.2=h5eee18b_0
+  - zlib=1.2.13=h5eee18b_0
+  - pip:
+    - accelerate==0.23.0
+    - aiohttp==3.8.5
+    - aiosignal==1.3.1
+    - async-timeout==4.0.3
+    - attrs==23.1.0
+    - auto-gptq==0.4.2
+    - certifi==2023.7.22
+    - charset-normalizer==3.3.0
+    - datasets==2.14.5
+    - dill==0.3.7
+    - filelock==3.12.4
+    - frozenlist==1.4.0
+    - fsspec==2023.6.0
+    - grpcio==1.59.0
+    - huggingface-hub==0.16.4
+    - idna==3.4
+    - jinja2==3.1.2
+    - markupsafe==2.1.3
+    - mpmath==1.3.0
+    - multidict==6.0.4
+    - multiprocess==0.70.15
+    - networkx==3.1
+    - numpy==1.26.0
+    - nvidia-cublas-cu12==12.1.3.1
+    - nvidia-cuda-cupti-cu12==12.1.105
+    - nvidia-cuda-nvrtc-cu12==12.1.105
+    - nvidia-cuda-runtime-cu12==12.1.105
+    - nvidia-cudnn-cu12==8.9.2.26
+    - nvidia-cufft-cu12==11.0.2.54
+    - nvidia-curand-cu12==10.3.2.106
+    - nvidia-cusolver-cu12==11.4.5.107
+    - nvidia-cusparse-cu12==12.1.0.106
+    - nvidia-nccl-cu12==2.18.1
+    - nvidia-nvjitlink-cu12==12.2.140
+    - nvidia-nvtx-cu12==12.1.105
+    - packaging==23.2
+    - pandas==2.1.1
+    - peft==0.5.0
+    - protobuf==4.24.4
+    - psutil==5.9.5
+    - pyarrow==13.0.0
+    - python-dateutil==2.8.2
+    - pytz==2023.3.post1
+    - pyyaml==6.0.1
+    - regex==2023.10.3
+    - requests==2.31.0
+    - rouge==1.0.1
+    - safetensors>=0.3.3
+    - six==1.16.0
+    - sympy==1.12
+    - tokenizers==0.14.0
+    - torch==2.1.0
+    - tqdm==4.66.1
+    - transformers==4.34.0
+    - triton==2.1.0
+    - typing-extensions==4.8.0
+    - tzdata==2023.3
+    - urllib3==2.0.6
+    - xxhash==3.4.1
+    - yarl==1.9.2
backend/python/autogptq/backend_pb2.py (new file, 61 lines)
File diff suppressed because one or more lines are too long
backend/python/autogptq/backend_pb2_grpc.py (new file, 363 lines)
@@ -0,0 +1,363 @@
+# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
+"""Client and server classes corresponding to protobuf-defined services."""
+import grpc
+
+import backend_pb2 as backend__pb2
+
+
+class BackendStub(object):
+    """Missing associated documentation comment in .proto file."""
+
+    def __init__(self, channel):
+        """Constructor.
+
+        Args:
+            channel: A grpc.Channel.
+        """
+        self.Health = channel.unary_unary(
+                '/backend.Backend/Health',
+                request_serializer=backend__pb2.HealthMessage.SerializeToString,
+                response_deserializer=backend__pb2.Reply.FromString,
+                )
+        self.Predict = channel.unary_unary(
+                '/backend.Backend/Predict',
+                request_serializer=backend__pb2.PredictOptions.SerializeToString,
+                response_deserializer=backend__pb2.Reply.FromString,
+                )
+        self.LoadModel = channel.unary_unary(
+                '/backend.Backend/LoadModel',
+                request_serializer=backend__pb2.ModelOptions.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.PredictStream = channel.unary_stream(
+                '/backend.Backend/PredictStream',
+                request_serializer=backend__pb2.PredictOptions.SerializeToString,
+                response_deserializer=backend__pb2.Reply.FromString,
+                )
+        self.Embedding = channel.unary_unary(
+                '/backend.Backend/Embedding',
+                request_serializer=backend__pb2.PredictOptions.SerializeToString,
+                response_deserializer=backend__pb2.EmbeddingResult.FromString,
+                )
+        self.GenerateImage = channel.unary_unary(
+                '/backend.Backend/GenerateImage',
+                request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.AudioTranscription = channel.unary_unary(
+                '/backend.Backend/AudioTranscription',
+                request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
+                response_deserializer=backend__pb2.TranscriptResult.FromString,
+                )
+        self.TTS = channel.unary_unary(
+                '/backend.Backend/TTS',
+                request_serializer=backend__pb2.TTSRequest.SerializeToString,
+                response_deserializer=backend__pb2.Result.FromString,
+                )
+        self.TokenizeString = channel.unary_unary(
+                '/backend.Backend/TokenizeString',
+                request_serializer=backend__pb2.PredictOptions.SerializeToString,
+                response_deserializer=backend__pb2.TokenizationResponse.FromString,
+                )
+        self.Status = channel.unary_unary(
+                '/backend.Backend/Status',
+                request_serializer=backend__pb2.HealthMessage.SerializeToString,
+                response_deserializer=backend__pb2.StatusResponse.FromString,
+                )
+
+
+class BackendServicer(object):
+    """Missing associated documentation comment in .proto file."""
+
+    def Health(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def Predict(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def LoadModel(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def PredictStream(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def Embedding(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def GenerateImage(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def AudioTranscription(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def TTS(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def TokenizeString(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def Status(self, request, context):
+        """Missing associated documentation comment in .proto file."""
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+
+def add_BackendServicer_to_server(servicer, server):
+    rpc_method_handlers = {
+            'Health': grpc.unary_unary_rpc_method_handler(
+                    servicer.Health,
+                    request_deserializer=backend__pb2.HealthMessage.FromString,
+                    response_serializer=backend__pb2.Reply.SerializeToString,
+            ),
+            'Predict': grpc.unary_unary_rpc_method_handler(
+                    servicer.Predict,
+                    request_deserializer=backend__pb2.PredictOptions.FromString,
+                    response_serializer=backend__pb2.Reply.SerializeToString,
+            ),
+            'LoadModel': grpc.unary_unary_rpc_method_handler(
+                    servicer.LoadModel,
+                    request_deserializer=backend__pb2.ModelOptions.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'PredictStream': grpc.unary_stream_rpc_method_handler(
+                    servicer.PredictStream,
+                    request_deserializer=backend__pb2.PredictOptions.FromString,
+                    response_serializer=backend__pb2.Reply.SerializeToString,
+            ),
+            'Embedding': grpc.unary_unary_rpc_method_handler(
+                    servicer.Embedding,
+                    request_deserializer=backend__pb2.PredictOptions.FromString,
+                    response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
+            ),
+            'GenerateImage': grpc.unary_unary_rpc_method_handler(
+                    servicer.GenerateImage,
+                    request_deserializer=backend__pb2.GenerateImageRequest.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'AudioTranscription': grpc.unary_unary_rpc_method_handler(
+                    servicer.AudioTranscription,
+                    request_deserializer=backend__pb2.TranscriptRequest.FromString,
+                    response_serializer=backend__pb2.TranscriptResult.SerializeToString,
+            ),
+            'TTS': grpc.unary_unary_rpc_method_handler(
+                    servicer.TTS,
+                    request_deserializer=backend__pb2.TTSRequest.FromString,
+                    response_serializer=backend__pb2.Result.SerializeToString,
+            ),
+            'TokenizeString': grpc.unary_unary_rpc_method_handler(
+                    servicer.TokenizeString,
+                    request_deserializer=backend__pb2.PredictOptions.FromString,
+                    response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
+            ),
+            'Status': grpc.unary_unary_rpc_method_handler(
+                    servicer.Status,
+                    request_deserializer=backend__pb2.HealthMessage.FromString,
+                    response_serializer=backend__pb2.StatusResponse.SerializeToString,
+            ),
+    }
+    generic_handler = grpc.method_handlers_generic_handler(
+            'backend.Backend', rpc_method_handlers)
+    server.add_generic_rpc_handlers((generic_handler,))
+
+
+# This class is part of an EXPERIMENTAL API.
+class Backend(object):
+    """Missing associated documentation comment in .proto file."""
+
+    @staticmethod
+    def Health(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
+            backend__pb2.HealthMessage.SerializeToString,
+            backend__pb2.Reply.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def Predict(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
+            backend__pb2.PredictOptions.SerializeToString,
+            backend__pb2.Reply.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def LoadModel(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
+            backend__pb2.ModelOptions.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def PredictStream(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
+            backend__pb2.PredictOptions.SerializeToString,
+            backend__pb2.Reply.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def Embedding(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
+            backend__pb2.PredictOptions.SerializeToString,
+            backend__pb2.EmbeddingResult.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def GenerateImage(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
+            backend__pb2.GenerateImageRequest.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def AudioTranscription(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
+            backend__pb2.TranscriptRequest.SerializeToString,
+            backend__pb2.TranscriptResult.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def TTS(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
+            backend__pb2.TTSRequest.SerializeToString,
+            backend__pb2.Result.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def TokenizeString(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
+            backend__pb2.PredictOptions.SerializeToString,
+            backend__pb2.TokenizationResponse.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
+
+    @staticmethod
+    def Status(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
+            backend__pb2.HealthMessage.SerializeToString,
+            backend__pb2.StatusResponse.FromString,
+            options, channel_credentials,
+            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
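The generated helper above is what each Python backend uses to expose itself over gRPC; serve() in autogptq.py follows this shape. A condensed sketch of the wiring, with an illustrative worker count and bind address:

from concurrent import futures
import grpc
import backend_pb2_grpc

class Servicer(backend_pb2_grpc.BackendServicer):
    pass  # a real backend overrides Health, LoadModel, Predict, ...

server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
backend_pb2_grpc.add_BackendServicer_to_server(Servicer(), server)
server.add_insecure_port("localhost:50051")
server.start()
server.wait_for_termination()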
@@ -1,14 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
-# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
-# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
-# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
-if [ "x${BUILD_PROFILE}" == "xintel" ]; then
-    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
-fi
-
-installRequirements
@@ -1,4 +0,0 @@
---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch
-torch
-optimum[openvino]
@@ -1,7 +0,0 @@
-accelerate
-auto-gptq==0.7.1
-grpcio==1.63.0
-protobuf
-torch
-certifi
-transformers
@@ -1,4 +1,14 @@
 #!/bin/bash
-source $(dirname $0)/../common/libbackend.sh
-
-startBackend $@
+
+##
+## A bash script wrapper that runs the autogptq server with conda
+
+export PATH=$PATH:/opt/conda/bin
+
+# Activate conda environment
+source activate transformers
+
+# get the directory where the bash script is located
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+python $DIR/autogptq.py $@
@@ -1,6 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-runUnittests
@@ -1,29 +1,15 @@
 .PHONY: ttsbark
-ttsbark: protogen
-	bash install.sh
+ttsbark:
+	$(MAKE) -C ../common-env/transformers

 .PHONY: run
-run: protogen
+run:
 	@echo "Running bark..."
 	bash run.sh
 	@echo "bark run."

 .PHONY: test
-test: protogen
+test:
 	@echo "Testing bark..."
 	bash test.sh
 	@echo "bark tested."
-
-.PHONY: protogen
-protogen: backend_pb2_grpc.py backend_pb2.py
-
-.PHONY: protogen-clean
-protogen-clean:
-	$(RM) backend_pb2_grpc.py backend_pb2.py
-
-backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
-
-.PHONY: clean
-clean: protogen-clean
-	rm -rf venv __pycache__
backend/python/bark/backend_pb2.py (new file, 61 lines)
File diff suppressed because one or more lines are too long
backend/python/bark/backend_pb2_grpc.py (new file, 363 lines)
@@ -0,0 +1,363 @@
+(Generated gRPC stub, identical line for line to backend/python/autogptq/backend_pb2_grpc.py shown above.)
@@ -1,14 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
-# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
-# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
-# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
-if [ "x${BUILD_PROFILE}" == "xintel" ]; then
-    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
-fi
-
-installRequirements
@@ -1,5 +0,0 @@
---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch
-torch
-torchaudio
-optimum[openvino]
@@ -1,6 +0,0 @@
-accelerate
-bark==0.1.5
-grpcio==1.63.0
-protobuf
-certifi
-transformers
@@ -1,4 +1,14 @@
 #!/bin/bash
-source $(dirname $0)/../common/libbackend.sh
-
-startBackend $@
+
+##
+## A bash script wrapper that runs the ttsbark server with conda
+
+export PATH=$PATH:/opt/conda/bin
+
+# Activate conda environment
+source activate transformers
+
+# get the directory where the bash script is located
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+python $DIR/ttsbark.py $@
@@ -18,7 +18,7 @@ class TestBackendServicer(unittest.TestCase):
         """
        This method sets up the gRPC service by starting the server
        """
-        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
+        self.service = subprocess.Popen(["python3", "ttsbark.py", "--addr", "localhost:50051"])
         time.sleep(10)

     def tearDown(self) -> None:
backend/python/bark/test.sh (executable file → normal file, 11 lines)
@@ -1,6 +1,11 @@
 #!/bin/bash
-set -e
+##
+## A bash script wrapper that runs the bark server with conda

-source $(dirname $0)/../common/libbackend.sh
+# Activate conda environment
+source activate transformers

-runUnittests
+# get the directory where the bash script is located
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+python -m unittest $DIR/test.py
backend/python/common-env/transformers/Makefile (new file, 21 lines)
@@ -0,0 +1,21 @@
+CONDA_ENV_PATH = "transformers.yml"
+
+ifeq ($(BUILD_TYPE), cublas)
+	CONDA_ENV_PATH = "transformers-nvidia.yml"
+endif
+
+ifeq ($(BUILD_TYPE), hipblas)
+	CONDA_ENV_PATH = "transformers-rocm.yml"
+endif
+
+# Intel GPU are supposed to have dependencies installed in the main python
+# environment, so we skip conda installation for SYCL builds.
+# https://github.com/intel/intel-extension-for-pytorch/issues/538
+ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+	export SKIP_CONDA=1
+endif
+
+.PHONY: transformers
+transformers:
+	@echo "Installing $(CONDA_ENV_PATH)..."
+	bash install.sh $(CONDA_ENV_PATH)
backend/python/common-env/transformers/install.sh (new file, 38 lines)
@@ -0,0 +1,38 @@
+#!/bin/bash
+set -ex
+
+SKIP_CONDA=${SKIP_CONDA:-0}
+
+# Check if environment exist
+conda_env_exists(){
+    ! conda list --name "${@}" >/dev/null 2>/dev/null
+}
+
+if [ $SKIP_CONDA -eq 1 ]; then
+    echo "Skipping conda environment installation"
+else
+    export PATH=$PATH:/opt/conda/bin
+    if conda_env_exists "transformers" ; then
+        echo "Creating virtual environment..."
+        conda env create --name transformers --file $1
+        echo "Virtual environment created."
+    else
+        echo "Virtual environment already exists."
+    fi
+fi
+
+if [ -d "/opt/intel" ]; then
+    # Intel GPU: If the directory exists, we assume we are using the intel image
+    # (no conda env)
+    # https://github.com/intel/intel-extension-for-pytorch/issues/538
+    pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed
+fi
+
+if [ "$PIP_CACHE_PURGE" = true ] ; then
+    if [ $SKIP_CONDA -eq 0 ]; then
+        # Activate conda environment
+        source activate transformers
+    fi
+
+    pip cache purge
+fi
backend/python/common-env/transformers/transformers-nvidia.yml: 120 lines (Normal file)

@@ -0,0 +1,120 @@
name: transformers
channels:
  - defaults
dependencies:
  - _libgcc_mutex=0.1=main
  - _openmp_mutex=5.1=1_gnu
  - bzip2=1.0.8=h7b6447c_0
  - ca-certificates=2023.08.22=h06a4308_0
  - ld_impl_linux-64=2.38=h1181459_1
  - libffi=3.4.4=h6a678d5_0
  - libgcc-ng=11.2.0=h1234567_1
  - libgomp=11.2.0=h1234567_1
  - libstdcxx-ng=11.2.0=h1234567_1
  - libuuid=1.41.5=h5eee18b_0
  - ncurses=6.4=h6a678d5_0
  - openssl=3.0.11=h7f8727e_2
  - pip=23.2.1=py311h06a4308_0
  - python=3.11.5=h955ad1f_0
  - readline=8.2=h5eee18b_0
  - setuptools=68.0.0=py311h06a4308_0
  - sqlite=3.41.2=h5eee18b_0
  - tk=8.6.12=h1ccaba5_0
  - wheel=0.41.2=py311h06a4308_0
  - xz=5.4.2=h5eee18b_0
  - zlib=1.2.13=h5eee18b_0
  - pip:
    - accelerate==0.23.0
    - aiohttp==3.8.5
    - aiosignal==1.3.1
    - async-timeout==4.0.3
    - attrs==23.1.0
    - bark==0.1.5
    - bitsandbytes==0.43.0
    - boto3==1.28.61
    - botocore==1.31.61
    - certifi==2023.7.22
    - TTS==0.22.0
    - charset-normalizer==3.3.0
    - datasets==2.14.5
    - sentence-transformers==2.5.1 # Updated Version
    - sentencepiece==0.1.99
    - dill==0.3.7
    - einops==0.7.0
    - encodec==0.1.1
    - filelock==3.12.4
    - frozenlist==1.4.0
    - fsspec==2023.6.0
    - funcy==2.0
    - grpcio==1.59.0
    - huggingface-hub
    - idna==3.4
    - jinja2==3.1.2
    - jmespath==1.0.1
    - markupsafe==2.1.3
    - mpmath==1.3.0
    - multidict==6.0.4
    - multiprocess==0.70.15
    - networkx
    - numpy==1.26.0
    - nvidia-cublas-cu12==12.1.3.1
    - nvidia-cuda-cupti-cu12==12.1.105
    - nvidia-cuda-nvrtc-cu12==12.1.105
    - nvidia-cuda-runtime-cu12==12.1.105
    - nvidia-cudnn-cu12==8.9.2.26
    - nvidia-cufft-cu12==11.0.2.54
    - nvidia-curand-cu12==10.3.2.106
    - nvidia-cusolver-cu12==11.4.5.107
    - nvidia-cusparse-cu12==12.1.0.106
    - nvidia-nccl-cu12==2.18.1
    - nvidia-nvjitlink-cu12==12.2.140
    - nvidia-nvtx-cu12==12.1.105
    - packaging==23.2
    - pandas
    - peft==0.5.0
    - protobuf==4.24.4
    - psutil==5.9.5
    - pyarrow==13.0.0
    - python-dateutil==2.8.2
    - pytz==2023.3.post1
    - pyyaml==6.0.1
    - regex==2023.10.3
    - requests==2.31.0
    - rouge==1.0.1
    - s3transfer==0.7.0
    - safetensors>=0.4.1
    - scipy==1.12.0 # Updated Version
    - six==1.16.0
    - sympy==1.12
    - tokenizers
    - torch==2.1.2
    - torchaudio==2.1.2
    - tqdm==4.66.1
    - triton==2.1.0
    - typing-extensions==4.8.0
    - tzdata==2023.3
    - urllib3==1.26.17
    - xxhash==3.4.1
    - auto-gptq==0.6.0
    - yarl==1.9.2
    - soundfile
    - langid
    - wget
    - unidecode
    - pyopenjtalk-prebuilt
    - pypinyin
    - inflect
    - cn2an
    - jieba
    - eng_to_ipa
    - openai-whisper
    - matplotlib
    - gradio==3.41.2
    - nltk
    - sudachipy
    - sudachidict_core
    - vocos
    - vllm==0.3.2
    - transformers>=4.38.2 # Updated Version
    - xformers==0.0.23.post1
prefix: /opt/conda/envs/transformers
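install.sh drives the environment creation, but the same file can be applied by hand; a minimal sketch, assuming conda is on PATH (the final line is just an illustrative CUDA smoke test):

conda env create --name transformers --file transformers-nvidia.yml
source activate transformers
python -c "import torch; print(torch.cuda.is_available())"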
backend/python/common-env/transformers/transformers-rocm.yml: 109 lines (Normal file)

@@ -0,0 +1,109 @@
name: transformers
channels:
  - defaults
dependencies:
  - _libgcc_mutex=0.1=main
  - _openmp_mutex=5.1=1_gnu
  - bzip2=1.0.8=h7b6447c_0
  - ca-certificates=2023.08.22=h06a4308_0
  - ld_impl_linux-64=2.38=h1181459_1
  - libffi=3.4.4=h6a678d5_0
  - libgcc-ng=11.2.0=h1234567_1
  - libgomp=11.2.0=h1234567_1
  - libstdcxx-ng=11.2.0=h1234567_1
  - libuuid=1.41.5=h5eee18b_0
  - ncurses=6.4=h6a678d5_0
  - openssl=3.0.11=h7f8727e_2
  - pip=23.2.1=py311h06a4308_0
  - python=3.11.5=h955ad1f_0
  - readline=8.2=h5eee18b_0
  - setuptools=68.0.0=py311h06a4308_0
  - sqlite=3.41.2=h5eee18b_0
  - tk=8.6.12=h1ccaba5_0
  - wheel=0.41.2=py311h06a4308_0
  - xz=5.4.2=h5eee18b_0
  - zlib=1.2.13=h5eee18b_0
  - pip:
    - --pre
    - --extra-index-url https://download.pytorch.org/whl/nightly/
    - accelerate==0.23.0
    - aiohttp==3.8.5
    - aiosignal==1.3.1
    - async-timeout==4.0.3
    - attrs==23.1.0
    - bark==0.1.5
    - boto3==1.28.61
    - botocore==1.31.61
    - certifi==2023.7.22
    - TTS==0.22.0
    - charset-normalizer==3.3.0
    - datasets==2.14.5
    - sentence-transformers==2.5.1 # Updated Version
    - sentencepiece==0.1.99
    - dill==0.3.7
    - einops==0.7.0
    - encodec==0.1.1
    - filelock==3.12.4
    - frozenlist==1.4.0
    - fsspec==2023.6.0
    - funcy==2.0
    - grpcio==1.59.0
    - huggingface-hub
    - idna==3.4
    - jinja2==3.1.2
    - jmespath==1.0.1
    - markupsafe==2.1.3
    - mpmath==1.3.0
    - multidict==6.0.4
    - multiprocess==0.70.15
    - networkx
    - numpy==1.26.0
    - packaging==23.2
    - pandas
    - peft==0.5.0
    - protobuf==4.24.4
    - psutil==5.9.5
    - pyarrow==13.0.0
    - python-dateutil==2.8.2
    - pytz==2023.3.post1
    - pyyaml==6.0.1
    - regex==2023.10.3
    - requests==2.31.0
    - rouge==1.0.1
    - s3transfer==0.7.0
    - safetensors>=0.4.1
    - scipy==1.12.0 # Updated Version
    - six==1.16.0
    - sympy==1.12
    - tokenizers
    - torch
    - torchaudio
    - tqdm==4.66.1
    - triton==2.1.0
    - typing-extensions==4.8.0
    - tzdata==2023.3
    - auto-gptq==0.6.0
    - urllib3==1.26.17
    - xxhash==3.4.1
    - yarl==1.9.2
    - soundfile
    - langid
    - wget
    - unidecode
    - pyopenjtalk-prebuilt
    - pypinyin
    - inflect
    - cn2an
    - jieba
    - eng_to_ipa
    - openai-whisper
    - matplotlib
    - gradio==3.41.2
    - nltk
    - sudachipy
    - sudachidict_core
    - vocos
    - vllm==0.3.2
    - transformers>=4.38.2 # Updated Version
    - xformers==0.0.23.post1
prefix: /opt/conda/envs/transformers
backend/python/common-env/transformers/transformers.yml: 107 lines (Normal file)

@@ -0,0 +1,107 @@
name: transformers
channels:
  - defaults
dependencies:
  - _libgcc_mutex=0.1=main
  - _openmp_mutex=5.1=1_gnu
  - bzip2=1.0.8=h7b6447c_0
  - ca-certificates=2023.08.22=h06a4308_0
  - ld_impl_linux-64=2.38=h1181459_1
  - libffi=3.4.4=h6a678d5_0
  - libgcc-ng=11.2.0=h1234567_1
  - libgomp=11.2.0=h1234567_1
  - libstdcxx-ng=11.2.0=h1234567_1
  - libuuid=1.41.5=h5eee18b_0
  - ncurses=6.4=h6a678d5_0
  - openssl=3.0.11=h7f8727e_2
  - pip=23.2.1=py311h06a4308_0
  - python=3.11.5=h955ad1f_0
  - readline=8.2=h5eee18b_0
  - setuptools=68.0.0=py311h06a4308_0
  - sqlite=3.41.2=h5eee18b_0
  - tk=8.6.12=h1ccaba5_0
  - wheel=0.41.2=py311h06a4308_0
  - xz=5.4.2=h5eee18b_0
  - zlib=1.2.13=h5eee18b_0
  - pip:
    - accelerate==0.23.0
    - aiohttp==3.8.5
    - aiosignal==1.3.1
    - async-timeout==4.0.3
    - attrs==23.1.0
    - bark==0.1.5
    - boto3==1.28.61
    - botocore==1.31.61
    - certifi==2023.7.22
    - TTS==0.22.0
    - charset-normalizer==3.3.0
    - datasets==2.14.5
    - sentence-transformers==2.5.1 # Updated Version
    - sentencepiece==0.1.99
    - dill==0.3.7
    - einops==0.7.0
    - encodec==0.1.1
    - filelock==3.12.4
    - frozenlist==1.4.0
    - fsspec==2023.6.0
    - funcy==2.0
    - grpcio==1.59.0
    - huggingface-hub
    - idna==3.4
    - jinja2==3.1.2
    - jmespath==1.0.1
    - markupsafe==2.1.3
    - mpmath==1.3.0
    - multidict==6.0.4
    - multiprocess==0.70.15
    - networkx
    - numpy==1.26.0
    - packaging==23.2
    - pandas
    - peft==0.5.0
    - protobuf==4.24.4
    - psutil==5.9.5
    - pyarrow==13.0.0
    - python-dateutil==2.8.2
    - pytz==2023.3.post1
    - pyyaml==6.0.1
    - regex==2023.10.3
    - requests==2.31.0
    - rouge==1.0.1
    - s3transfer==0.7.0
    - safetensors>=0.4.1
    - scipy==1.12.0 # Updated Version
    - six==1.16.0
    - sympy==1.12
    - tokenizers
    - torch==2.1.2
    - torchaudio==2.1.2
    - tqdm==4.66.1
    - triton==2.1.0
    - typing-extensions==4.8.0
    - tzdata==2023.3
    - auto-gptq==0.6.0
    - urllib3==1.26.17
    - xxhash==3.4.1
    - yarl==1.9.2
    - soundfile
    - langid
    - wget
    - unidecode
    - pyopenjtalk-prebuilt
    - pypinyin
    - inflect
    - cn2an
    - jieba
    - eng_to_ipa
    - openai-whisper
    - matplotlib
    - gradio==3.41.2
    - nltk
    - sudachipy
    - sudachidict_core
    - vocos
    - vllm==0.3.2
    - transformers>=4.38.2 # Updated Version
    - xformers==0.0.23.post1
prefix: /opt/conda/envs/transformers
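The three environment files are intentionally near-identical; a quick way to see what actually differs (the nvidia file adds bitsandbytes and the nvidia-*-cu12 wheels, the rocm file unpins torch/torchaudio and pulls PyTorch nightly wheels via --pre):

diff transformers.yml transformers-nvidia.yml
diff transformers.yml transformers-rocm.yml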
backend/python/common/libbackend.sh: 213 lines deleted

@@ -1,213 +0,0 @@
# init handles the setup of the library
#
# use the library by adding the following line to a script:
# source $(dirname $0)/../common/libbackend.sh
#
# If you want to limit what targets a backend can be used on, set the variable LIMIT_TARGETS to a
# space separated list of valid targets BEFORE sourcing the library, for example to only allow a backend
# to be used on CUDA and CPU backends:
#
# LIMIT_TARGETS="cublas cpu"
# source $(dirname $0)/../common/libbackend.sh
#
# You can use any valid BUILD_TYPE or BUILD_PROFILE, if you need to limit a backend to CUDA 12 only:
#
# LIMIT_TARGETS="cublas12"
# source $(dirname $0)/../common/libbackend.sh
#
function init() {
    BACKEND_NAME=${PWD##*/}
    MY_DIR=$(realpath `dirname $0`)
    BUILD_PROFILE=$(getBuildProfile)

    # If a backend has defined a list of valid build profiles...
    if [ ! -z "${LIMIT_TARGETS}" ]; then
        isValidTarget=$(checkTargets ${LIMIT_TARGETS})
        if [ ${isValidTarget} != true ]; then
            echo "${BACKEND_NAME} can only be used on the following targets: ${LIMIT_TARGETS}"
            exit 0
        fi
    fi

    echo "Initializing libbackend for ${BACKEND_NAME}"
}

# getBuildProfile will inspect the system to determine which build profile is appropriate:
# returns one of the following:
# - cublas11
# - cublas12
# - hipblas
# - intel
function getBuildProfile() {
    # First check if we are a cublas build, and if so report the correct build profile
    if [ x"${BUILD_TYPE}" == "xcublas" ]; then
        if [ ! -z ${CUDA_MAJOR_VERSION} ]; then
            # If we have been given a CUDA version, we trust it
            echo ${BUILD_TYPE}${CUDA_MAJOR_VERSION}
        else
            # We don't know what version of cuda we are, so we report ourselves as a generic cublas
            echo ${BUILD_TYPE}
        fi
        return 0
    fi

    # If /opt/intel exists, then we are doing an intel/ARC build
    if [ -d "/opt/intel" ]; then
        echo "intel"
        return 0
    fi

    # For any other value of BUILD_TYPE, we don't need any special handling/discovery
    if [ ! -z ${BUILD_TYPE} ]; then
        echo ${BUILD_TYPE}
        return 0
    fi

    # If no BUILD_TYPE is set at all, report a build profile of cpu; we aren't building for any GPU targets
    echo "cpu"
}

# ensureVenv makes sure that the venv for the backend both exists and is activated.
#
# This function is idempotent, so you can call it as many times as you want and it will
# always result in an activated virtual environment
function ensureVenv() {
    if [ ! -d "${MY_DIR}/venv" ]; then
        uv venv ${MY_DIR}/venv
        echo "virtualenv created"
    fi

    if [ "x${VIRTUAL_ENV}" != "x${MY_DIR}/venv" ]; then
        source ${MY_DIR}/venv/bin/activate
        echo "virtualenv activated"
    fi

    echo "activated virtualenv has been ensured"
}

# installRequirements looks for several requirements files and if they exist runs the install for them in order
#
# - requirements-install.txt
# - requirements.txt
# - requirements-${BUILD_TYPE}.txt
# - requirements-${BUILD_PROFILE}.txt
#
# BUILD_PROFILE is a more specific version of BUILD_TYPE, ex: cuda11 or cuda12
# it can also include some options that we do not have BUILD_TYPES for, ex: intel
#
# NOTE: for BUILD_PROFILE==intel, this function does NOT automatically use the Intel python package index.
# you may want to add the following line to a requirements-intel.txt if you use one:
#
# --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
#
# If you need to add extra flags into the pip install command you can do so by setting the variable EXTRA_PIP_INSTALL_FLAGS
# before calling installRequirements. For example:
#
# source $(dirname $0)/../common/libbackend.sh
# EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"
# installRequirements
function installRequirements() {
    ensureVenv

    # These are the requirements files we will attempt to install, in order
    declare -a requirementFiles=(
        "${MY_DIR}/requirements-install.txt"
        "${MY_DIR}/requirements.txt"
        "${MY_DIR}/requirements-${BUILD_TYPE}.txt"
    )

    if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
        requirementFiles+=("${MY_DIR}/requirements-${BUILD_PROFILE}.txt")
    fi

    for reqFile in ${requirementFiles[@]}; do
        if [ -f ${reqFile} ]; then
            echo "starting requirements install for ${reqFile}"
            uv pip install ${EXTRA_PIP_INSTALL_FLAGS} --requirement ${reqFile}
            echo "finished requirements install for ${reqFile}"
        fi
    done
}

# startBackend discovers and runs the backend GRPC server
#
# You can specify a specific backend file to execute by setting BACKEND_FILE before calling startBackend.
# example:
#
# source ../common/libbackend.sh
# BACKEND_FILE="${MY_DIR}/source/backend.py"
# startBackend $@
#
# valid filenames for autodiscovered backend servers are:
# - server.py
# - backend.py
# - ${BACKEND_NAME}.py
function startBackend() {
    ensureVenv

    if [ ! -z ${BACKEND_FILE} ]; then
        python ${BACKEND_FILE} $@
    elif [ -e "${MY_DIR}/server.py" ]; then
        python ${MY_DIR}/server.py $@
    elif [ -e "${MY_DIR}/backend.py" ]; then
        python ${MY_DIR}/backend.py $@
    elif [ -e "${MY_DIR}/${BACKEND_NAME}.py" ]; then
        python ${MY_DIR}/${BACKEND_NAME}.py $@
    fi
}

# runUnittests discovers and runs python unittests
#
# You can specify a specific test file to use by setting TEST_FILE before calling runUnittests.
# example:
#
# source ../common/libbackend.sh
# TEST_FILE="${MY_DIR}/source/test.py"
# runUnittests $@
#
# by default a file named test.py in the backend's directory will be used
function runUnittests() {
    ensureVenv

    if [ ! -z ${TEST_FILE} ]; then
        testDir=$(dirname `realpath ${TEST_FILE}`)
        testFile=$(basename ${TEST_FILE})
        pushd ${testDir}
        python -m unittest ${testFile}
        popd
    elif [ -f "${MY_DIR}/test.py" ]; then
        pushd ${MY_DIR}
        python -m unittest test.py
        popd
    else
        echo "no tests defined for ${BACKEND_NAME}"
    fi
}

##################################################################################
# Below here are helper functions not intended to be used outside of the library #
##################################################################################

# checkTargets determines if the current BUILD_TYPE or BUILD_PROFILE is in a list of valid targets
function checkTargets() {
    # Collect all provided targets into a variable and...
    targets=$@
    # ...convert it into an array
    declare -a targets=($targets)

    for target in ${targets[@]}; do
        if [ "x${BUILD_TYPE}" == "x${target}" ]; then
            echo true
            return 0
        fi
        if [ "x${BUILD_PROFILE}" == "x${target}" ]; then
            echo true
            return 0
        fi
    done
    echo false
}

init
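To make the deleted library's contract concrete, here is a hypothetical minimal run.sh for a backend that should only run on CUDA or CPU, following the documented pattern above (the LIMIT_TARGETS value and backend layout are illustrative):

#!/bin/bash
set -e

# restrict this backend to CUDA and CPU targets before sourcing the library
LIMIT_TARGETS="cublas cpu"
source $(dirname $0)/../common/libbackend.sh

# libbackend autodiscovers server.py, backend.py, or ${BACKEND_NAME}.py
startBackend $@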
@@ -1,19 +0,0 @@
.DEFAULT_GOAL := install

.PHONY: install
install: protogen
	bash install.sh

.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py

.PHONY: protogen-clean
protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py

backend_pb2_grpc.py backend_pb2.py:
	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto

.PHONY: clean
clean: protogen-clean
	rm -rf venv __pycache__
@@ -1,4 +0,0 @@
#!/usr/bin/env python3
import grpc
import backend_pb2
import backend_pb2_grpc
@@ -1,14 +0,0 @@
#!/bin/bash
set -e

source $(dirname $0)/../common/libbackend.sh

# This is here because the Intel pip index is broken: it returns a 200 status code for every package name, it just doesn't return any package links.
# That makes uv think the package exists in the Intel pip index, and by default uv stops looking at other indexes once it finds a match.
# We need uv to keep falling through to the default PyPI index to find optimum[openvino] there.
# The --upgrade flag actually allows us to *downgrade* torch to the version provided in the Intel pip index.
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi

installRequirements
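Given how installRequirements interpolates EXTRA_PIP_INSTALL_FLAGS into its uv call, the Intel branch effectively turns each install into something like the following (illustrative expansion; the requirements file name is an example):

uv pip install --upgrade --index-strategy=unsafe-first-match --requirement requirements-intel.txt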
@@ -1,4 +0,0 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch
torch
optimum[openvino]
@@ -1,2 +0,0 @@
grpcio==1.63.0
protobuf
@@ -1,4 +0,0 @@
#!/bin/bash
source $(dirname $0)/../common/libbackend.sh

startBackend $@
@@ -1,6 +0,0 @@
#!/bin/bash
set -e

source $(dirname $0)/../common/libbackend.sh

runUnittests
@@ -1,29 +1,15 @@
 .PHONY: coqui
-coqui: protogen
-	bash install.sh
+coqui:
+	$(MAKE) -C ../common-env/transformers
 
 .PHONY: run
-run: protogen
+run:
 	@echo "Running coqui..."
 	bash run.sh
 	@echo "coqui run."
 
 .PHONY: test
-test: protogen
+test:
 	@echo "Testing coqui..."
 	bash test.sh
 	@echo "coqui tested."
-
-.PHONY: protogen
-protogen: backend_pb2_grpc.py backend_pb2.py
-
-.PHONY: protogen-clean
-protogen-clean:
-	$(RM) backend_pb2_grpc.py backend_pb2.py
-
-backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
-
-.PHONY: clean
-clean: protogen-clean
-	rm -rf venv __pycache__
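After this change the coqui target no longer generates gRPC stubs or runs a per-backend install.sh; it only delegates to the shared environment. Roughly (paths illustrative):

make -C backend/python/coqui coqui
# is now equivalent to:
make -C backend/python/common-env/transformers   # default goal: the `transformers` target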
backend/python/coqui/backend_pb2.py: 61 lines (Normal file)
File diff suppressed because one or more lines are too long
backend/python/coqui/backend_pb2_grpc.py: 363 lines (Normal file)

@@ -0,0 +1,363 @@
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc

import backend_pb2 as backend__pb2


class BackendStub(object):
    """Missing associated documentation comment in .proto file."""

    def __init__(self, channel):
        """Constructor.

        Args:
            channel: A grpc.Channel.
        """
        self.Health = channel.unary_unary(
                '/backend.Backend/Health',
                request_serializer=backend__pb2.HealthMessage.SerializeToString,
                response_deserializer=backend__pb2.Reply.FromString,
                )
        self.Predict = channel.unary_unary(
                '/backend.Backend/Predict',
                request_serializer=backend__pb2.PredictOptions.SerializeToString,
                response_deserializer=backend__pb2.Reply.FromString,
                )
        self.LoadModel = channel.unary_unary(
                '/backend.Backend/LoadModel',
                request_serializer=backend__pb2.ModelOptions.SerializeToString,
                response_deserializer=backend__pb2.Result.FromString,
                )
        self.PredictStream = channel.unary_stream(
                '/backend.Backend/PredictStream',
                request_serializer=backend__pb2.PredictOptions.SerializeToString,
                response_deserializer=backend__pb2.Reply.FromString,
                )
        self.Embedding = channel.unary_unary(
                '/backend.Backend/Embedding',
                request_serializer=backend__pb2.PredictOptions.SerializeToString,
                response_deserializer=backend__pb2.EmbeddingResult.FromString,
                )
        self.GenerateImage = channel.unary_unary(
                '/backend.Backend/GenerateImage',
                request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
                response_deserializer=backend__pb2.Result.FromString,
                )
        self.AudioTranscription = channel.unary_unary(
                '/backend.Backend/AudioTranscription',
                request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
                response_deserializer=backend__pb2.TranscriptResult.FromString,
                )
        self.TTS = channel.unary_unary(
                '/backend.Backend/TTS',
                request_serializer=backend__pb2.TTSRequest.SerializeToString,
                response_deserializer=backend__pb2.Result.FromString,
                )
        self.TokenizeString = channel.unary_unary(
                '/backend.Backend/TokenizeString',
                request_serializer=backend__pb2.PredictOptions.SerializeToString,
                response_deserializer=backend__pb2.TokenizationResponse.FromString,
                )
        self.Status = channel.unary_unary(
                '/backend.Backend/Status',
                request_serializer=backend__pb2.HealthMessage.SerializeToString,
                response_deserializer=backend__pb2.StatusResponse.FromString,
                )


class BackendServicer(object):
    """Missing associated documentation comment in .proto file."""

    def Health(self, request, context):
        """Missing associated documentation comment in .proto file."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def Predict(self, request, context):
        """Missing associated documentation comment in .proto file."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def LoadModel(self, request, context):
        """Missing associated documentation comment in .proto file."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def PredictStream(self, request, context):
        """Missing associated documentation comment in .proto file."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def Embedding(self, request, context):
        """Missing associated documentation comment in .proto file."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def GenerateImage(self, request, context):
        """Missing associated documentation comment in .proto file."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def AudioTranscription(self, request, context):
        """Missing associated documentation comment in .proto file."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def TTS(self, request, context):
        """Missing associated documentation comment in .proto file."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def TokenizeString(self, request, context):
        """Missing associated documentation comment in .proto file."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def Status(self, request, context):
        """Missing associated documentation comment in .proto file."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')


def add_BackendServicer_to_server(servicer, server):
    rpc_method_handlers = {
            'Health': grpc.unary_unary_rpc_method_handler(
                    servicer.Health,
                    request_deserializer=backend__pb2.HealthMessage.FromString,
                    response_serializer=backend__pb2.Reply.SerializeToString,
            ),
            'Predict': grpc.unary_unary_rpc_method_handler(
                    servicer.Predict,
                    request_deserializer=backend__pb2.PredictOptions.FromString,
                    response_serializer=backend__pb2.Reply.SerializeToString,
            ),
            'LoadModel': grpc.unary_unary_rpc_method_handler(
                    servicer.LoadModel,
                    request_deserializer=backend__pb2.ModelOptions.FromString,
                    response_serializer=backend__pb2.Result.SerializeToString,
            ),
            'PredictStream': grpc.unary_stream_rpc_method_handler(
                    servicer.PredictStream,
                    request_deserializer=backend__pb2.PredictOptions.FromString,
                    response_serializer=backend__pb2.Reply.SerializeToString,
            ),
            'Embedding': grpc.unary_unary_rpc_method_handler(
                    servicer.Embedding,
                    request_deserializer=backend__pb2.PredictOptions.FromString,
                    response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
            ),
            'GenerateImage': grpc.unary_unary_rpc_method_handler(
                    servicer.GenerateImage,
                    request_deserializer=backend__pb2.GenerateImageRequest.FromString,
                    response_serializer=backend__pb2.Result.SerializeToString,
            ),
            'AudioTranscription': grpc.unary_unary_rpc_method_handler(
                    servicer.AudioTranscription,
                    request_deserializer=backend__pb2.TranscriptRequest.FromString,
                    response_serializer=backend__pb2.TranscriptResult.SerializeToString,
            ),
            'TTS': grpc.unary_unary_rpc_method_handler(
                    servicer.TTS,
                    request_deserializer=backend__pb2.TTSRequest.FromString,
                    response_serializer=backend__pb2.Result.SerializeToString,
            ),
            'TokenizeString': grpc.unary_unary_rpc_method_handler(
                    servicer.TokenizeString,
                    request_deserializer=backend__pb2.PredictOptions.FromString,
                    response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
            ),
            'Status': grpc.unary_unary_rpc_method_handler(
                    servicer.Status,
                    request_deserializer=backend__pb2.HealthMessage.FromString,
                    response_serializer=backend__pb2.StatusResponse.SerializeToString,
            ),
    }
    generic_handler = grpc.method_handlers_generic_handler(
            'backend.Backend', rpc_method_handlers)
    server.add_generic_rpc_handlers((generic_handler,))


# This class is part of an EXPERIMENTAL API.
class Backend(object):
    """Missing associated documentation comment in .proto file."""

    @staticmethod
    def Health(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
            backend__pb2.HealthMessage.SerializeToString,
            backend__pb2.Reply.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def Predict(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
            backend__pb2.PredictOptions.SerializeToString,
            backend__pb2.Reply.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def LoadModel(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
            backend__pb2.ModelOptions.SerializeToString,
            backend__pb2.Result.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def PredictStream(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
            backend__pb2.PredictOptions.SerializeToString,
            backend__pb2.Reply.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def Embedding(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
            backend__pb2.PredictOptions.SerializeToString,
            backend__pb2.EmbeddingResult.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def GenerateImage(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
            backend__pb2.GenerateImageRequest.SerializeToString,
            backend__pb2.Result.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def AudioTranscription(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
            backend__pb2.TranscriptRequest.SerializeToString,
            backend__pb2.TranscriptResult.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def TTS(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
            backend__pb2.TTSRequest.SerializeToString,
            backend__pb2.Result.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def TokenizeString(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
            backend__pb2.PredictOptions.SerializeToString,
            backend__pb2.TokenizationResponse.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def Status(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
            backend__pb2.HealthMessage.SerializeToString,
            backend__pb2.StatusResponse.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@@ -1,14 +0,0 @@
#!/bin/bash
set -e

source $(dirname $0)/../common/libbackend.sh

# This is here because the Intel pip index is broken: it returns a 200 status code for every package name, it just doesn't return any package links.
# That makes uv think the package exists in the Intel pip index, and by default uv stops looking at other indexes once it finds a match.
# We need uv to keep falling through to the default PyPI index to find optimum[openvino] there.
# The --upgrade flag actually allows us to *downgrade* torch to the version provided in the Intel pip index.
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi

installRequirements
@@ -1,5 +0,0 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
@@ -1,6 +0,0 @@
accelerate
TTS==0.22.0
grpcio==1.63.0
protobuf
certifi
transformers
@@ -1,4 +1,14 @@
 #!/bin/bash
-source $(dirname $0)/../common/libbackend.sh
 
-startBackend $@
+##
+## A bash script wrapper that runs the ttsbark server with conda
+
+export PATH=$PATH:/opt/conda/bin
+
+# Activate conda environment
+source activate transformers
+
+# get the directory where the bash script is located
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+python $DIR/coqui_server.py $@
@@ -18,7 +18,7 @@ class TestBackendServicer(unittest.TestCase):
         """
         This method sets up the gRPC service by starting the server
         """
-        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
+        self.service = subprocess.Popen(["python3", "coqui_server.py", "--addr", "localhost:50051"])
         time.sleep(10)
 
     def tearDown(self) -> None:
backend/python/coqui/test.sh: 11 lines (Executable file → Normal file)

@@ -1,6 +1,11 @@
 #!/bin/bash
-set -e
+##
+## A bash script wrapper that runs the bark server with conda
 
-source $(dirname $0)/../common/libbackend.sh
+# Activate conda environment
+source activate transformers
 
-runUnittests
+# get the directory where the bash script is located
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+python -m unittest $DIR/test.py
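With the conda-based wrappers in place, the test flow becomes a two-step sketch (paths illustrative; assumes the shared env is built first):

make -C backend/python/common-env/transformers   # one-time: create the `transformers` conda env
bash backend/python/coqui/test.sh                # activates the env and runs test.py via unittest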
@@ -12,28 +12,15 @@ export SKIP_CONDA=1
 endif
 
 .PHONY: diffusers
-diffusers: protogen
-	bash install.sh
+diffusers:
+	@echo "Installing $(CONDA_ENV_PATH)..."
+	bash install.sh $(CONDA_ENV_PATH)
 
 .PHONY: run
-run: protogen
+run:
 	@echo "Running diffusers..."
 	bash run.sh
 	@echo "Diffusers run."
 
-test: protogen
+test:
 	bash test.sh
-
-.PHONY: protogen
-protogen: backend_pb2_grpc.py backend_pb2.py
-
-.PHONY: protogen-clean
-protogen-clean:
-	$(RM) backend_pb2_grpc.py backend_pb2.py
-
-backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
-
-.PHONY: clean
-clean: protogen-clean
-	rm -rf venv __pycache__
Some files were not shown because too many files have changed in this diff.