Compare commits

..

3 Commits

Author SHA1 Message Date
Ettore Di Giacinto
495191a54a fix(llama.cpp): fix eos without cache 2024-03-18 12:14:16 +01:00
Ettore Di Giacinto
b790fca180 fix(whisper.cpp): Add stubs and -lcuda 2024-03-18 12:13:39 +01:00
Ettore Di Giacinto
0663f66205 deps(whisper.cpp): update, fix cublas build 2024-03-16 10:38:57 +01:00
587 changed files with 14283 additions and 51583 deletions

View File

@@ -1,16 +1,6 @@
.idea
.github
.vscode
models
examples/chatbot-ui/models
examples/rwkv/models
examples/**/models
Dockerfile*
__pycache__
# SonarQube
.scannerwork
# backend virtual environments
**/venv
backend/python/**/source
Dockerfile

View File

@@ -1,31 +0,0 @@
root = true
[*]
indent_style = space
indent_size = 2
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
[*.go]
indent_style = tab
[Makefile]
indent_style = tab
[*.proto]
indent_size = 2
[*.py]
indent_size = 4
[*.js]
indent_size = 2
[*.yaml]
indent_size = 2
[*.md]
trim_trailing_whitespace = false

43
.env
View File

@@ -1,33 +1,33 @@
## Set number of threads.
## Note: prefer the number of physical cores. Overbooking the CPU degrades performance notably.
# LOCALAI_THREADS=14
# THREADS=14
## Specify a different bind address (defaults to ":8080")
# LOCALAI_ADDRESS=127.0.0.1:8080
# ADDRESS=127.0.0.1:8080
## Default models context size
# LOCALAI_CONTEXT_SIZE=512
# CONTEXT_SIZE=512
#
## Define galleries.
## models will to install will be visible in `/models/available`
# LOCALAI_GALLERIES=[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}]
# GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}]
## CORS settings
# LOCALAI_CORS=true
# LOCALAI_CORS_ALLOW_ORIGINS=*
# CORS=true
# CORS_ALLOW_ORIGINS=*
## Default path for models
#
# LOCALAI_MODELS_PATH=/models
# MODELS_PATH=/models
## Enable debug mode
# LOCALAI_LOG_LEVEL=debug
# DEBUG=true
## Disables COMPEL (Diffusers)
# COMPEL=0
## Enable/Disable single backend (useful if only one GPU is available)
# LOCALAI_SINGLE_ACTIVE_BACKEND=true
# SINGLE_ACTIVE_BACKEND=true
## Specify a build type. Available: cublas, openblas, clblas.
## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
@@ -46,13 +46,13 @@
# GO_TAGS=stablediffusion
## Path where to store generated images
# LOCALAI_IMAGE_PATH=/tmp/generated/images
# IMAGE_PATH=/tmp
## Specify a default upload limit in MB (whisper)
# LOCALAI_UPLOAD_LIMIT=15
# UPLOAD_LIMIT
## List of external GRPC backends (note on the container image this variable is already set to use extra backends available in extra/)
# LOCALAI_EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py
# EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py
### Advanced settings ###
### Those are not really used by LocalAI, but from components in the stack ###
@@ -71,24 +71,19 @@
### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
# LLAMACPP_PARALLEL=1
### Define a list of GRPC Servers for llama-cpp workers to distribute the load
# https://github.com/ggerganov/llama.cpp/pull/6829
# https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md
# LLAMACPP_GRPC_SERVERS=""
### Enable to run parallel requests
# LOCALAI_PARALLEL_REQUESTS=true
# PARALLEL_REQUESTS=true
### Watchdog settings
###
# Enables watchdog to kill backends that are inactive for too much time
# LOCALAI_WATCHDOG_IDLE=true
#
# Time in duration format (e.g. 1h30m) after which a backend is considered idle
# LOCALAI_WATCHDOG_IDLE_TIMEOUT=5m
# WATCHDOG_IDLE=true
#
# Enables watchdog to kill backends that are busy for too much time
# LOCALAI_WATCHDOG_BUSY=true
# WATCHDOG_BUSY=true
#
# Time in duration format (e.g. 1h30m) after which a backend is considered idle
# WATCHDOG_IDLE_TIMEOUT=5m
#
# Time in duration format (e.g. 1h30m) after which a backend is considered busy
# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m
# WATCHDOG_BUSY_TIMEOUT=5m

View File

@@ -2,6 +2,6 @@
set -xe
REPO=$1
LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.tag_name')
LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.name')
cat <<< $(jq ".version = \"$LATEST_TAG\"" docs/data/version.json) > docs/data/version.json

View File

@@ -1,126 +0,0 @@
#!/bin/bash
# This scripts needs yq and huggingface_hub to be installed
# to install hugingface_hub run pip install huggingface_hub
# Path to the input YAML file
input_yaml=$1
# Function to download file and check checksum using Python
function check_and_update_checksum() {
model_name="$1"
file_name="$2"
uri="$3"
old_checksum="$4"
idx="$5"
# Download the file and calculate new checksum using Python
new_checksum=$(python3 -c "
import hashlib
from huggingface_hub import hf_hub_download, get_paths_info
import requests
import sys
import os
uri = '$uri'
file_name = uri.split('/')[-1]
# Function to parse the URI and determine download method
# Function to parse the URI and determine download method
def parse_uri(uri):
if uri.startswith('huggingface://'):
repo_id = uri.split('://')[1]
return 'huggingface', repo_id.rsplit('/', 1)[0]
elif 'huggingface.co' in uri:
parts = uri.split('/resolve/')
if len(parts) > 1:
repo_path = parts[0].split('https://huggingface.co/')[-1]
return 'huggingface', repo_path
return 'direct', uri
def calculate_sha256(file_path):
sha256_hash = hashlib.sha256()
with open(file_path, 'rb') as f:
for byte_block in iter(lambda: f.read(4096), b''):
sha256_hash.update(byte_block)
return sha256_hash.hexdigest()
download_type, repo_id_or_url = parse_uri(uri)
new_checksum = None
# Decide download method based on URI type
if download_type == 'huggingface':
# Use HF API to pull sha
for file in get_paths_info(repo_id_or_url, [file_name], repo_type='model'):
try:
new_checksum = file.lfs.sha256
break
except Exception as e:
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
sys.exit(2)
if new_checksum is None:
try:
file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
except Exception as e:
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
sys.exit(2)
else:
response = requests.get(repo_id_or_url)
if response.status_code == 200:
with open(file_name, 'wb') as f:
f.write(response.content)
file_path = file_name
elif response.status_code == 404:
print(f'File not found: {response.status_code}', file=sys.stderr)
sys.exit(2)
else:
print(f'Error downloading file: {response.status_code}', file=sys.stderr)
sys.exit(1)
if new_checksum is None:
new_checksum = calculate_sha256(file_path)
print(new_checksum)
os.remove(file_path)
else:
print(new_checksum)
")
if [[ "$new_checksum" == "" ]]; then
echo "Error calculating checksum for $file_name. Skipping..."
return
fi
echo "Checksum for $file_name: $new_checksum"
# Compare and update the YAML file if checksums do not match
result=$?
if [[ $result -eq 2 ]]; then
echo "File not found, deleting entry for $file_name..."
# yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\"))" "$input_yaml"
elif [[ "$old_checksum" != "$new_checksum" ]]; then
echo "Checksum mismatch for $file_name. Updating..."
yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\").sha256)" "$input_yaml"
yq eval -i "(.[$idx].files[] | select(.filename == \"$file_name\")).sha256 = \"$new_checksum\"" "$input_yaml"
elif [[ $result -ne 0 ]]; then
echo "Error downloading file $file_name. Skipping..."
else
echo "Checksum match for $file_name. No update needed."
fi
}
# Read the YAML and process each file
len=$(yq eval '. | length' "$input_yaml")
for ((i=0; i<$len; i++))
do
name=$(yq eval ".[$i].name" "$input_yaml")
files_len=$(yq eval ".[$i].files | length" "$input_yaml")
for ((j=0; j<$files_len; j++))
do
filename=$(yq eval ".[$i].files[$j].filename" "$input_yaml")
uri=$(yq eval ".[$i].files[$j].uri" "$input_yaml")
checksum=$(yq eval ".[$i].files[$j].sha256" "$input_yaml")
echo "Checking model $name, file $filename. URI = $uri, Checksum = $checksum"
check_and_update_checksum "$name" "$filename" "$uri" "$checksum" "$i"
done
done

View File

@@ -1,297 +0,0 @@
package main
import (
"fmt"
"html/template"
"io/ioutil"
"os"
"gopkg.in/yaml.v3"
)
var modelPageTemplate string = `
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>LocalAI models</title>
<link href="https://cdnjs.cloudflare.com/ajax/libs/flowbite/2.3.0/flowbite.min.css" rel="stylesheet" />
<script src="https://cdn.jsdelivr.net/npm/vanilla-lazyload@19.1.3/dist/lazyload.min.js"></script>
<link
rel="stylesheet"
href="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/styles/default.min.css"
/>
<script
defer
src="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/highlight.min.js"
></script>
<script
defer
src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"
></script>
<script
defer
src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"
></script>
<script
defer
src="https://cdn.jsdelivr.net/npm/dompurify@3.0.6/dist/purify.min.js"
></script>
<link href="/static/general.css" rel="stylesheet" />
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
<link
href="https://fonts.googleapis.com/css?family=Roboto:300,400,500,700,900&display=swap"
rel="stylesheet" />
<link
rel="stylesheet"
href="https://cdn.jsdelivr.net/npm/tw-elements/css/tw-elements.min.css" />
<script src="https://cdn.tailwindcss.com/3.3.0"></script>
<script>
tailwind.config = {
darkMode: "class",
theme: {
fontFamily: {
sans: ["Roboto", "sans-serif"],
body: ["Roboto", "sans-serif"],
mono: ["ui-monospace", "monospace"],
},
},
corePlugins: {
preflight: false,
},
};
</script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.1.1/css/all.min.css">
<script src="https://unpkg.com/htmx.org@1.9.12" integrity="sha384-ujb1lZYygJmzgSwoxRggbCHcjc0rB2XoQrxeTUQyRjrOnlCoYta87iKBWq3EsdM2" crossorigin="anonymous"></script>
</head>
<body class="bg-gray-900 text-gray-200">
<div class="flex flex-col min-h-screen">
<nav class="bg-gray-800 shadow-lg">
<div class="container mx-auto px-4 py-4">
<div class="flex items-center justify-between">
<div class="flex items-center">
<a href="/" class="text-white text-xl font-bold"><img src="https://github.com/mudler/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
<a href="/" class="text-white text-xl font-bold">LocalAI</a>
</div>
<!-- Menu button for small screens -->
<div class="lg:hidden">
<button id="menu-toggle" class="text-gray-400 hover:text-white focus:outline-none">
<i class="fas fa-bars fa-lg"></i>
</button>
</div>
<!-- Navigation links -->
<div class="hidden lg:flex lg:items-center lg:justify-end lg:flex-1 lg:w-0">
<a href="https://localai.io" class="text-gray-400 hover:text-white px-3 py-2 rounded" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
</div>
</div>
<!-- Collapsible menu for small screens -->
<div class="hidden lg:hidden" id="mobile-menu">
<div class="pt-4 pb-3 border-t border-gray-700">
<a href="https://localai.io" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
</div>
</div>
</div>
</nav>
<style>
.is-hidden {
display: none;
}
</style>
<div class="container mx-auto px-4 flex-grow">
<div class="models mt-12">
<h2 class="text-center text-3xl font-semibold text-gray-100">
LocalAI model gallery list </h2><br>
<h2 class="text-center text-3xl font-semibold text-gray-100">
🖼️ Available {{.AvailableModels}} models</i> <a href="https://localai.io/models/" target="_blank" >
<i class="fas fa-circle-info pr-2"></i>
</a></h2>
<h3>
Refer to the Model gallery <a href="https://localai.io/models/" target="_blank" ><i class="fas fa-circle-info pr-2"></i></a> for more information on how to use the models with LocalAI.<br>
You can install models with the CLI command <code>local-ai models install <model-name></code>. or by using the WebUI.
</h3>
<input class="form-control appearance-none block w-full mt-5 px-3 py-2 text-base font-normal text-gray-300 pb-2 mb-5 bg-gray-800 bg-clip-padding border border-solid border-gray-600 rounded transition ease-in-out m-0 focus:text-gray-300 focus:bg-gray-900 focus:border-blue-500 focus:outline-none" type="search"
id="searchbox" placeholder="Live search keyword..">
<div class="dark grid grid-cols-1 grid-rows-1 md:grid-cols-3 block rounded-lg shadow-secondary-1 dark:bg-surface-dark">
{{ range $_, $model := .Models }}
<div class="box me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface pb-2">
<div>
{{ $icon := "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg" }}
{{ if $model.Icon }}
{{ $icon = $model.Icon }}
{{ end }}
<div class="flex justify-center items-center">
<img data-src="{{ $icon }}" alt="{{$model.Name}}" class="rounded-t-lg max-h-48 max-w-96 object-cover mt-3 lazy">
</div>
<div class="p-6 text-surface dark:text-white">
<h5 class="mb-2 text-xl font-medium leading-tight">{{$model.Name}}</h5>
<p class="mb-4 text-base truncate">{{ $model.Description }}</p>
</div>
<div class="px-6 pt-4 pb-2">
<!-- Modal toggle -->
<button data-modal-target="{{ $model.Name}}-modal" data-modal-toggle="{{ $model.Name }}-modal" class="block text-white bg-blue-700 hover:bg-blue-800 focus:ring-4 focus:outline-none focus:ring-blue-300 font-medium rounded-lg text-sm px-5 py-2.5 text-center dark:bg-blue-600 dark:hover:bg-blue-700 dark:focus:ring-blue-800" type="button">
More info
</button>
<!-- Main modal -->
<div id="{{ $model.Name}}-modal" tabindex="-1" aria-hidden="true" class="hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full">
<div class="relative p-4 w-full max-w-2xl max-h-full">
<!-- Modal content -->
<div class="relative bg-white rounded-lg shadow dark:bg-gray-700">
<!-- Modal header -->
<div class="flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600">
<h3 class="text-xl font-semibold text-gray-900 dark:text-white">
{{ $model.Name}}
</h3>
<button type="button" class="text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white" data-modal-hide="{{$model.Name}}-modal">
<svg class="w-3 h-3" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 14 14">
<path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6"/>
</svg>
<span class="sr-only">Close modal</span>
</button>
</div>
<!-- Modal body -->
<div class="p-4 md:p-5 space-y-4">
<div class="flex justify-center items-center">
<img data-src="{{ $icon }}" alt="{{$model.Name}}" class="lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3">
</div>
<p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
{{ $model.Description }}
</p>
<p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
To install the model with the CLI, run: <br>
<code> local-ai models install {{$model.Name}} </code> <br>
<hr>
See also <a href="https://localai.io/models/" target="_blank" >
Installation <i class="fas fa-circle-info pr-2"></i>
</a> to see how to install models with the REST API.
</p>
<p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
<ul>
{{ range $_, $u := $model.URLs }}
<li><a href="{{ $u }}" target=_blank><i class="fa-solid fa-link"></i> {{ $u }}</a></li>
{{ end }}
</ul>
</p>
</div>
<!-- Modal footer -->
<div class="flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600">
<button data-modal-hide="{{ $model.Name}}-modal" type="button" class="py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700">Close</button>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
{{ end }}
</div>
</div>
</div>
<script>
var lazyLoadInstance = new LazyLoad({
// Your custom settings go here
});
let cards = document.querySelectorAll('.box')
function liveSearch() {
let search_query = document.getElementById("searchbox").value;
//Use innerText if all contents are visible
//Use textContent for including hidden elements
for (var i = 0; i < cards.length; i++) {
if(cards[i].textContent.toLowerCase()
.includes(search_query.toLowerCase())) {
cards[i].classList.remove("is-hidden");
} else {
cards[i].classList.add("is-hidden");
}
}
}
//A little delay
let typingTimer;
let typeInterval = 500;
let searchInput = document.getElementById('searchbox');
searchInput.addEventListener('keyup', () => {
clearTimeout(typingTimer);
typingTimer = setTimeout(liveSearch, typeInterval);
});
</script>
</div>
<script src="https://cdnjs.cloudflare.com/ajax/libs/flowbite/2.3.0/flowbite.min.js"></script>
</body>
</html>
`
type GalleryModel struct {
Name string `json:"name" yaml:"name"`
URLs []string `json:"urls" yaml:"urls"`
Icon string `json:"icon" yaml:"icon"`
Description string `json:"description" yaml:"description"`
}
func main() {
// read the YAML file which contains the models
f, err := ioutil.ReadFile(os.Args[1])
if err != nil {
fmt.Println("Error reading file:", err)
return
}
models := []*GalleryModel{}
err = yaml.Unmarshal(f, &models)
if err != nil {
// write to stderr
os.Stderr.WriteString("Error unmarshaling YAML: " + err.Error() + "\n")
return
}
// render the template
data := struct {
Models []*GalleryModel
AvailableModels int
}{
Models: models,
AvailableModels: len(models),
}
tmpl := template.Must(template.New("modelPage").Parse(modelPageTemplate))
err = tmpl.Execute(os.Stdout, data)
if err != nil {
fmt.Println("Error executing template:", err)
return
}
}

View File

@@ -1,25 +0,0 @@
# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
version: 2
updates:
- package-ecosystem: "gomod"
directory: "/"
schedule:
interval: "weekly"
- package-ecosystem: "github-actions"
# Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
directory: "/"
schedule:
# Check for updates to GitHub Actions every weekday
interval: "weekly"
- package-ecosystem: "pip"
# Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
directory: "/"
schedule:
# Check for updates to GitHub Actions every weekday
interval: "weekly"
- package-ecosystem: "docker"
# Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
directory: "/"
schedule:
# Check for updates to GitHub Actions every weekday
interval: "weekly"

24
.github/labeler.yml vendored
View File

@@ -1,24 +0,0 @@
enhancements:
- head-branch: ['^feature', 'feature']
kind/documentation:
- any:
- changed-files:
- any-glob-to-any-file: 'docs/*'
- changed-files:
- any-glob-to-any-file: '*.md'
area/ai-model:
- any:
- changed-files:
- any-glob-to-any-file: 'gallery/*'
examples:
- any:
- changed-files:
- any-glob-to-any-file: 'examples/*'
ci:
- any:
- changed-files:
- any-glob-to-any-file: '.github/*'

12
.github/release.yml vendored
View File

@@ -12,23 +12,13 @@ changelog:
- title: "Bug fixes :bug:"
labels:
- bug
- regression
- title: Exciting New Features 🎉
labels:
- Semver-Minor
- enhancement
- ux
- roadmap
- title: 🧠 Models
labels:
- area/ai-model
- title: 📖 Documentation and examples
labels:
- kind/documentation
- examples
- title: 👒 Dependencies
labels:
- dependencies
- title: Other Changes
labels:
- "*"
- "*"

View File

@@ -49,7 +49,7 @@ jobs:
run: |
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
- name: Create Pull Request
uses: peter-evans/create-pull-request@v6
uses: peter-evans/create-pull-request@v5
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI

View File

@@ -17,7 +17,7 @@ jobs:
run: |
bash .github/bump_docs.sh ${{ matrix.repository }}
- name: Create Pull Request
uses: peter-evans/create-pull-request@v6
uses: peter-evans/create-pull-request@v5
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI

View File

@@ -1,47 +0,0 @@
name: Check if checksums are up-to-date
on:
schedule:
- cron: 0 20 * * *
workflow_dispatch:
jobs:
checksum_check:
runs-on: arc-runner-set
steps:
- name: Force Install GIT latest
run: |
sudo apt-get update \
&& sudo apt-get install -y software-properties-common \
&& sudo apt-get update \
&& sudo add-apt-repository -y ppa:git-core/ppa \
&& sudo apt-get update \
&& sudo apt-get install -y git
- uses: actions/checkout@v4
- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y pip wget
sudo pip install --upgrade pip
pip install huggingface_hub
- name: 'Setup yq'
uses: dcarbone/install-yq-action@v1.1.1
with:
version: 'v4.43.1'
download-compressed: true
force: true
- name: Checksum checker 🔧
run: |
export HF_HOME=/hf_cache
sudo mkdir /hf_cache
sudo chmod 777 /hf_cache
bash .github/checksum_checker.sh gallery/index.yaml
- name: Create Pull Request
uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
title: 'models(gallery): :arrow_up: update checksum'
branch: "update/checksum"
body: Updating checksums in gallery/index.yaml
signoff: true

View File

@@ -1,43 +0,0 @@
name: Dependabot auto-merge
on:
- pull_request_target
permissions:
contents: write
pull-requests: write
packages: read
jobs:
dependabot:
runs-on: ubuntu-latest
if: ${{ github.actor == 'dependabot[bot]' }}
steps:
- name: Dependabot metadata
id: metadata
uses: dependabot/fetch-metadata@v2.1.0
with:
github-token: "${{ secrets.GITHUB_TOKEN }}"
skip-commit-verification: true
- name: Checkout repository
uses: actions/checkout@v4
- name: Approve a PR if not already approved
run: |
gh pr checkout "$PR_URL"
if [ "$(gh pr status --json reviewDecision -q .currentBranch.reviewDecision)" != "APPROVED" ];
then
gh pr review --approve "$PR_URL"
else
echo "PR already approved.";
fi
env:
PR_URL: ${{github.event.pull_request.html_url}}
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
- name: Enable auto-merge for Dependabot PRs
if: ${{ contains(github.event.pull_request.title, 'bump')}}
run: gh pr merge --auto --squash "$PR_URL"
env:
PR_URL: ${{github.event.pull_request.html_url}}
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}

View File

@@ -1,94 +0,0 @@
name: 'generate and publish GRPC docker caches'
on:
workflow_dispatch:
push:
branches:
- master
concurrency:
group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
jobs:
generate_caches:
strategy:
matrix:
include:
- grpc-base-image: ubuntu:22.04
runs-on: 'ubuntu-latest'
platforms: 'linux/amd64,linux/arm64'
runs-on: ${{matrix.runs-on}}
steps:
- name: Release space from worker
if: matrix.runs-on == 'ubuntu-latest'
run: |
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
df -h
echo
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
sudo apt-get remove --auto-remove android-sdk-platform-tools || true
sudo apt-get purge --auto-remove android-sdk-platform-tools || true
sudo rm -rf /usr/local/lib/android
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
sudo rm -rf /usr/share/dotnet
sudo apt-get remove -y '^mono-.*' || true
sudo apt-get remove -y '^ghc-.*' || true
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
sudo apt-get remove -y 'php.*' || true
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
sudo apt-get remove -y '^google-.*' || true
sudo apt-get remove -y azure-cli || true
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
sudo apt-get remove -y '^gfortran-.*' || true
sudo apt-get remove -y microsoft-edge-stable || true
sudo apt-get remove -y firefox || true
sudo apt-get remove -y powershell || true
sudo apt-get remove -y r-base-core || true
sudo apt-get autoremove -y
sudo apt-get clean
echo
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
sudo rm -rfv build || true
sudo rm -rf /usr/share/dotnet || true
sudo rm -rf /opt/ghc || true
sudo rm -rf "/usr/local/share/boost" || true
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
df -h
- name: Set up QEMU
uses: docker/setup-qemu-action@master
with:
platforms: all
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@master
- name: Checkout
uses: actions/checkout@v4
- name: Cache GRPC
uses: docker/build-push-action@v5
with:
builder: ${{ steps.buildx.outputs.name }}
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
# This means that even the MAKEFLAGS have to be an EXACT match.
# If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
build-args: |
GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.64.0
context: .
file: ./Dockerfile
cache-to: type=gha,ignore-error=true
cache-from: type=gha
target: grpc
platforms: ${{ matrix.platforms }}
push: false

View File

@@ -1,59 +0,0 @@
name: 'generate and publish intel docker caches'
on:
workflow_dispatch:
push:
branches:
- master
concurrency:
group: intel-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
jobs:
generate_caches:
strategy:
matrix:
include:
- base-image: intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04
runs-on: 'ubuntu-latest'
platforms: 'linux/amd64'
runs-on: ${{matrix.runs-on}}
steps:
- name: Set up QEMU
uses: docker/setup-qemu-action@master
with:
platforms: all
- name: Login to DockerHub
if: github.event_name != 'pull_request'
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
- name: Login to quay
if: github.event_name != 'pull_request'
uses: docker/login-action@v3
with:
registry: quay.io
username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@master
- name: Checkout
uses: actions/checkout@v4
- name: Cache Intel images
uses: docker/build-push-action@v5
with:
builder: ${{ steps.buildx.outputs.name }}
build-args: |
BASE_IMAGE=${{ matrix.base-image }}
context: .
file: ./Dockerfile
tags: quay.io/go-skynet/intel-oneapi-base:latest
push: true
target: intel
platforms: ${{ matrix.platforms }}

View File

@@ -22,8 +22,6 @@ jobs:
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -32,7 +30,7 @@ jobs:
strategy:
# Pushing with all jobs in parallel
# eats the bandwidth of all the nodes
max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }}
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
matrix:
include:
- build-type: ''
@@ -43,10 +41,9 @@ jobs:
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "5"
cuda-minor-version: "1"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -54,27 +51,22 @@ jobs:
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
ffmpeg: 'false'
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
tag-suffix: 'sycl-f16-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
core-image-build:
uses: ./.github/workflows/image_build.yml
with:
@@ -88,8 +80,6 @@ jobs:
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -106,34 +96,21 @@ jobs:
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
tag-suffix: 'sycl-f16-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "5"
cuda-minor-version: "1"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'vulkan'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-vulkan-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
base-image: "ubuntu:22.04"

View File

@@ -26,11 +26,6 @@ jobs:
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
aio: ${{ matrix.aio }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -39,7 +34,7 @@ jobs:
strategy:
# Pushing with all jobs in parallel
# eats the bandwidth of all the nodes
max-parallel: ${{ github.event_name != 'pull_request' && 6 || 10 }}
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
matrix:
include:
# Extra images
@@ -52,19 +47,17 @@ jobs:
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
- build-type: ''
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-latest: 'false'
tag-suffix: '-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "8"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11'
@@ -72,10 +65,9 @@ jobs:
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "5"
cuda-minor-version: "1"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12'
@@ -83,35 +75,26 @@ jobs:
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "8"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
aio: "-aio-gpu-nvidia-cuda-11"
latest-image: 'latest-gpu-nvidia-cuda-11'
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "5"
cuda-minor-version: "1"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
aio: "-aio-gpu-nvidia-cuda-12"
latest-image: 'latest-gpu-nvidia-cuda-12'
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
makeflags: "--jobs=3 --output-sync=target"
- build-type: ''
#platforms: 'linux/amd64,linux/arm64'
platforms: 'linux/amd64'
@@ -121,118 +104,88 @@ jobs:
image-type: 'extras'
base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-latest: 'false'
tag-suffix: '-hipblas-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
aio: "-aio-gpu-hipblas"
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
latest-image: 'latest-gpu-hipblas'
latest-image-aio: 'latest-aio-gpu-hipblas'
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
ffmpeg: 'false'
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'auto'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-latest: 'false'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
tag-suffix: '-sycl-f16-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
aio: "-aio-gpu-intel-f16"
latest-image: 'latest-gpu-intel-f16'
latest-image-aio: 'latest-aio-gpu-intel-f16'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'auto'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
tag-latest: 'false'
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
tag-suffix: '-sycl-f32-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
runs-on: 'arc-runner-set'
aio: "-aio-gpu-intel-f32"
latest-image: 'latest-gpu-intel-f32'
latest-image-aio: 'latest-aio-gpu-intel-f32'
makeflags: "--jobs=3 --output-sync=target"
# Core images
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
tag-suffix: '-sycl-f16-core'
ffmpeg: 'false'
image-type: 'core'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
tag-suffix: '-sycl-f32-core'
ffmpeg: 'false'
image-type: 'core'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
tag-suffix: '-sycl-f16-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
tag-suffix: '-sycl-f32-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-core'
ffmpeg: 'false'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
core-image-build:
uses: ./.github/workflows/image_build.yml
with:
@@ -245,84 +198,60 @@ jobs:
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
aio: ${{ matrix.aio }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
matrix:
include:
- build-type: ''
platforms: 'linux/amd64,linux/arm64'
tag-latest: 'auto'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
aio: "-aio-cpu"
latest-image: 'latest-cpu'
latest-image-aio: 'latest-aio-cpu'
makeflags: "--jobs=4 --output-sync=target"
runs-on: 'ubuntu-latest'
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "8"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-core'
ffmpeg: ''
image-type: 'core'
base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=4 --output-sync=target"
runs-on: 'ubuntu-latest'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "5"
cuda-minor-version: "1"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-core'
ffmpeg: ''
image-type: 'core'
base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=4 --output-sync=target"
runs-on: 'ubuntu-latest'
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "8"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda11-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "5"
cuda-minor-version: "1"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'vulkan'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-vulkan-ffmpeg-core'
latest-image: 'latest-vulkan-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
runs-on: 'arc-runner-set'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"

View File

@@ -6,10 +6,6 @@ on:
inputs:
base-image:
description: 'Base image'
required: true
type: string
grpc-base-image:
description: 'GRPC Base image, must be a compatible image with base-image'
required: false
default: ''
type: string
@@ -19,11 +15,11 @@ on:
type: string
cuda-major-version:
description: 'CUDA major version'
default: "12"
default: "11"
type: string
cuda-minor-version:
description: 'CUDA minor version'
default: "5"
default: "7"
type: string
platforms:
description: 'Platforms'
@@ -33,14 +29,6 @@ on:
description: 'Tag latest'
default: ''
type: string
latest-image:
description: 'Tag latest'
default: ''
type: string
latest-image-aio:
description: 'Tag latest'
default: ''
type: string
tag-suffix:
description: 'Tag suffix'
default: ''
@@ -58,16 +46,6 @@ on:
required: true
default: ''
type: string
makeflags:
description: 'Make Flags'
required: false
default: '--jobs=4 --output-sync=target'
type: string
aio:
description: 'AIO Image Name'
required: false
default: ''
type: string
secrets:
dockerUsername:
required: true
@@ -91,7 +69,6 @@ jobs:
&& sudo apt-get install -y git
- name: Checkout
uses: actions/checkout@v4
- name: Release space from worker
if: inputs.runs-on == 'ubuntu-latest'
run: |
@@ -133,10 +110,8 @@ jobs:
sudo rm -rf "/usr/local/share/boost" || true
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
df -h
- name: Docker meta
id: meta
if: github.event_name != 'pull_request'
uses: docker/metadata-action@v5
with:
images: |
@@ -149,46 +124,6 @@ jobs:
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.tag-suffix }}
- name: Docker meta for PR
id: meta_pull_request
if: github.event_name == 'pull_request'
uses: docker/metadata-action@v5
with:
images: |
ttl.sh/localai-ci-pr-${{ github.event.number }}
tags: |
type=ref,event=branch
type=semver,pattern={{raw}}
type=sha
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.tag-suffix }}
- name: Docker meta AIO (quay.io)
if: inputs.aio != ''
id: meta_aio
uses: docker/metadata-action@v5
with:
images: |
quay.io/go-skynet/local-ai
tags: |
type=ref,event=branch
type=semver,pattern={{raw}}
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.aio }}
- name: Docker meta AIO (dockerhub)
if: inputs.aio != ''
id: meta_aio_dockerhub
uses: docker/metadata-action@v5
with:
images: |
localai/localai
tags: |
type=ref,event=branch
type=semver,pattern={{raw}}
flavor: |
suffix=${{ inputs.aio }}
- name: Set up QEMU
uses: docker/setup-qemu-action@master
@@ -216,13 +151,8 @@ jobs:
- name: Build and push
uses: docker/build-push-action@v5
if: github.event_name != 'pull_request'
with:
builder: ${{ steps.buildx.outputs.name }}
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
# This means that even the MAKEFLAGS have to be an EXACT match.
# If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
# This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded
build-args: |
BUILD_TYPE=${{ inputs.build-type }}
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
@@ -230,106 +160,12 @@ jobs:
FFMPEG=${{ inputs.ffmpeg }}
IMAGE_TYPE=${{ inputs.image-type }}
BASE_IMAGE=${{ inputs.base-image }}
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.64.0
MAKEFLAGS=${{ inputs.makeflags }}
context: .
file: ./Dockerfile
cache-from: type=gha
platforms: ${{ inputs.platforms }}
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
### Start testing image
- name: Build and push
uses: docker/build-push-action@v5
if: github.event_name == 'pull_request'
with:
builder: ${{ steps.buildx.outputs.name }}
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
# This means that even the MAKEFLAGS have to be an EXACT match.
# If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
# This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded
build-args: |
BUILD_TYPE=${{ inputs.build-type }}
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
FFMPEG=${{ inputs.ffmpeg }}
IMAGE_TYPE=${{ inputs.image-type }}
BASE_IMAGE=${{ inputs.base-image }}
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
GRPC_VERSION=v1.64.0
MAKEFLAGS=${{ inputs.makeflags }}
context: .
file: ./Dockerfile
cache-from: type=gha
platforms: ${{ inputs.platforms }}
push: true
tags: ${{ steps.meta_pull_request.outputs.tags }}
labels: ${{ steps.meta_pull_request.outputs.labels }}
- name: Testing image
if: github.event_name == 'pull_request'
run: |
echo "Image is available at ttl.sh/localai-ci-pr-${{ github.event.number }}:${{ steps.meta_pull_request.outputs.version }}" >> $GITHUB_STEP_SUMMARY
## End testing image
- name: Build and push AIO image
if: inputs.aio != ''
uses: docker/build-push-action@v5
with:
builder: ${{ steps.buildx.outputs.name }}
build-args: |
BASE_IMAGE=quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
MAKEFLAGS=${{ inputs.makeflags }}
context: .
file: ./Dockerfile.aio
platforms: ${{ inputs.platforms }}
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta_aio.outputs.tags }}
labels: ${{ steps.meta_aio.outputs.labels }}
- name: Build and push AIO image (dockerhub)
if: inputs.aio != ''
uses: docker/build-push-action@v5
with:
builder: ${{ steps.buildx.outputs.name }}
build-args: |
BASE_IMAGE=localai/localai:${{ steps.meta.outputs.version }}
MAKEFLAGS=${{ inputs.makeflags }}
context: .
file: ./Dockerfile.aio
platforms: ${{ inputs.platforms }}
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
- name: Latest tag
# run this on branches, when it is a tag and there is a latest-image defined
if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
run: |
docker pull localai/localai:${{ steps.meta.outputs.version }}
docker tag localai/localai:${{ steps.meta.outputs.version }} localai/localai:${{ inputs.latest-image }}
docker push localai/localai:${{ inputs.latest-image }}
docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
- name: Latest AIO tag
# run this on branches, when it is a tag and there is a latest-image defined
if: github.event_name != 'pull_request' && inputs.latest-image-aio != '' && github.ref_type == 'tag'
run: |
docker pull localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }}
docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image-aio }}
docker push localai/localai:${{ inputs.latest-image-aio }}
docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
- name: job summary
run: |
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
- name: job summary(AIO)
if: inputs.aio != ''
run: |
echo "Built image: ${{ steps.meta_aio.outputs.labels }}" >> $GITHUB_STEP_SUMMARY

View File

@@ -1,12 +0,0 @@
name: "Pull Request Labeler"
on:
- pull_request_target
jobs:
labeler:
permissions:
contents: read
pull-requests: write
runs-on: ubuntu-latest
steps:
- uses: actions/labeler@v5

View File

@@ -1,35 +0,0 @@
name: LocalAI-bot auto-merge
on:
- pull_request_target
permissions:
contents: write
pull-requests: write
packages: read
jobs:
dependabot:
runs-on: ubuntu-latest
if: ${{ github.actor == 'localai-bot' }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Approve a PR if not already approved
run: |
gh pr checkout "$PR_URL"
if [ "$(gh pr status --json reviewDecision -q .currentBranch.reviewDecision)" != "APPROVED" ];
then
gh pr review --approve "$PR_URL"
else
echo "PR already approved.";
fi
env:
PR_URL: ${{github.event.pull_request.html_url}}
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
- name: Enable auto-merge for LocalAIBot PRs
run: gh pr merge --auto --squash "$PR_URL"
env:
PR_URL: ${{github.event.pull_request.html_url}}
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}

View File

@@ -1,11 +1,6 @@
name: Build and Release
on:
- push
- pull_request
env:
GRPC_VERSION: v1.64.0
on: push
permissions:
contents: write
@@ -15,225 +10,85 @@ concurrency:
cancel-in-progress: true
jobs:
build-linux-arm:
build-linux:
strategy:
matrix:
include:
- build: 'avx2'
defines: ''
- build: 'avx'
defines: '-DLLAMA_AVX2=OFF'
- build: 'avx512'
defines: '-DLLAMA_AVX512=ON'
- build: 'cuda12'
defines: ''
- build: 'cuda11'
defines: ''
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
- uses: actions/setup-go@v4
with:
go-version: '1.21.x'
cache: false
go-version: '>=1.21.0'
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
- name: Install CUDA Dependencies
run: |
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
env:
CUDA_VERSION: 12-5
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v4
with:
path: grpc
key: ${{ runner.os }}-arm-grpc-${{ env.GRPC_VERSION }}
- name: Build grpc
if: steps.cache-grpc.outputs.cache-hit != 'true'
run: |
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && sudo make --jobs 5 --output-sync=target
- name: Install gRPC
run: |
GNU_HOST=aarch64-linux-gnu
C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
CROSS_TOOLCHAIN=/usr/$GNU_HOST
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
# https://cmake.org/cmake/help/v3.13/manual/cmake-toolchains.7.html#cross-compiling-for-linux
echo "set(CMAKE_SYSTEM_NAME Linux)" >> $CMAKE_CROSS_TOOLCHAIN && \
echo "set(CMAKE_SYSTEM_PROCESSOR arm)" >> $CMAKE_CROSS_TOOLCHAIN && \
echo "set(CMAKE_STAGING_PREFIX $CROSS_STAGING_PREFIX)" >> $CMAKE_CROSS_TOOLCHAIN && \
echo "set(CMAKE_SYSROOT ${CROSS_TOOLCHAIN}/sysroot)" >> $CMAKE_CROSS_TOOLCHAIN && \
echo "set(CMAKE_C_COMPILER /usr/bin/$C_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
echo "set(CMAKE_CXX_COMPILER /usr/bin/$CXX_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
echo "set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)" >> $CMAKE_CROSS_TOOLCHAIN && \
echo "set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
echo "set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
echo "set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN
GRPC_DIR=$PWD/grpc
cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install && \
GRPC_CROSS_BUILD_DIR=$GRPC_DIR/cmake/cross_build && \
mkdir -p $GRPC_CROSS_BUILD_DIR && \
cd $GRPC_CROSS_BUILD_DIR && \
cmake -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=$CROSS_TOOLCHAIN/grpc_install \
../.. && \
sudo make -j`nproc` install
- name: Build
id: build
run: |
GNU_HOST=aarch64-linux-gnu
C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
CROSS_TOOLCHAIN=/usr/$GNU_HOST
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
export PATH=$PATH:$GOPATH/bin
export PATH=/usr/local/cuda/bin:$PATH
sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
GO_TAGS=p2p \
BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
GOOS=linux \
GOARCH=arm64 \
CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
- uses: actions/upload-artifact@v4
with:
name: LocalAI-linux-arm64
path: release/
- name: Release
uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
build-linux:
runs-on: arc-runner-set
steps:
- name: Force Install GIT latest
run: |
sudo apt-get update \
&& sudo apt-get install -y software-properties-common \
&& sudo apt-get update \
&& sudo add-apt-repository -y ppa:git-core/ppa \
&& sudo apt-get update \
&& sudo apt-get install -y git
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache cmake
- name: Intel Dependencies
run: |
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
sudo apt update
sudo apt install -y intel-basekit
sudo apt-get install build-essential ffmpeg
- name: Install CUDA Dependencies
if: ${{ matrix.build == 'cuda12' || matrix.build == 'cuda11' }}
run: |
if [ "${{ matrix.build }}" == "cuda12" ]; then
export CUDA_VERSION=12-3
else
export CUDA_VERSION=11-7
fi
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
env:
CUDA_VERSION: 12-3
- name: "Install Hipblas"
env:
ROCM_VERSION: "6.1"
AMDGPU_VERSION: "6.1"
run: |
set -ex
sudo apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -
printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | sudo tee /etc/apt/sources.list.d/rocm.list
printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
sudo apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \
hipblas-dev rocm-dev \
rocblas-dev
sudo apt-get clean
sudo rm -rf /var/lib/apt/lists/*
sudo ldconfig
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v4
uses: actions/cache@v3
with:
path: grpc
key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
key: ${{ runner.os }}-grpc
- name: Build grpc
if: steps.cache-grpc.outputs.cache-hit != 'true'
run: |
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && sudo make --jobs 5 --output-sync=target
../.. && sudo make -j12
- name: Install gRPC
run: |
cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
# BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
cd grpc && cd cmake/build && sudo make -j12 install
- name: Build
id: build
env:
CMAKE_ARGS: "${{ matrix.defines }}"
BUILD_ID: "${{ matrix.build }}"
run: |
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
export PATH=$PATH:$GOPATH/bin
export PATH=/usr/local/cuda/bin:$PATH
export PATH=/opt/rocm/bin:$PATH
source /opt/intel/oneapi/setvars.sh
sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
GO_TAGS=p2p \
BACKEND_LIBS="./ld.so /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/libgomp.so.1" \
make -j4 dist
- uses: actions/upload-artifact@v4
if [ "${{ matrix.build }}" == "cuda12" ] || [ "${{ matrix.build }}" == "cuda11" ]; then
export BUILD_TYPE=cublas
export PATH=/usr/local/cuda/bin:$PATH
make dist
else
STATIC=true make dist
fi
- uses: actions/upload-artifact@v3
with:
name: LocalAI-linux
name: ${{ matrix.build }}
path: release/
- name: Release
uses: softprops/action-gh-release@v2
uses: softprops/action-gh-release@v1
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
build-stablediffusion:
runs-on: ubuntu-latest
steps:
@@ -241,72 +96,66 @@ jobs:
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
- uses: actions/setup-go@v4
with:
go-version: '1.21.x'
cache: false
go-version: '>=1.21.0'
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
sudo apt-get install -y --no-install-recommends libopencv-dev
sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
- name: Build stablediffusion
run: |
export PATH=$PATH:$GOPATH/bin
make backend-assets/grpc/stablediffusion
mkdir -p release && cp backend-assets/grpc/stablediffusion release
env:
GO_TAGS: stablediffusion
- uses: actions/upload-artifact@v4
- uses: actions/upload-artifact@v3
with:
name: stablediffusion
path: release/
- name: Release
uses: softprops/action-gh-release@v2
uses: softprops/action-gh-release@v1
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
build-macOS-arm64:
runs-on: macos-14
build-macOS:
strategy:
matrix:
include:
- build: 'avx2'
defines: ''
- build: 'avx'
defines: '-DLLAMA_AVX2=OFF'
- build: 'avx512'
defines: '-DLLAMA_AVX512=ON'
runs-on: macOS-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
- uses: actions/setup-go@v4
with:
go-version: '1.21.x'
cache: false
go-version: '>=1.21.0'
- name: Dependencies
run: |
brew install protobuf grpc
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
- name: Build
id: build
env:
CMAKE_ARGS: "${{ matrix.defines }}"
BUILD_ID: "${{ matrix.build }}"
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
export PATH=$PATH:$GOPATH/bin
BACKEND_LIBS="$(ls /opt/homebrew/opt/grpc/lib/*.dylib /opt/homebrew/opt/re2/lib/*.dylib /opt/homebrew/opt/openssl@3/lib/*.dylib /opt/homebrew/opt/protobuf/lib/*.dylib /opt/homebrew/opt/abseil/lib/*.dylib | xargs)" GO_TAGS=p2p make dist
- uses: actions/upload-artifact@v4
make dist
- uses: actions/upload-artifact@v3
with:
name: LocalAI-MacOS-arm64
name: ${{ matrix.build }}
path: release/
- name: Release
uses: softprops/action-gh-release@v2
uses: softprops/action-gh-release@v1
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true

View File

@@ -1,30 +0,0 @@
name: "Security Scan"
# Run workflow each time code is pushed to your repository and on a schedule.
# The scheduled workflow runs every at 00:00 on Sunday UTC time.
on:
push:
schedule:
- cron: '0 0 * * 0'
jobs:
tests:
runs-on: ubuntu-latest
env:
GO111MODULE: on
steps:
- name: Checkout Source
uses: actions/checkout@v4
if: ${{ github.actor != 'dependabot[bot]' }}
- name: Run Gosec Security Scanner
if: ${{ github.actor != 'dependabot[bot]' }}
uses: securego/gosec@master
with:
# we let the report trigger content trigger a failure using the GitHub Security features.
args: '-no-fail -fmt sarif -out results.sarif ./...'
- name: Upload SARIF file
if: ${{ github.actor != 'dependabot[bot]' }}
uses: github/codeql-action/upload-sarif@v3
with:
# Path to SARIF file relative to the root of the repository
sarif_file: results.sarif

View File

@@ -25,16 +25,23 @@ jobs:
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user grpcio-tools==1.64.0
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
- name: Test transformers
run: |
make --jobs=5 --output-sync=target -C backend/python/transformers
make --jobs=5 --output-sync=target -C backend/python/transformers test
export PATH=$PATH:/opt/conda/bin
make -C backend/python/transformers
make -C backend/python/transformers test
tests-sentencetransformers:
runs-on: ubuntu-latest
@@ -47,39 +54,23 @@ jobs:
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user grpcio-tools==1.64.0
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
- name: Test sentencetransformers
run: |
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
tests-rerankers:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user grpcio-tools==1.64.0
- name: Test rerankers
run: |
make --jobs=5 --output-sync=target -C backend/python/rerankers
make --jobs=5 --output-sync=target -C backend/python/rerankers test
export PATH=$PATH:/opt/conda/bin
make -C backend/python/sentencetransformers
make -C backend/python/sentencetransformers test
tests-diffusers:
runs-on: ubuntu-latest
@@ -91,60 +82,25 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y build-essential ffmpeg
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
pip install --user grpcio-tools==1.64.0
sudo apt-get install build-essential ffmpeg
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
- name: Test diffusers
run: |
make --jobs=5 --output-sync=target -C backend/python/diffusers
make --jobs=5 --output-sync=target -C backend/python/diffusers test
export PATH=$PATH:/opt/conda/bin
make -C backend/python/diffusers
make -C backend/python/diffusers test
tests-parler-tts:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user grpcio-tools==1.64.0
- name: Test parler-tts
run: |
make --jobs=5 --output-sync=target -C backend/python/parler-tts
make --jobs=5 --output-sync=target -C backend/python/parler-tts test
tests-openvoice:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user grpcio-tools==1.64.0
- name: Test openvoice
run: |
make --jobs=5 --output-sync=target -C backend/python/openvoice
make --jobs=5 --output-sync=target -C backend/python/openvoice test
tests-transformers-musicgen:
runs-on: ubuntu-latest
@@ -157,40 +113,54 @@ jobs:
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user grpcio-tools==1.64.0
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
- name: Test transformers-musicgen
run: |
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
export PATH=$PATH:/opt/conda/bin
make -C backend/python/transformers-musicgen
make -C backend/python/transformers-musicgen test
# tests-petals:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v4
# with:
# submodules: true
# - name: Dependencies
# run: |
# sudo apt-get update
# sudo apt-get install build-essential ffmpeg
# # Install UV
# curl -LsSf https://astral.sh/uv/install.sh | sh
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev
# pip install --user grpcio-tools==1.64.0
tests-petals:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
# - name: Test petals
# run: |
# make --jobs=5 --output-sync=target -C backend/python/petals
# make --jobs=5 --output-sync=target -C backend/python/petals test
- name: Test petals
run: |
export PATH=$PATH:/opt/conda/bin
make -C backend/python/petals
make -C backend/python/petals test
@@ -245,16 +215,23 @@ jobs:
# run: |
# sudo apt-get update
# sudo apt-get install build-essential ffmpeg
# # Install UV
# curl -LsSf https://astral.sh/uv/install.sh | sh
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev
# pip install --user grpcio-tools==1.64.0
# curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
# sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
# gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
# sudo apt-get update && \
# sudo apt-get install -y conda
# sudo apt-get install -y ca-certificates cmake curl patch
# sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
# sudo rm -rfv /usr/bin/conda || true
# - name: Test bark
# run: |
# make --jobs=5 --output-sync=target -C backend/python/bark
# make --jobs=5 --output-sync=target -C backend/python/bark test
# export PATH=$PATH:/opt/conda/bin
# make -C backend/python/bark
# make -C backend/python/bark test
# Below tests needs GPU. Commented out for now
@@ -270,15 +247,21 @@ jobs:
# run: |
# sudo apt-get update
# sudo apt-get install build-essential ffmpeg
# # Install UV
# curl -LsSf https://astral.sh/uv/install.sh | sh
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev
# pip install --user grpcio-tools==1.64.0
# curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
# sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
# gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
# sudo apt-get update && \
# sudo apt-get install -y conda
# sudo apt-get install -y ca-certificates cmake curl patch
# sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
# sudo rm -rfv /usr/bin/conda || true
# - name: Test vllm
# run: |
# make --jobs=5 --output-sync=target -C backend/python/vllm
# make --jobs=5 --output-sync=target -C backend/python/vllm test
# export PATH=$PATH:/opt/conda/bin
# make -C backend/python/vllm
# make -C backend/python/vllm test
tests-vallex:
runs-on: ubuntu-latest
steps:
@@ -290,15 +273,21 @@ jobs:
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user grpcio-tools==1.64.0
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
- name: Test vall-e-x
run: |
make --jobs=5 --output-sync=target -C backend/python/vall-e-x
make --jobs=5 --output-sync=target -C backend/python/vall-e-x test
export PATH=$PATH:/opt/conda/bin
make -C backend/python/vall-e-x
make -C backend/python/vall-e-x test
tests-coqui:
runs-on: ubuntu-latest
@@ -311,11 +300,18 @@ jobs:
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
pip install --user grpcio-tools==1.64.0
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng
sudo rm -rfv /usr/bin/conda || true
- name: Test coqui
run: |
make --jobs=5 --output-sync=target -C backend/python/coqui
make --jobs=5 --output-sync=target -C backend/python/coqui test
export PATH=$PATH:/opt/conda/bin
make -C backend/python/coqui
make -C backend/python/coqui test

View File

@@ -9,9 +9,6 @@ on:
tags:
- '*'
env:
GRPC_VERSION: v1.64.0
concurrency:
group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
@@ -57,48 +54,29 @@ jobs:
df -h
- name: Clone
uses: actions/checkout@v4
with:
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}
uses: actions/setup-go@v5
uses: actions/setup-go@v4
with:
go-version: ${{ matrix.go-version }}
cache: false
# You can test your matrix by printing the current Go version
- name: Display Go version
run: go version
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential curl ffmpeg
sudo apt-get install build-essential ffmpeg
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake patch python3-pip unzip
sudo apt-get install -y libopencv-dev
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
rm protoc.zip
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
export CUDACXX=/usr/local/cuda/bin/nvcc
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
# The python3-grpc-tools package in 22.04 is too old
pip install --user grpcio-tools
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
@@ -107,124 +85,49 @@ jobs:
GO_TAGS="tts" make -C sources/go-piper piper.o && \
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
env:
CUDA_VERSION: 12-3
GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
- name: Cache grpc
id: cache-grpc
uses: actions/cache@v4
uses: actions/cache@v3
with:
path: grpc
key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
key: ${{ runner.os }}-grpc
- name: Build grpc
if: steps.cache-grpc.outputs.cache-hit != 'true'
run: |
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --jobs 5 --shallow-submodules https://github.com/grpc/grpc && \
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && sudo make --jobs 5
../.. && sudo make -j12
- name: Install gRPC
run: |
cd grpc && cd cmake/build && sudo make --jobs 5 install
cd grpc && cd cmake/build && sudo make -j12 install
- name: Test
run: |
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
tests-aio-container:
runs-on: ubuntu-latest
steps:
- name: Release space from worker
run: |
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
df -h
echo
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
sudo apt-get remove --auto-remove android-sdk-platform-tools || true
sudo apt-get purge --auto-remove android-sdk-platform-tools || true
sudo rm -rf /usr/local/lib/android
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
sudo rm -rf /usr/share/dotnet
sudo apt-get remove -y '^mono-.*' || true
sudo apt-get remove -y '^ghc-.*' || true
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
sudo apt-get remove -y 'php.*' || true
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
sudo apt-get remove -y '^google-.*' || true
sudo apt-get remove -y azure-cli || true
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
sudo apt-get remove -y '^gfortran-.*' || true
sudo apt-get autoremove -y
sudo apt-get clean
echo
echo "Listing top largest packages"
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
head -n 30 <<< "${pkgs}"
echo
sudo rm -rfv build || true
df -h
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Build images
run: |
docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
- name: Test
run: |
LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
make run-e2e-aio
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
GO_TAGS="stablediffusion tts" make test
tests-apple:
runs-on: macOS-14
runs-on: macOS-latest
strategy:
matrix:
go-version: ['1.21.x']
steps:
- name: Clone
uses: actions/checkout@v4
with:
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}
uses: actions/setup-go@v5
uses: actions/setup-go@v4
with:
go-version: ${{ matrix.go-version }}
cache: false
# You can test your matrix by printing the current Go version
- name: Display Go version
run: go version
- name: Dependencies
run: |
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
pip install --user grpcio-tools==1.64.0
brew install protobuf grpc
- name: Test
run: |
export C_INCLUDE_PATH=/usr/local/include
export CPLUS_INCLUDE_PATH=/usr/local/include
# Used to run the newer GNUMake version from brew that supports --output-sync
export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.18
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test

View File

@@ -1,31 +0,0 @@
name: Update swagger
on:
schedule:
- cron: 0 20 * * *
workflow_dispatch:
jobs:
swagger:
strategy:
fail-fast: false
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: 'stable'
- run: |
go install github.com/swaggo/swag/cmd/swag@latest
- name: Bump swagger 🔧
run: |
make swagger
- name: Create Pull Request
uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: 'feat(swagger): update swagger'
title: 'feat(swagger): update swagger'
branch: "update/swagger"
body: Update swagger
signoff: true

View File

@@ -1,18 +0,0 @@
name: 'Yamllint GitHub Actions'
on:
- pull_request
jobs:
yamllint:
name: 'Yamllint'
runs-on: ubuntu-latest
steps:
- name: 'Checkout'
uses: actions/checkout@master
- name: 'Yamllint'
uses: karancode/yamllint-github-action@master
with:
yamllint_file_or_dir: 'gallery'
yamllint_strict: false
yamllint_comment: true
env:
GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}

15
.gitignore vendored
View File

@@ -6,9 +6,6 @@ get-sources
prepare-sources
/backend/cpp/llama/grpc-server
/backend/cpp/llama/llama.cpp
/backend/cpp/llama-*
*.log
go-ggml-transformers
go-gpt2
@@ -42,15 +39,3 @@ backend-assets/*
!backend-assets/.keep
prepare
/ggml-metal.metal
docs/static/gallery.html
# Protobuf generated files
*.pb.go
*pb2.py
*pb2_grpc.py
# SonarQube
.scannerwork
# backend virtual environments
**/venv

View File

@@ -1,5 +0,0 @@
{
"recommendations": [
"golang.go"
]
}

View File

@@ -1,4 +0,0 @@
extends: default
rules:
line-length: disable

View File

@@ -1,4 +1,4 @@
# Contributing to LocalAI
# Contributing to localAI
Thank you for your interest in contributing to LocalAI! We appreciate your time and effort in helping to improve our project. Before you get started, please take a moment to review these guidelines.
@@ -29,9 +29,8 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time
1. Clone the repository: `git clone https://github.com/go-skynet/LocalAI.git`
2. Navigate to the project directory: `cd LocalAI`
3. Install the required dependencies ( see https://localai.io/basics/build/#build-localai-locally )
4. Build LocalAI: `make build`
5. Run LocalAI: `./local-ai`
3. Install the required dependencies: `make prepare`
4. Run LocalAI: `make run`
## Contributing
@@ -60,29 +59,14 @@ If you find a bug, have a feature request, or encounter any issues, please check
`make test` cannot handle all the model now. Please be sure to add a test case for the new features or the part was changed.
### Running AIO tests
All-In-One images has a set of tests that automatically verifies that most of the endpoints works correctly, a flow can be :
```bash
# Build the LocalAI docker image
make DOCKER_IMAGE=local-ai docker
# Build the corresponding AIO image
BASE_IMAGE=local-ai DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
# Run the AIO e2e tests
LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio make run-e2e-aio
```
## Documentation
We are welcome the contribution of the documents, please open new PR or create a new issue. The documentation is available under `docs/` https://github.com/mudler/LocalAI/tree/master/docs
- We are welcome the contribution of the documents, please open new PR in the official document repo [localai-website](https://github.com/go-skynet/localai-website)
## Community and Communication
- You can reach out via the Github issue tracker.
- Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
- Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
---
---

View File

@@ -1,46 +1,30 @@
ARG IMAGE_TYPE=extras
ARG BASE_IMAGE=ubuntu:22.04
ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
# The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it.
FROM ${BASE_IMAGE} AS requirements-core
# TODO(mudler): install all accellerators here
# and use make dist instead of build.
# TODO(mudler): modify make dist to build also go-piper and stablediffusion
# This way the same binary can work for everything(!)
# TODO(mudler): also make sure that we bundle all the required libs in the backend-assets/lib
# For the GPU-accell we are going to generate a tar file instead that will be extracted by the bash installer, and the libs will also be installed in the final docker image, so no need to pull ALL the dependencies
# extras or core
FROM ${BASE_IMAGE} as requirements-core
USER root
ARG GO_VERSION=1.22.4
ARG GO_VERSION=1.21.7
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=11
ARG CUDA_MINOR_VERSION=7
ARG TARGETARCH
ARG TARGETVARIANT
ENV BUILD_TYPE=${BUILD_TYPE}
ENV DEBIAN_FRONTEND=noninteractive
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh"
ARG GO_TAGS="stablediffusion tinydream tts"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
ccache \
ca-certificates \
cmake \
curl \
git \
unzip && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
apt-get install -y ca-certificates curl patch pip cmake git && apt-get clean
# Install Go
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH $PATH:/root/go/bin:/usr/local/go/bin
# Install grpc compilers
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -C /usr/local -xz
ENV PATH $PATH:/usr/local/go/bin
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
RUN update-ca-certificates
@@ -49,21 +33,27 @@ RUN update-ca-certificates
RUN echo "Target Architecture: $TARGETARCH"
RUN echo "Target Variant: $TARGETVARIANT"
# CuBLAS requirements
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
apt-get install -y software-properties-common && \
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \
apt-get update && \
apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
; fi
# Cuda
ENV PATH /usr/local/cuda/bin:${PATH}
# HipBLAS requirements
ENV PATH /opt/rocm/bin:${PATH}
# OpenBLAS requirements and stable diffusion, tts (espeak)
RUN apt-get update && \
apt-get install -y --no-install-recommends \
libopenblas-dev \
espeak-ng \
espeak \
libopencv-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# OpenBLAS requirements and stable diffusion
RUN apt-get install -y \
libopenblas-dev \
libopencv-dev \
&& apt-get clean
# Set up OpenCV
RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
@@ -73,260 +63,38 @@ WORKDIR /build
RUN test -n "$TARGETARCH" \
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
###################################
###################################
# Extras requirements
FROM requirements-core as requirements-extras
# The requirements-extras target is for any builds with IMAGE_TYPE=extras. It should not be placed in this target unless every IMAGE_TYPE=extras build will use it
FROM requirements-core AS requirements-extras
RUN curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list && \
apt-get update && \
apt-get install -y conda && apt-get clean
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
ENV PATH="/root/.cargo/bin:${PATH}"
RUN apt-get install -y python3-pip && apt-get clean
RUN pip install --upgrade pip
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
RUN apt-get update && \
apt-get install -y --no-install-recommends \
python3-pip \
python-is-python3 \
python3-dev \
python3-venv && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
pip install --upgrade pip
RUN apt-get install -y espeak-ng espeak && apt-get clean
# Install grpcio-tools (the version in 22.04 is too old)
RUN pip install --user grpcio-tools
###################################
###################################
# Base image for the build-type.
FROM requirements-${IMAGE_TYPE} AS run-requirements-drivers
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=5
ENV BUILD_TYPE=${BUILD_TYPE}
# Vulkan requirements
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "vulkan" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils wget gpg-agent && \
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
apt-get update && \
apt-get install -y \
vulkan-sdk && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# CuBLAS requirements
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "cublas" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils
if [ "amd64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
fi
if [ "arm64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
fi
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \
apt-get update && \
apt-get install -y --no-install-recommends \
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils && \
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \
apt-get update && \
apt-get install -y --no-install-recommends \
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
RUN if [ ! -e /usr/bin/python ]; then \
ln -s /usr/bin/python3 /usr/bin/python \
; fi
# If we are building with clblas support, we need the libraries for the builds
RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
libclblast-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
; fi
RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
hipblas-dev \
rocblas-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
ldconfig \
; fi
# The build-requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
# This target will be built on top of requirements-core or requirements-extras as retermined by the IMAGE_TYPE build-arg
FROM requirements-${IMAGE_TYPE} AS build-requirements-drivers
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MINOR_VERSION=5
ENV BUILD_TYPE=${BUILD_TYPE}
# Vulkan requirements
RUN <<EOT bash
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils wget gpg-agent && \
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
apt-get update && \
apt-get install -y \
vulkan-sdk && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
EOT
# CuBLAS requirements
RUN <<EOT bash
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils
if [ "amd64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
fi
if [ "arm64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
fi
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \
apt-get update && \
apt-get install -y --no-install-recommends \
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
EOT
# clblas
RUN apt-get update && \
apt-get install -y --no-install-recommends \
libclblast-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# intel
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && apt update && apt install -y intel-basekit && apt-get clean && \
rm -rf /var/lib/apt/lists/*
# hipblas
RUN wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null && apt-get update && \
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/6.1.2/ubuntu jammy main" \
| tee /etc/apt/sources.list.d/amdgpu.list && \
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.1.2 jammy main" | tee --append /etc/apt/sources.list.d/rocm.list && printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | tee /etc/apt/preferences.d/rocm-pin-600 && \
apt update && \
apt-get install -y --no-install-recommends \
hipblas-dev rocm-dev \
rocblas-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
ldconfig
###################################
###################################
# Temporary workaround for Intel's repository to work correctly
# https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/APT-Repository-not-working-signatures-invalid/m-p/1599436/highlight/true#M36143
# This is a temporary workaround until Intel fixes their repository
FROM ${INTEL_BASE_IMAGE} AS intel
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list
FROM requirements-${IMAGE_TYPE} as builder
###################################
###################################
# The grpc target does one thing, it builds and installs GRPC. This is in it's own layer so that it can be effectively cached by CI.
# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
FROM ${GRPC_BASE_IMAGE} AS grpc
# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
ARG GRPC_MAKEFLAGS="-j4 -Otarget"
ARG GRPC_VERSION=v1.64.2
ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
WORKDIR /build
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ca-certificates \
build-essential \
cmake \
git && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
# and running make install in the target container
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
mkdir -p /build/grpc/cmake/build && \
cd /build/grpc/cmake/build && \
cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
make && \
make install && \
rm -rf /build
###################################
###################################
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
# Adjustments to the build process should likely be made here.
FROM build-requirements-drivers AS builder
ARG GO_TAGS="stablediffusion tts p2p"
ARG GO_TAGS="stablediffusion tts"
ARG GRPC_BACKENDS
ARG MAKEFLAGS
ARG BUILD_GRPC=true
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
ENV GO_TAGS=${GO_TAGS}
ENV MAKEFLAGS=${MAKEFLAGS}
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
ENV NVIDIA_VISIBLE_DEVICES=all
@@ -335,73 +103,63 @@ WORKDIR /build
COPY . .
COPY .git .
RUN echo "GO_TAGS: $GO_TAGS"
RUN make prepare
# We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below
# but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
# here so that we can generate the grpc code for the stablediffusion build
RUN <<EOT bash
if [ "amd64" = "$TARGETARCH" ]; then
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
rm protoc.zip
fi
if [ "arm64" = "$TARGETARCH" ]; then
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-aarch_64.zip -o protoc.zip && \
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
rm protoc.zip
fi
EOT
# If we are building with clblas support, we need the libraries for the builds
RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
apt-get update && \
apt-get install -y libclblast-dev && \
apt-get clean \
; fi
# stablediffusion does not tolerate a newer version of abseil, build it first
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
# Install the pre-built GRPC
COPY --from=grpc /opt/grpc /usr/local
RUN if [ "${BUILD_GRPC}" = "true" ]; then \
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && make -j12 install \
; fi
# Rebuild with defaults backends
WORKDIR /build
# Need to build tts and stablediffusion separately first (?)
RUN make dist && rm release/*.sha256 && mv release/* local-ai
RUN make build
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \
; fi
###################################
###################################
# This is the final target. The result of this target will be the image uploaded to the registry.
# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
FROM run-requirements-drivers
FROM requirements-${IMAGE_TYPE}
ARG FFMPEG
ARG BUILD_TYPE
ARG TARGETARCH
ARG IMAGE_TYPE=extras
ARG EXTRA_BACKENDS
ARG MAKEFLAGS
ENV BUILD_TYPE=${BUILD_TYPE}
ENV REBUILD=false
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
ENV MAKEFLAGS=${MAKEFLAGS}
ARG CUDA_MAJOR_VERSION=12
ARG CUDA_MAJOR_VERSION=11
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
ENV NVIDIA_VISIBLE_DEVICES=all
ENV PIP_CACHE_PURGE=true
# Add FFmpeg
RUN if [ "${FFMPEG}" = "true" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
ffmpeg && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
apt-get install -y ffmpeg && apt-get clean \
; fi
# Add OpenCL
RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
apt-get update && \
apt-get install -y libclblast1 && \
apt-get clean \
; fi
WORKDIR /build
@@ -413,75 +171,58 @@ WORKDIR /build
COPY . .
COPY --from=builder /build/sources ./sources/
COPY --from=grpc /opt/grpc /usr/local
COPY --from=builder /build/grpc ./grpc/
RUN make prepare-sources
RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf grpc
# Copy the binary
COPY --from=builder /build/local-ai ./
# Copy shared libraries for piper
# TODO(mudler): bundle these libs in backend-assets/lib/ (like we do for llama.cpp deps)
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
# do not let stablediffusion rebuild (requires an older version of absl)
COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
# Change the shell to bash so we can use [[ tests below
SHELL ["/bin/bash", "-c"]
# We try to strike a balance between individual layer size (as that affects total push time) and total image size
# Splitting the backends into more groups with fewer items results in a larger image, but a smaller size for the largest layer
# Splitting the backends into fewer groups with more items results in a smaller image, but a larger size for the largest layer
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/coqui \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "parler-tts" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/parler-tts \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/diffusers \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/transformers-musicgen \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama1" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/exllama \
## Duplicated from Makefile to avoid having a big layer that's hard to push
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/autogptq \
; fi
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/vall-e-x \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/openvoice \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "petals" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/petals \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/sentencetransformers \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/exllama2 \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/transformers \
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/bark \
; fi
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/vllm \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "autogptq" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/autogptq \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "bark" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/bark \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/rerankers \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "mamba" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/mamba \
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/diffusers \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/vllm \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/mamba \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/sentencetransformers \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/transformers \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/vall-e-x \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/exllama \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/exllama2 \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/petals \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/transformers-musicgen \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/coqui \
; fi
# Make sure the models directory exists
@@ -489,8 +230,7 @@ RUN mkdir -p /build/models
# Define the health check command
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1
CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
VOLUME /build/models
EXPOSE 8080
ENTRYPOINT [ "/build/entrypoint.sh" ]

View File

@@ -1,8 +0,0 @@
ARG BASE_IMAGE=ubuntu:22.04
FROM ${BASE_IMAGE}
RUN apt-get update && apt-get install -y pciutils && apt-get clean
COPY aio/ /aio
ENTRYPOINT [ "/aio/entrypoint.sh" ]

671
Makefile
View File

@@ -4,8 +4,11 @@ GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=9ef07800622e4c371605f9419864d15667c3558f
GOLLAMA_VERSION?=6a8041ef6b46d4712afc3ae791d1c2d73da0ad1c
GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
CPPLLAMA_VERSION?=4755afd1cbd40d93c017e5b98c39796f52345314
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -16,29 +19,28 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
WHISPER_CPP_VERSION?=b29b3b29240aac8b71ce8e5a4360c1f1562ad66f
WHISPER_CPP_VERSION?=a56f435fd475afd7edf02bfbf9f8c77f527198c2
# bert.cpp version
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
# go-piper version
PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759
# stablediffusion version
STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
STABLEDIFFUSION_VERSION?=362df9da29f882dbf09ade61972d16a1f53c3485
# tinydream version
TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
TINYDREAM_VERSION?=772a9c0d9aaf768290e63cca3c904fe69faf677a
export BUILD_TYPE?=
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
export CMAKE_ARGS?=
CGO_LDFLAGS?=
CGO_LDFLAGS_WHISPER?=
CUDA_LIBPATH?=/usr/local/cuda/lib64/
GO_TAGS?=
BUILD_ID?=
BUILD_ID?=git
TEST_DIR=/tmp/test
@@ -54,7 +56,7 @@ override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell gi
OPTIONAL_TARGETS?=
export OS := $(shell uname -s)
OS := $(shell uname -s)
ARCH := $(shell uname -m)
GREEN := $(shell tput -Txterm setaf 2)
YELLOW := $(shell tput -Txterm setaf 3)
@@ -70,7 +72,7 @@ UNAME_S := $(shell uname -s)
endif
ifeq ($(OS),Darwin)
CGO_LDFLAGS += -lcblas -framework Accelerate
ifeq ($(OSX_SIGNING_IDENTITY),)
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
endif
@@ -80,13 +82,7 @@ ifeq ($(OS),Darwin)
BUILD_TYPE=metal
# disable metal if on Darwin and any other value is explicitly passed.
else ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF
export GGML_NO_ACCELERATE=1
endif
ifeq ($(BUILD_TYPE),metal)
# -lcblas removed: it seems to always be listed as a duplicate flag.
CGO_LDFLAGS += -framework Accelerate
CMAKE_ARGS+=-DLLAMA_METAL=OFF
endif
endif
@@ -97,14 +93,11 @@ endif
ifeq ($(BUILD_TYPE),cublas)
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
export GGML_CUDA=1
export WHISPER_CUDA=1
CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft
endif
ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DGGML_VULKAN=1
CGO_LDFLAGS+=-lcublas -lcudart -lculibos -lcublasLt -L$(CUDA_LIBPATH)
export LLAMA_CUBLAS=1
# required by whisper.cpp
export WHISPER_CUBLAS=1
CGO_LDFLAGS+=-L$(CUDA_PATH)/stubs -lcuda
endif
ifeq ($(BUILD_TYPE),hipblas)
@@ -116,15 +109,15 @@ ifeq ($(BUILD_TYPE),hipblas)
# llama-ggml has no hipblas support, so override it here.
export STABLE_BUILD_TYPE=
export WHISPER_HIPBLAS=1
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
GPU_TARGETS ?= gfx900,gfx90a,gfx1030,gfx1031,gfx1100
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
endif
ifeq ($(BUILD_TYPE),metal)
CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
export GGML_METAL=1
export LLAMA_METAL=1
export WHISPER_METAL=1
endif
@@ -156,18 +149,14 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
OPTIONAL_GRPC+=backend-assets/grpc/piper
endif
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface
ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
@@ -182,115 +171,126 @@ ifeq ($(BUILD_API_ONLY),true)
GRPC_BACKENDS=
endif
.PHONY: all test build vendor get-sources prepare-sources prepare
.PHONY: all test build vendor
all: help
## BERT embeddings
sources/go-bert.cpp:
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert.cpp
cd sources/go-bert.cpp && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
$(MAKE) -C sources/go-bert.cpp libgobert.a
## go-llama.cpp
sources/go-llama.cpp:
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama.cpp
cd sources/go-llama.cpp && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
## go-piper
sources/go-piper:
git clone --recurse-submodules https://github.com/mudler/go-piper sources/go-piper
cd sources/go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
sources/go-piper/libpiper_binding.a: sources/go-piper
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
## GPT4ALL
sources/gpt4all:
git clone --recurse-submodules $(GPT4ALL_REPO) sources/gpt4all
cd sources/gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
## go-piper
sources/go-piper:
git clone --recurse-submodules https://github.com/mudler/go-piper sources/go-piper
cd sources/go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
## RWKV
sources/go-rwkv.cpp:
git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
## BERT embeddings
sources/go-bert:
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert
cd sources/go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
## stable diffusion
sources/go-stable-diffusion:
git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion sources/go-stable-diffusion
cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
sources/go-stable-diffusion/libstablediffusion.a:
$(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
## tiny-dream
sources/go-tiny-dream:
git clone --recurse-submodules https://github.com/M0Rf30/go-tiny-dream sources/go-tiny-dream
cd sources/go-tiny-dream && git checkout -b build $(TINYDREAM_VERSION) && git submodule update --init --recursive --depth 1
sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
sources/go-tiny-dream/libtinydream.a:
$(MAKE) -C sources/go-tiny-dream libtinydream.a
## whisper
## RWKV
sources/go-rwkv:
git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv
cd sources/go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
sources/go-rwkv/librwkv.a: sources/go-rwkv
cd sources/go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
sources/go-bert/libgobert.a: sources/go-bert
$(MAKE) -C sources/go-bert libgobert.a
backend-assets/gpt4all: sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
mkdir -p backend-assets/gpt4all
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
backend-assets/espeak-ng-data: sources/go-piper
mkdir -p backend-assets/espeak-ng-data
$(MAKE) -C sources/go-piper piper.o
@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
sources/whisper.cpp:
git clone https://github.com/ggerganov/whisper.cpp sources/whisper.cpp
git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp
cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a
cd sources/whisper.cpp && make libwhisper.a
get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream
sources/go-llama:
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama
cd sources/go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
sources/go-llama-ggml:
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama-ggml
cd sources/go-llama-ggml && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
sources/go-llama/libbinding.a: sources/go-llama
$(MAKE) -C sources/go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
sources/go-llama-ggml/libbinding.a: sources/go-llama-ggml
$(MAKE) -C sources/go-llama-ggml BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
sources/go-piper/libpiper_binding.a: sources/go-piper
$(MAKE) -C sources/go-piper libpiper_binding.a example/main
backend/cpp/llama/llama.cpp:
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
get-sources: backend/cpp/llama/llama.cpp sources/go-llama sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream
touch $@
replace:
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
dropreplace:
$(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
$(GOCMD) mod edit -dropreplace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
prepare-sources: get-sources replace
$(GOCMD) mod download
touch $@
## GENERIC
rebuild: ## Rebuilds the project
$(GOCMD) clean -cache
$(MAKE) -C sources/go-llama.cpp clean
$(MAKE) -C sources/go-llama clean
$(MAKE) -C sources/go-llama-ggml clean
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
$(MAKE) -C sources/go-rwkv.cpp clean
$(MAKE) -C sources/go-rwkv clean
$(MAKE) -C sources/whisper.cpp clean
$(MAKE) -C sources/go-stable-diffusion clean
$(MAKE) -C sources/go-bert.cpp clean
$(MAKE) -C sources/go-bert clean
$(MAKE) -C sources/go-piper clean
$(MAKE) -C sources/go-tiny-dream clean
$(MAKE) build
prepare: prepare-sources $(OPTIONAL_TARGETS)
touch $@
clean: ## Remove build related file
$(GOCMD) clean -cache
@@ -298,75 +298,22 @@ clean: ## Remove build related file
rm -rf ./sources
rm -rf $(BINARY_NAME)
rm -rf release/
rm -rf backend-assets/*
rm -rf backend-assets
$(MAKE) -C backend/cpp/grpc clean
$(MAKE) -C backend/cpp/llama clean
rm -rf backend/cpp/llama-* || true
$(MAKE) dropreplace
$(MAKE) protogen-clean
rmdir pkg/grpc/proto || true
clean-tests:
rm -rf test-models
rm -rf test-dir
rm -rf core/http/backend-assets
## Build:
build: prepare backend-assets grpcs ## Build the project
build: backend-assets grpcs prepare ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
ifneq ($(BACKEND_LIBS),)
$(MAKE) backend-assets/lib
cp $(BACKEND_LIBS) backend-assets/lib/
endif
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
build-minimal:
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=none $(MAKE) build
build-api:
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
backend-assets/lib:
mkdir -p backend-assets/lib
dist:
$(MAKE) backend-assets/grpc/llama-cpp-avx2
ifeq ($(OS),Darwin)
$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
else
ifneq ($(ARCH),arm64)
$(MAKE) backend-assets/grpc/llama-cpp-cuda
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f16
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f32
endif
endif
STATIC=true $(MAKE) build
dist: build
mkdir -p release
# if BUILD_ID is empty, then we don't append it to the binary name
ifeq ($(BUILD_ID),)
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(OS)-$(ARCH)
shasum -a 256 release/$(BINARY_NAME)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(OS)-$(ARCH).sha256
else
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)
shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH).sha256
endif
dist-cross-linux-arm64:
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
STATIC=true $(MAKE) build
mkdir -p release
# if BUILD_ID is empty, then we don't append it to the binary name
ifeq ($(BUILD_ID),)
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(OS)-arm64
shasum -a 256 release/$(BINARY_NAME)-$(OS)-arm64 > release/$(BINARY_NAME)-$(OS)-arm64.sha256
else
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64
shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64 > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64.sha256
endif
osx-signed: build
codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"
@@ -375,10 +322,10 @@ osx-signed: build
run: prepare ## run local-ai
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./
test-models/testmodel.ggml:
test-models/testmodel:
mkdir test-models
mkdir test-dir
wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel.ggml
wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
@@ -390,9 +337,9 @@ prepare-test: grpcs
cp -rf backend-assets core/http
cp tests/models_fixtures/* test-models
test: prepare test-models/testmodel.ggml grpcs
test: prepare test-models/testmodel grpcs
@echo 'Running tests'
export GO_TAGS="tts stablediffusion debug"
export GO_TAGS="tts stablediffusion"
$(MAKE) prepare-test
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
@@ -406,16 +353,12 @@ prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=5 --build-arg FFMPEG=true -t localai-tests .
docker build --build-arg BUILD_GRPC=true --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
run-e2e-aio:
@echo 'Running e2e AIO tests'
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
test-e2e:
@echo 'Running e2e tests'
BUILD_TYPE=$(BUILD_TYPE) \
@@ -446,11 +389,6 @@ test-stablediffusion: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts 1 -v -r $(TEST_PATHS)
test-stores: backend-assets/grpc/local-store
mkdir -p tests/integration/backend-assets/grpc
cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts 1 -v -r tests/integration
test-container:
docker build --target requirements -t local-ai-test-container .
docker run -ti --rm --entrypoint /bin/bash -ti -v $(abspath ./):/build local-ai-test-container
@@ -467,160 +405,30 @@ help: ## Show this help.
else if (/^## .*$$/) {printf " ${CYAN}%s${RESET}\n", substr($$1,4)} \
}' $(MAKEFILE_LIST)
.PHONY: protogen
protogen: protogen-go protogen-python
.PHONY: protogen-clean
protogen-clean: protogen-go-clean protogen-python-clean
.PHONY: protogen-go
protogen-go:
mkdir -p pkg/grpc/proto
protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
protoc -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
backend/backend.proto
.PHONY: protogen-go-clean
protogen-go-clean:
$(RM) pkg/grpc/proto/backend.pb.go pkg/grpc/proto/backend_grpc.pb.go
$(RM) bin/*
.PHONY: protogen-python
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
.PHONY: protogen-python-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
.PHONY: autogptq-protogen
autogptq-protogen:
$(MAKE) -C backend/python/autogptq protogen
.PHONY: autogptq-protogen-clean
autogptq-protogen-clean:
$(MAKE) -C backend/python/autogptq protogen-clean
.PHONY: bark-protogen
bark-protogen:
$(MAKE) -C backend/python/bark protogen
.PHONY: bark-protogen-clean
bark-protogen-clean:
$(MAKE) -C backend/python/bark protogen-clean
.PHONY: coqui-protogen
coqui-protogen:
$(MAKE) -C backend/python/coqui protogen
.PHONY: coqui-protogen-clean
coqui-protogen-clean:
$(MAKE) -C backend/python/coqui protogen-clean
.PHONY: diffusers-protogen
diffusers-protogen:
$(MAKE) -C backend/python/diffusers protogen
.PHONY: diffusers-protogen-clean
diffusers-protogen-clean:
$(MAKE) -C backend/python/diffusers protogen-clean
.PHONY: exllama-protogen
exllama-protogen:
$(MAKE) -C backend/python/exllama protogen
.PHONY: exllama-protogen-clean
exllama-protogen-clean:
$(MAKE) -C backend/python/exllama protogen-clean
.PHONY: exllama2-protogen
exllama2-protogen:
$(MAKE) -C backend/python/exllama2 protogen
.PHONY: exllama2-protogen-clean
exllama2-protogen-clean:
$(MAKE) -C backend/python/exllama2 protogen-clean
.PHONY: mamba-protogen
mamba-protogen:
$(MAKE) -C backend/python/mamba protogen
.PHONY: mamba-protogen-clean
mamba-protogen-clean:
$(MAKE) -C backend/python/mamba protogen-clean
.PHONY: petals-protogen
petals-protogen:
$(MAKE) -C backend/python/petals protogen
.PHONY: petals-protogen-clean
petals-protogen-clean:
$(MAKE) -C backend/python/petals protogen-clean
.PHONY: rerankers-protogen
rerankers-protogen:
$(MAKE) -C backend/python/rerankers protogen
.PHONY: rerankers-protogen-clean
rerankers-protogen-clean:
$(MAKE) -C backend/python/rerankers protogen-clean
.PHONY: sentencetransformers-protogen
sentencetransformers-protogen:
$(MAKE) -C backend/python/sentencetransformers protogen
.PHONY: sentencetransformers-protogen-clean
sentencetransformers-protogen-clean:
$(MAKE) -C backend/python/sentencetransformers protogen-clean
.PHONY: transformers-protogen
transformers-protogen:
$(MAKE) -C backend/python/transformers protogen
.PHONY: transformers-protogen-clean
transformers-protogen-clean:
$(MAKE) -C backend/python/transformers protogen-clean
.PHONY: parler-tts-protogen
parler-tts-protogen:
$(MAKE) -C backend/python/parler-tts protogen
.PHONY: parler-tts-protogen-clean
parler-tts-protogen-clean:
$(MAKE) -C backend/python/parler-tts protogen-clean
.PHONY: transformers-musicgen-protogen
transformers-musicgen-protogen:
$(MAKE) -C backend/python/transformers-musicgen protogen
.PHONY: transformers-musicgen-protogen-clean
transformers-musicgen-protogen-clean:
$(MAKE) -C backend/python/transformers-musicgen protogen-clean
.PHONY: vall-e-x-protogen
vall-e-x-protogen:
$(MAKE) -C backend/python/vall-e-x protogen
.PHONY: vall-e-x-protogen-clean
vall-e-x-protogen-clean:
$(MAKE) -C backend/python/vall-e-x protogen-clean
.PHONY: openvoice-protogen
openvoice-protogen:
$(MAKE) -C backend/python/openvoice protogen
.PHONY: openvoice-protogen-clean
openvoice-protogen-clean:
$(MAKE) -C backend/python/openvoice protogen-clean
.PHONY: vllm-protogen
vllm-protogen:
$(MAKE) -C backend/python/vllm protogen
.PHONY: vllm-protogen-clean
vllm-protogen-clean:
$(MAKE) -C backend/python/vllm protogen-clean
protogen-python:
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/sentencetransformers/ --grpc_python_out=backend/python/sentencetransformers/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/transformers/ --grpc_python_out=backend/python/transformers/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/transformers-musicgen/ --grpc_python_out=backend/python/transformers-musicgen/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/autogptq/ --grpc_python_out=backend/python/autogptq/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/exllama/ --grpc_python_out=backend/python/exllama/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/bark/ --grpc_python_out=backend/python/bark/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/diffusers/ --grpc_python_out=backend/python/diffusers/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/coqui/ --grpc_python_out=backend/python/coqui/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vall-e-x/ --grpc_python_out=backend/python/vall-e-x/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vllm/ --grpc_python_out=backend/python/vllm/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/petals/ --grpc_python_out=backend/python/petals/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/mamba/ --grpc_python_out=backend/python/mamba/ backend/backend.proto
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/exllama2/ --grpc_python_out=backend/python/exllama2/ backend/backend.proto
## GRPC
# Note: it is duplicated in the Dockerfile
prepare-extra-conda-environments: protogen-python
prepare-extra-conda-environments:
$(MAKE) -C backend/python/autogptq
$(MAKE) -C backend/python/bark
$(MAKE) -C backend/python/coqui
@@ -628,17 +436,14 @@ prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/vllm
$(MAKE) -C backend/python/mamba
$(MAKE) -C backend/python/sentencetransformers
$(MAKE) -C backend/python/rerankers
$(MAKE) -C backend/python/transformers
$(MAKE) -C backend/python/transformers-musicgen
$(MAKE) -C backend/python/parler-tts
$(MAKE) -C backend/python/vall-e-x
$(MAKE) -C backend/python/openvoice
$(MAKE) -C backend/python/exllama
$(MAKE) -C backend/python/petals
$(MAKE) -C backend/python/exllama2
prepare-test-extra: protogen-python
prepare-test-extra:
$(MAKE) -C backend/python/transformers
$(MAKE) -C backend/python/diffusers
@@ -652,158 +457,89 @@ ifeq ($(BUILD_API_ONLY),true)
touch backend-assets/keep
endif
backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_binding.a
mkdir -p backend-assets/espeak-ng-data
@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
mkdir -p backend-assets/gpt4all
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
backend-assets/grpc: protogen-go replace
backend-assets/grpc:
mkdir -p backend-assets/grpc
backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
backend-assets/grpc/huggingface: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
backend/cpp/llama/llama.cpp:
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
backend-assets/grpc/llama: backend-assets/grpc sources/go-llama/libbinding.a
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama LIBRARY_PATH=$(CURDIR)/sources/go-llama \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama ./backend/go/llm/llama/
# TODO: every binary should have its own folder instead, so can have different implementations
## BACKEND CPP LLAMA START
# Sets the variables in case it has to build the gRPC locally.
INSTALLED_PACKAGES=$(CURDIR)/backend/cpp/grpc/installed_packages
INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
-DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
-Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
-DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
-DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
build-llama-cpp-grpc-server:
# Conditionally build grpc for the llama backend to use if needed
-DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
-Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
-DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
-DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
backend/cpp/llama/grpc-server:
ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
$(MAKE) -C backend/cpp/grpc build
_PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto \
_GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin \
PATH="${INSTALLED_PACKAGES}/bin:${PATH}" \
CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" \
LLAMA_VERSION=$(CPPLLAMA_VERSION) \
$(MAKE) -C backend/cpp/${VARIANT} grpc-server
export _PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto && \
export _GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin && \
export PATH="${INSTALLED_PACKAGES}/bin:${PATH}" && \
CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
else
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
endif
## BACKEND CPP LLAMA END
# This target is for manually building a variant with-auto detected flags
backend-assets/grpc/llama-cpp: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-cpp
$(MAKE) -C backend/cpp/llama-cpp purge
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
$(MAKE) VARIANT="llama-cpp" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-cpp/grpc-server backend-assets/grpc/llama-cpp
backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-avx2
$(MAKE) -C backend/cpp/llama-avx2 purge
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-avx
$(MAKE) -C backend/cpp/llama-avx purge
$(info ${GREEN}I llama-cpp build info:avx${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-fallback
$(MAKE) -C backend/cpp/llama-fallback purge
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
##
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server
cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp
# TODO: every binary should have its own folder instead, so can have different metal implementations
ifeq ($(BUILD_TYPE),metal)
cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
cp backend/cpp/llama/llama.cpp/build/bin/default.metallib backend-assets/grpc/
endif
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-cuda
$(MAKE) -C backend/cpp/llama-cuda purge
$(info ${GREEN}I llama-cpp build info:cuda${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
$(MAKE) -C backend/cpp/llama-hipblas purge
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f16
$(MAKE) -C backend/cpp/llama-sycl_f16 purge
$(info ${GREEN}I llama-cpp build info:sycl_f16${RESET})
BUILD_TYPE="sycl_f16" $(MAKE) VARIANT="llama-sycl_f16" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-sycl_f16/grpc-server backend-assets/grpc/llama-cpp-sycl_f16
backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f32
$(MAKE) -C backend/cpp/llama-sycl_f32 purge
$(info ${GREEN}I llama-cpp build info:sycl_f32${RESET})
BUILD_TYPE="sycl_f32" $(MAKE) VARIANT="llama-sycl_f32" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-sycl_f32/grpc-server backend-assets/grpc/llama-cpp-sycl_f32
backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-grpc
$(MAKE) -C backend/cpp/llama-grpc purge
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
mkdir -p backend-assets/util/
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
backend-assets/grpc/llama-ggml: backend-assets/grpc sources/go-llama-ggml/libbinding.a
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
backend-assets/grpc/gpt4all: backend-assets/grpc backend-assets/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
backend-assets/grpc/rwkv: backend-assets/grpc sources/go-rwkv/librwkv.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
backend-assets/grpc/bert-embeddings: backend-assets/grpc sources/go-bert/libgobert.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert LIBRARY_PATH=$(CURDIR)/sources/go-bert \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
backend-assets/grpc/langchain-huggingface: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./backend/go/llm/langchain/
backend-assets/grpc/stablediffusion: backend-assets/grpc
if [ ! -f backend-assets/grpc/stablediffusion ]; then \
$(MAKE) sources/go-stable-diffusion; \
$(MAKE) sources/go-stable-diffusion/libstablediffusion.a; \
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-stable-diffusion/ LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion; \
fi
backend-assets/grpc/tinydream: backend-assets/grpc sources/go-tiny-dream/libtinydream.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
backend-assets/grpc/piper: backend-assets/grpc backend-assets/espeak-ng-data sources/go-piper/libpiper_binding.a
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
backend-assets/grpc/local-store: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
backend-assets/grpc/whisper: backend-assets/grpc sources/whisper.cpp/libwhisper.a
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
grpcs: prepare $(GRPC_BACKENDS)
DOCKER_IMAGE?=local-ai
DOCKER_AIO_IMAGE?=local-ai-aio
IMAGE_TYPE?=core
BASE_IMAGE?=ubuntu:22.04
@@ -811,71 +547,20 @@ docker:
docker build \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg GO_TAGS=$(GO_TAGS) \
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
-t $(DOCKER_IMAGE) .
docker-cuda11:
docker build \
--build-arg CUDA_MAJOR_VERSION=11 \
--build-arg CUDA_MINOR_VERSION=8 \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
-t $(DOCKER_IMAGE)-cuda11 .
docker-aio:
@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
docker build \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
-t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio .
docker-aio-all:
$(MAKE) docker-aio DOCKER_AIO_SIZE=cpu
$(MAKE) docker-aio DOCKER_AIO_SIZE=cpu
docker-image-intel:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
docker-image-intel-xpu:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
.PHONY: swagger
swagger:
swag init -g core/http/app.go --output swagger
.PHONY: gen-assets
gen-assets:
$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets
## Documentation
docs/layouts/_default:
mkdir -p docs/layouts/_default
docs/static/gallery.html: docs/layouts/_default
$(GOCMD) run ./.github/ci/modelslist.go ./gallery/index.yaml > docs/static/gallery.html
docs/public: docs/layouts/_default docs/static/gallery.html
cd docs && hugo --minify
docs-clean:
rm -rf docs/public
rm -rf docs/static/gallery.html
.PHONY: docs
docs: docs/static/gallery.html
cd docs && hugo serve
--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .

112
README.md
View File

@@ -20,14 +20,14 @@
</a>
</p>
<p align="center">
<a href="https://hub.docker.com/r/localai/localai" target="blank">
<img src="https://img.shields.io/badge/dockerhub-images-important.svg?logo=Docker" alt="LocalAI Docker hub"/>
</a>
<a href="https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest" target="blank">
<img src="https://img.shields.io/badge/quay.io-images-important.svg?" alt="LocalAI Quay.io"/>
</a>
</p>
[<img src="https://img.shields.io/badge/dockerhub-images-important.svg?logo=Docker">](https://hub.docker.com/r/localai/localai)
[<img src="https://img.shields.io/badge/quay.io-images-important.svg?">](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest)
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
>
> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)
<p align="center">
<a href="https://twitter.com/LocalAI_API" target="blank">
@@ -36,53 +36,26 @@
<a href="https://discord.gg/uJAeKSAGDy" target="blank">
<img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
</a>
</p>
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
>
> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API thats compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
![screen](https://github.com/mudler/LocalAI/assets/2420543/20b5ccd2-8393-44f0-aaf6-87a23806381e)
Run the installer script:
```bash
curl https://localai.io/install.sh | sh
```
Or run with docker:
```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
# Alternative images:
# - if you have an Nvidia GPU:
# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
# - without preconfigured models
# docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
# - without preconfigured models for Nvidia GPUs
# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
```
[💻 Getting started](https://localai.io/basics/getting_started/index.html)
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API thats compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU.
## 🔥🔥 Hot topics / Roadmap
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
- 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
- 🔥🔥 Decentralized llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
- 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
- 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
- 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
- Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
- Reranker API: https://github.com/mudler/LocalAI/pull/2121
- Parallel function calling: https://github.com/mudler/LocalAI/pull/1726
- Upload file API: https://github.com/mudler/LocalAI/pull/1703
- Tools API support: https://github.com/mudler/LocalAI/pull/1715
- LLaVa 1.6: https://github.com/mudler/LocalAI/pull/1714
- ROCm container images: https://github.com/mudler/LocalAI/pull/1595
- Intel GPU support (sycl, transformers, diffusers): https://github.com/mudler/LocalAI/issues/1653
- Deprecation of old backends: https://github.com/mudler/LocalAI/issues/1651
- Mamba support: https://github.com/mudler/LocalAI/pull/1589
- Start and share models with config file: https://github.com/mudler/LocalAI/pull/1522
- 🐸 Coqui: https://github.com/mudler/LocalAI/pull/1489
- Img2vid https://github.com/mudler/LocalAI/pull/1442
Hot topics (looking for contributors):
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
- Assistant API: https://github.com/mudler/LocalAI/issues/1273
@@ -91,19 +64,25 @@ Hot topics (looking for contributors):
If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22
## 💻 [Getting started](https://localai.io/basics/getting_started/index.html)
For a detailed step-by-step introduction, refer to the [Getting Started](https://localai.io/basics/getting_started/index.html) guide. For those in a hurry, here's a straightforward one-liner to launch a LocalAI instance with [phi-2](https://huggingface.co/microsoft/phi-2) using `docker`:
```
docker run -ti -p 8080:8080 localai/localai:v2.9.0-ffmpeg-core phi-2
```
## 🚀 [Features](https://localai.io/features/)
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
- 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/)
- 🔥 [OpenAI functions](https://localai.io/features/openai-functions/) 🆕
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
- 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
- 📈 [Reranker API](https://localai.io/features/reranker/)
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
- 🆕 [Vision API](https://localai.io/features/gpt-vision/)
## 💻 Usage
@@ -117,7 +96,6 @@ Build and deploy custom containers:
WebUIs:
- https://github.com/Jirubizu/localai-admin
- https://github.com/go-skynet/LocalAI-frontend
- QA-Pilot(An interactive chat project that leverages LocalAI LLMs for rapid understanding and navigation of GitHub code repository) https://github.com/reid41/QA-Pilot
Model galleries
- https://github.com/go-skynet/model-gallery
@@ -125,19 +103,17 @@ Model galleries
Other:
- Helm chart https://github.com/go-skynet/helm-charts
- VSCode extension https://github.com/badgooooor/localai-vscode-plugin
- Terminal utility https://github.com/djcopley/ShellOracle
- Local Smart assistant https://github.com/mudler/LocalAGI
- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation / https://github.com/valentinfrlch/ha-gpt4vision
- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation
- Discord bot https://github.com/mudler/LocalAGI/tree/main/examples/discord
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
- Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/
### 🔗 Resources
- [LLM finetuning guide](https://localai.io/docs/advanced/fine-tuning/)
- 🆕 New! [LLM finetuning guide](https://localai.io/docs/advanced/fine-tuning/)
- [How to build locally](https://localai.io/basics/build/index.html)
- [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes)
- [Projects integrating LocalAI](https://localai.io/docs/integrations/)
@@ -145,8 +121,7 @@ Other:
## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
- 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/)
- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/ai/answers/tiZMDoZzZV6TLxgDXNBnFE/deploying-helm-charts-on-aws-eks)
- [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)
- [Create a slackbot for teams and OSS projects that answer to documentation](https://mudler.pm/posts/smart-slackbot-for-teams/)
- [LocalAI meets k8sgpt](https://www.youtube.com/watch?v=PKrDNuJ_dfE)
@@ -173,16 +148,17 @@ If you utilize this repository, data in a downstream project, please consider ci
Support the project by becoming [a backer or sponsor](https://github.com/sponsors/mudler). Your logo will show up here with a link to your website.
A huge thank you to our generous sponsors who support this project covering CI expenses, and our [Sponsor list](https://github.com/sponsors/mudler):
A huge thank you to our generous sponsors who support this project:
<p align="center">
<a href="https://www.spectrocloud.com/" target="blank">
<img height="200" src="https://github.com/go-skynet/LocalAI/assets/2420543/68a6f3cb-8a65-4a4d-99b5-6417a8905512">
</a>
<a href="https://www.premai.io/" target="blank">
<img height="200" src="https://github.com/mudler/LocalAI/assets/2420543/42e4ca83-661e-4f79-8e46-ae43689683d6"> <br>
</a>
</p>
| ![Spectro Cloud logo_600x600px_transparent bg](https://github.com/go-skynet/LocalAI/assets/2420543/68a6f3cb-8a65-4a4d-99b5-6417a8905512) |
|:-----------------------------------------------:|
| [Spectro Cloud](https://www.spectrocloud.com/) |
| Spectro Cloud kindly supports LocalAI by providing GPU and computing resources to run tests on lamdalabs! |
And a huge shout-out to individuals sponsoring the project by donating hardware or backing the project.
- [Sponsor list](https://github.com/sponsors/mudler)
- JDAM00 (donating HW for the CI)
## 🌟 Star history
@@ -192,7 +168,7 @@ A huge thank you to our generous sponsors who support this project covering CI e
LocalAI is a community-driven project created by [Ettore Di Giacinto](https://github.com/mudler/).
MIT - Author Ettore Di Giacinto <mudler@localai.io>
MIT - Author Ettore Di Giacinto
## 🙇 Acknowledgements

View File

@@ -1,5 +0,0 @@
## AIO CPU size
Use this image with CPU-only.
Please keep using only C++ backends so the base image is as small as possible (without CUDA, cuDNN, python, etc).

View File

@@ -1,12 +0,0 @@
name: text-embedding-ada-002
backend: bert-embeddings
parameters:
model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
usage: |
You can test this model with curl like this:
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "Your text string goes here",
"model": "text-embedding-ada-002"
}'

View File

@@ -1,62 +0,0 @@
name: stablediffusion
backend: stablediffusion
parameters:
model: stablediffusion_assets
license: "BSD-3"
urls:
- https://github.com/EdVince/Stable-Diffusion-NCNN
- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE
description: |
Stable Diffusion in NCNN with c++, supported txt2img and img2img
download_files:
- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin"
- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin"
sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin"
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin"
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
- filename: "stablediffusion_assets/log_sigmas.bin"
sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
- filename: "stablediffusion_assets/vocab.txt"
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
usage: |
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "<positive prompt>|<negative prompt>",
"step": 25,
"size": "512x512"
}'

View File

@@ -1,27 +0,0 @@
name: jina-reranker-v1-base-en
backend: rerankers
parameters:
model: cross-encoder
usage: |
You can test this model with curl like this:
curl http://localhost:8080/v1/rerank \
-H "Content-Type: application/json" \
-d '{
"model": "jina-reranker-v1-base-en",
"query": "Organic skincare products for sensitive skin",
"documents": [
"Eco-friendly kitchenware for modern homes",
"Biodegradable cleaning supplies for eco-conscious consumers",
"Organic cotton baby clothes for sensitive skin",
"Natural organic skincare range for sensitive skin",
"Tech gadgets for smart homes: 2024 edition",
"Sustainable gardening tools and compost solutions",
"Sensitive skin-friendly facial cleansers and toners",
"Organic food wraps and storage solutions",
"All-natural pet food for dogs with allergies",
"Yoga mats made from recycled materials"
],
"top_n": 3
}'

View File

@@ -1,18 +0,0 @@
name: whisper-1
backend: whisper
parameters:
model: ggml-whisper-base.bin
usage: |
## example audio file
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
## Send the example audio file to the transcriptions endpoint
curl http://localhost:8080/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
-F file="@$PWD/gb1.ogg" -F model="whisper-1"
download_files:
- filename: "ggml-whisper-base.bin"
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"

View File

@@ -1,15 +0,0 @@
name: tts-1
download_files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
parameters:
model: en-us-amy-low.onnx
usage: |
To test if this model works as expected, you can use the following curl command:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"model":"voice-en-us-amy-low",
"input": "Hi, this is a test."
}'

View File

@@ -1,101 +0,0 @@
name: gpt-4
mmap: true
parameters:
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
context_size: 8192
stopwords:
- "<|im_end|>"
- "<dummy32000>"
- "</tool_call>"
- "<|eot_id|>"
- "<|end_of_text|>"
function:
# disable injecting the "answer" tool
disable_no_action: true
grammar:
# This allows the grammar to also return messages
mixed_mode: true
# Suffix to add to the grammar
#prefix: '<tool_call>\n'
# Force parallel calls in the grammar
# parallel_calls: true
return_name_in_function_response: true
# Without grammar uncomment the lines below
# Warning: this is relying only on the capability of the
# LLM model to generate the correct function call.
json_regex_match:
- "(?s)<tool_call>(.*?)</tool_call>"
- "(?s)<tool_call>(.*?)"
replace_llm_results:
# Drop the scratchpad content from responses
- key: "(?s)<scratchpad>.*</scratchpad>"
value: ""
replace_function_results:
# Replace everything that is not JSON array or object
#
- key: '(?s)^[^{\[]*'
value: ""
- key: '(?s)[^}\]]*$'
value: ""
- key: "'([^']*?)'"
value: "_DQUOTE_${1}_DQUOTE_"
- key: '\\"'
value: "__TEMP_QUOTE__"
- key: "\'"
value: "'"
- key: "_DQUOTE_"
value: '"'
- key: "__TEMP_QUOTE__"
value: '"'
# Drop the scratchpad content from responses
- key: "(?s)<scratchpad>.*</scratchpad>"
value: ""
template:
chat: |
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
{{- if .FunctionCall }}
<tool_call>
{{- else if eq .RoleName "tool" }}
<tool_response>
{{- end }}
{{- if .Content}}
{{.Content }}
{{- end }}
{{- if .FunctionCall}}
{{toJson .FunctionCall}}
{{- end }}
{{- if .FunctionCall }}
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}<|im_end|>
completion: |
{{.Input}}
function: |-
<|im_start|>system
You are a function calling AI model.
Here are the available tools:
<tools>
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
</tools>
You should call the tools provided to you sequentially
Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
<scratchpad>
{step-by-step reasoning and plan in bullet points}
</scratchpad>
For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
<tool_call>
{"arguments": <args-dict>, "name": <function-name>}
</tool_call><|im_end|>
{{.Input -}}
<|im_start|>assistant

View File

@@ -1,31 +0,0 @@
backend: llama-cpp
context_size: 4096
f16: true
mmap: true
name: gpt-4-vision-preview
roles:
user: "USER:"
assistant: "ASSISTANT:"
system: "SYSTEM:"
mmproj: bakllava-mmproj.gguf
parameters:
model: bakllava.gguf
template:
chat: |
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
{{.Input}}
ASSISTANT:
download_files:
- filename: bakllava.gguf
uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
- filename: bakllava-mmproj.gguf
uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "gpt-4-vision-preview",
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'

View File

@@ -1,138 +0,0 @@
#!/bin/bash
echo "===> LocalAI All-in-One (AIO) container starting..."
GPU_ACCELERATION=false
GPU_VENDOR=""
function check_intel() {
if lspci | grep -E 'VGA|3D' | grep -iq intel; then
echo "Intel GPU detected"
if [ -d /opt/intel ]; then
GPU_ACCELERATION=true
GPU_VENDOR=intel
else
echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
fi
fi
}
function check_nvidia_wsl() {
if lspci | grep -E 'VGA|3D' | grep -iq "Microsoft Corporation Device 008e"; then
# We make the assumption this WSL2 cars is NVIDIA, then check for nvidia-smi
# Make sure the container was run with `--gpus all` as the only required parameter
echo "NVIDIA GPU detected via WSL2"
# nvidia-smi should be installed in the container
if nvidia-smi; then
GPU_ACCELERATION=true
GPU_VENDOR=nvidia
else
echo "NVIDIA GPU detected via WSL2, but nvidia-smi is not installed. GPU acceleration will not be available."
fi
fi
}
function check_amd() {
if lspci | grep -E 'VGA|3D' | grep -iq amd; then
echo "AMD GPU detected"
# Check if ROCm is installed
if [ -d /opt/rocm ]; then
GPU_ACCELERATION=true
GPU_VENDOR=amd
else
echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available."
fi
fi
}
function check_nvidia() {
if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then
echo "NVIDIA GPU detected"
# nvidia-smi should be installed in the container
if nvidia-smi; then
GPU_ACCELERATION=true
GPU_VENDOR=nvidia
else
echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available."
fi
fi
}
function check_metal() {
if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then
echo "Apple Metal supported GPU detected"
GPU_ACCELERATION=true
GPU_VENDOR=apple
fi
}
function detect_gpu() {
case "$(uname -s)" in
Linux)
check_nvidia
check_amd
check_intel
check_nvidia_wsl
;;
Darwin)
check_metal
;;
esac
}
function detect_gpu_size() {
# Attempting to find GPU memory size for NVIDIA GPUs
if [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "nvidia" ]; then
echo "NVIDIA GPU detected. Attempting to find memory size..."
# Using head -n 1 to get the total memory of the 1st NVIDIA GPU detected.
# If handling multiple GPUs is required in the future, this is the place to do it
nvidia_sm=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits | head -n 1)
if [ ! -z "$nvidia_sm" ]; then
echo "Total GPU Memory: $nvidia_sm MiB"
# if bigger than 8GB, use 16GB
#if [ "$nvidia_sm" -gt 8192 ]; then
# GPU_SIZE=gpu-16g
#else
GPU_SIZE=gpu-8g
#fi
else
echo "Unable to determine NVIDIA GPU memory size. Falling back to CPU."
GPU_SIZE=gpu-8g
fi
elif [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "intel" ]; then
GPU_SIZE=intel
# Default to a generic GPU size until we implement GPU size detection for non NVIDIA GPUs
elif [ "$GPU_ACCELERATION" = true ]; then
echo "Non-NVIDIA GPU detected. Specific GPU memory size detection is not implemented."
GPU_SIZE=gpu-8g
# default to cpu if GPU_SIZE is not set
else
echo "GPU acceleration is not enabled or supported. Defaulting to CPU."
GPU_SIZE=cpu
fi
}
function check_vars() {
if [ -z "$MODELS" ]; then
echo "MODELS environment variable is not set. Please set it to a comma-separated list of model YAML files to load."
exit 1
fi
if [ -z "$PROFILE" ]; then
echo "PROFILE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple"
exit 1
fi
}
detect_gpu
detect_gpu_size
PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
check_vars
echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS"
exec /build/entrypoint.sh "$@"

View File

@@ -1,12 +0,0 @@
name: text-embedding-ada-002
backend: sentencetransformers
parameters:
model: all-MiniLM-L6-v2
usage: |
You can test this model with curl like this:
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "Your text string goes here",
"model": "text-embedding-ada-002"
}'

View File

@@ -1,25 +0,0 @@
name: stablediffusion
parameters:
model: DreamShaper_8_pruned.safetensors
backend: diffusers
step: 25
f16: true
diffusers:
pipeline_type: StableDiffusionPipeline
cuda: true
enable_parameters: "negative_prompt,num_inference_steps"
scheduler_type: "k_dpmpp_2m"
download_files:
- filename: DreamShaper_8_pruned.safetensors
uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
usage: |
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "<positive prompt>|<negative prompt>",
"step": 25,
"size": "512x512"
}'

View File

@@ -1,27 +0,0 @@
name: jina-reranker-v1-base-en
backend: rerankers
parameters:
model: cross-encoder
usage: |
You can test this model with curl like this:
curl http://localhost:8080/v1/rerank \
-H "Content-Type: application/json" \
-d '{
"model": "jina-reranker-v1-base-en",
"query": "Organic skincare products for sensitive skin",
"documents": [
"Eco-friendly kitchenware for modern homes",
"Biodegradable cleaning supplies for eco-conscious consumers",
"Organic cotton baby clothes for sensitive skin",
"Natural organic skincare range for sensitive skin",
"Tech gadgets for smart homes: 2024 edition",
"Sustainable gardening tools and compost solutions",
"Sensitive skin-friendly facial cleansers and toners",
"Organic food wraps and storage solutions",
"All-natural pet food for dogs with allergies",
"Yoga mats made from recycled materials"
],
"top_n": 3
}'

View File

@@ -1,18 +0,0 @@
name: whisper-1
backend: whisper
parameters:
model: ggml-whisper-base.bin
usage: |
## example audio file
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
## Send the example audio file to the transcriptions endpoint
curl http://localhost:8080/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
-F file="@$PWD/gb1.ogg" -F model="whisper-1"
download_files:
- filename: "ggml-whisper-base.bin"
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"

View File

@@ -1,15 +0,0 @@
name: tts-1
download_files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
parameters:
model: en-us-amy-low.onnx
usage: |
To test if this model works as expected, you can use the following curl command:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"model":"tts-1",
"input": "Hi, this is a test."
}'

View File

@@ -1,101 +0,0 @@
name: gpt-4
mmap: true
parameters:
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
context_size: 8192
stopwords:
- "<|im_end|>"
- "<dummy32000>"
- "</tool_call>"
- "<|eot_id|>"
- "<|end_of_text|>"
function:
# disable injecting the "answer" tool
disable_no_action: true
grammar:
# This allows the grammar to also return messages
mixed_mode: true
# Suffix to add to the grammar
#prefix: '<tool_call>\n'
# Force parallel calls in the grammar
# parallel_calls: true
return_name_in_function_response: true
# Without grammar uncomment the lines below
# Warning: this is relying only on the capability of the
# LLM model to generate the correct function call.
json_regex_match:
- "(?s)<tool_call>(.*?)</tool_call>"
- "(?s)<tool_call>(.*?)"
replace_llm_results:
# Drop the scratchpad content from responses
- key: "(?s)<scratchpad>.*</scratchpad>"
value: ""
replace_function_results:
# Replace everything that is not JSON array or object
#
- key: '(?s)^[^{\[]*'
value: ""
- key: '(?s)[^}\]]*$'
value: ""
- key: "'([^']*?)'"
value: "_DQUOTE_${1}_DQUOTE_"
- key: '\\"'
value: "__TEMP_QUOTE__"
- key: "\'"
value: "'"
- key: "_DQUOTE_"
value: '"'
- key: "__TEMP_QUOTE__"
value: '"'
# Drop the scratchpad content from responses
- key: "(?s)<scratchpad>.*</scratchpad>"
value: ""
template:
chat: |
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
{{- if .FunctionCall }}
<tool_call>
{{- else if eq .RoleName "tool" }}
<tool_response>
{{- end }}
{{- if .Content}}
{{.Content }}
{{- end }}
{{- if .FunctionCall}}
{{toJson .FunctionCall}}
{{- end }}
{{- if .FunctionCall }}
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}<|im_end|>
completion: |
{{.Input}}
function: |-
<|im_start|>system
You are a function calling AI model.
Here are the available tools:
<tools>
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
</tools>
You should call the tools provided to you sequentially
Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
<scratchpad>
{step-by-step reasoning and plan in bullet points}
</scratchpad>
For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
<tool_call>
{"arguments": <args-dict>, "name": <function-name>}
</tool_call><|im_end|>
{{.Input -}}
<|im_start|>assistant

View File

@@ -1,35 +0,0 @@
backend: llama-cpp
context_size: 4096
f16: true
mmap: true
name: gpt-4-vision-preview
roles:
user: "USER:"
assistant: "ASSISTANT:"
system: "SYSTEM:"
mmproj: llava-v1.6-7b-mmproj-f16.gguf
parameters:
model: llava-v1.6-mistral-7b.Q5_K_M.gguf
temperature: 0.2
top_k: 40
top_p: 0.95
seed: -1
template:
chat: |
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
{{.Input}}
ASSISTANT:
download_files:
- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf
- filename: llava-v1.6-7b-mmproj-f16.gguf
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "gpt-4-vision-preview",
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'

View File

@@ -1,12 +0,0 @@
name: text-embedding-ada-002
backend: sentencetransformers
parameters:
model: all-MiniLM-L6-v2
usage: |
You can test this model with curl like this:
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "Your text string goes here",
"model": "text-embedding-ada-002"
}'

View File

@@ -1,20 +0,0 @@
name: stablediffusion
parameters:
model: runwayml/stable-diffusion-v1-5
backend: diffusers
step: 25
f16: true
diffusers:
pipeline_type: StableDiffusionPipeline
cuda: true
enable_parameters: "negative_prompt,num_inference_steps"
scheduler_type: "k_dpmpp_2m"
usage: |
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "<positive prompt>|<negative prompt>",
"step": 25,
"size": "512x512"
}'

View File

@@ -1,27 +0,0 @@
name: jina-reranker-v1-base-en
backend: rerankers
parameters:
model: cross-encoder
usage: |
You can test this model with curl like this:
curl http://localhost:8080/v1/rerank \
-H "Content-Type: application/json" \
-d '{
"model": "jina-reranker-v1-base-en",
"query": "Organic skincare products for sensitive skin",
"documents": [
"Eco-friendly kitchenware for modern homes",
"Biodegradable cleaning supplies for eco-conscious consumers",
"Organic cotton baby clothes for sensitive skin",
"Natural organic skincare range for sensitive skin",
"Tech gadgets for smart homes: 2024 edition",
"Sustainable gardening tools and compost solutions",
"Sensitive skin-friendly facial cleansers and toners",
"Organic food wraps and storage solutions",
"All-natural pet food for dogs with allergies",
"Yoga mats made from recycled materials"
],
"top_n": 3
}'

View File

@@ -1,18 +0,0 @@
name: whisper-1
backend: whisper
parameters:
model: ggml-whisper-base.bin
usage: |
## example audio file
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
## Send the example audio file to the transcriptions endpoint
curl http://localhost:8080/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
-F file="@$PWD/gb1.ogg" -F model="whisper-1"
download_files:
- filename: "ggml-whisper-base.bin"
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"

View File

@@ -1,15 +0,0 @@
name: tts-1
download_files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
parameters:
model: en-us-amy-low.onnx
usage: |
To test if this model works as expected, you can use the following curl command:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"model":"tts-1",
"input": "Hi, this is a test."
}'

View File

@@ -1,103 +0,0 @@
name: gpt-4
mmap: false
context_size: 8192
f16: false
parameters:
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
stopwords:
- "<|im_end|>"
- "<dummy32000>"
- "</tool_call>"
- "<|eot_id|>"
- "<|end_of_text|>"
function:
# disable injecting the "answer" tool
disable_no_action: true
grammar:
# This allows the grammar to also return messages
mixed_mode: true
# Suffix to add to the grammar
#prefix: '<tool_call>\n'
# Force parallel calls in the grammar
# parallel_calls: true
return_name_in_function_response: true
# Without grammar uncomment the lines below
# Warning: this is relying only on the capability of the
# LLM model to generate the correct function call.
json_regex_match:
- "(?s)<tool_call>(.*?)</tool_call>"
- "(?s)<tool_call>(.*?)"
replace_llm_results:
# Drop the scratchpad content from responses
- key: "(?s)<scratchpad>.*</scratchpad>"
value: ""
replace_function_results:
# Replace everything that is not JSON array or object
#
- key: '(?s)^[^{\[]*'
value: ""
- key: '(?s)[^}\]]*$'
value: ""
- key: "'([^']*?)'"
value: "_DQUOTE_${1}_DQUOTE_"
- key: '\\"'
value: "__TEMP_QUOTE__"
- key: "\'"
value: "'"
- key: "_DQUOTE_"
value: '"'
- key: "__TEMP_QUOTE__"
value: '"'
# Drop the scratchpad content from responses
- key: "(?s)<scratchpad>.*</scratchpad>"
value: ""
template:
chat: |
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
{{- if .FunctionCall }}
<tool_call>
{{- else if eq .RoleName "tool" }}
<tool_response>
{{- end }}
{{- if .Content}}
{{.Content }}
{{- end }}
{{- if .FunctionCall}}
{{toJson .FunctionCall}}
{{- end }}
{{- if .FunctionCall }}
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}<|im_end|>
completion: |
{{.Input}}
function: |-
<|im_start|>system
You are a function calling AI model.
Here are the available tools:
<tools>
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
</tools>
You should call the tools provided to you sequentially
Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
<scratchpad>
{step-by-step reasoning and plan in bullet points}
</scratchpad>
For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
<tool_call>
{"arguments": <args-dict>, "name": <function-name>}
</tool_call><|im_end|>
{{.Input -}}
<|im_start|>assistant

View File

@@ -1,35 +0,0 @@
backend: llama-cpp
context_size: 4096
mmap: false
f16: false
name: gpt-4-vision-preview
roles:
user: "USER:"
assistant: "ASSISTANT:"
system: "SYSTEM:"
mmproj: llava-v1.6-7b-mmproj-f16.gguf
parameters:
model: llava-v1.6-mistral-7b.Q5_K_M.gguf
temperature: 0.2
top_k: 40
top_p: 0.95
seed: -1
template:
chat: |
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
{{.Input}}
ASSISTANT:
download_files:
- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf
- filename: llava-v1.6-7b-mmproj-f16.gguf
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "gpt-4-vision-preview",
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'

View File

@@ -18,72 +18,6 @@ service Backend {
rpc TTS(TTSRequest) returns (Result) {}
rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
rpc Status(HealthMessage) returns (StatusResponse) {}
rpc StoresSet(StoresSetOptions) returns (Result) {}
rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
rpc StoresGet(StoresGetOptions) returns (StoresGetResult) {}
rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
rpc Rerank(RerankRequest) returns (RerankResult) {}
}
message RerankRequest {
string query = 1;
repeated string documents = 2;
int32 top_n = 3;
}
message RerankResult {
Usage usage = 1;
repeated DocumentResult results = 2;
}
message Usage {
int32 total_tokens = 1;
int32 prompt_tokens = 2;
}
message DocumentResult {
int32 index = 1;
string text = 2;
float relevance_score = 3;
}
message StoresKey {
repeated float Floats = 1;
}
message StoresValue {
bytes Bytes = 1;
}
message StoresSetOptions {
repeated StoresKey Keys = 1;
repeated StoresValue Values = 2;
}
message StoresDeleteOptions {
repeated StoresKey Keys = 1;
}
message StoresGetOptions {
repeated StoresKey Keys = 1;
}
message StoresGetResult {
repeated StoresKey Keys = 1;
repeated StoresValue Values = 2;
}
message StoresFindOptions {
StoresKey Key = 1;
int32 TopK = 2;
}
message StoresFindResult {
repeated StoresKey Keys = 1;
repeated StoresValue Values = 2;
repeated float Similarities = 3;
}
message HealthMessage {}
@@ -131,15 +65,11 @@ message PredictOptions {
string NegativePrompt = 40;
int32 NDraft = 41;
repeated string Images = 42;
bool UseTokenizerTemplate = 43;
repeated Message Messages = 44;
}
// The response message containing the result
message Reply {
bytes message = 1;
int32 tokens = 2;
int32 prompt_tokens = 3;
}
message ModelOptions {
@@ -191,7 +121,7 @@ message ModelOptions {
bool NoMulMatQ = 37;
string DraftModel = 39;
string AudioPath = 38;
// vllm
@@ -201,7 +131,6 @@ message ModelOptions {
bool EnforceEager = 52;
int32 SwapSpace = 53;
int32 MaxModelLen = 54;
int32 TensorParallelSize = 55;
string MMProj = 41;
@@ -212,9 +141,6 @@ message ModelOptions {
float YarnBetaSlow = 47;
string Type = 49;
bool FlashAttention = 56;
bool NoKVOffload = 57;
}
message Result {
@@ -230,7 +156,6 @@ message TranscriptRequest {
string dst = 2;
string language = 3;
uint32 threads = 4;
bool translate = 5;
}
message TranscriptResult {
@@ -267,7 +192,6 @@ message TTSRequest {
string model = 2;
string dst = 3;
string voice = 4;
optional string language = 5;
}
message TokenizationResponse {
@@ -289,9 +213,4 @@ message StatusResponse {
}
State state = 1;
MemoryUsageData memory = 2;
}
message Message {
string role = 1;
string content = 2;
}

457
backend/backend_grpc.pb.go Normal file
View File

@@ -0,0 +1,457 @@
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.2.0
// - protoc v4.23.4
// source: backend/backend.proto
package proto
import (
context "context"
grpc "google.golang.org/grpc"
codes "google.golang.org/grpc/codes"
status "google.golang.org/grpc/status"
)
// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
// Requires gRPC-Go v1.32.0 or later.
const _ = grpc.SupportPackageIsVersion7
// BackendClient is the client API for Backend service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
type BackendClient interface {
Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error)
Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error)
LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error)
PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error)
Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error)
GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error)
AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error)
TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error)
TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error)
Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error)
}
type backendClient struct {
cc grpc.ClientConnInterface
}
func NewBackendClient(cc grpc.ClientConnInterface) BackendClient {
return &backendClient{cc}
}
func (c *backendClient) Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) {
out := new(Reply)
err := c.cc.Invoke(ctx, "/backend.Backend/Health", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *backendClient) Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) {
out := new(Reply)
err := c.cc.Invoke(ctx, "/backend.Backend/Predict", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *backendClient) LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) {
out := new(Result)
err := c.cc.Invoke(ctx, "/backend.Backend/LoadModel", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *backendClient) PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) {
stream, err := c.cc.NewStream(ctx, &Backend_ServiceDesc.Streams[0], "/backend.Backend/PredictStream", opts...)
if err != nil {
return nil, err
}
x := &backendPredictStreamClient{stream}
if err := x.ClientStream.SendMsg(in); err != nil {
return nil, err
}
if err := x.ClientStream.CloseSend(); err != nil {
return nil, err
}
return x, nil
}
type Backend_PredictStreamClient interface {
Recv() (*Reply, error)
grpc.ClientStream
}
type backendPredictStreamClient struct {
grpc.ClientStream
}
func (x *backendPredictStreamClient) Recv() (*Reply, error) {
m := new(Reply)
if err := x.ClientStream.RecvMsg(m); err != nil {
return nil, err
}
return m, nil
}
func (c *backendClient) Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) {
out := new(EmbeddingResult)
err := c.cc.Invoke(ctx, "/backend.Backend/Embedding", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *backendClient) GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) {
out := new(Result)
err := c.cc.Invoke(ctx, "/backend.Backend/GenerateImage", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *backendClient) AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) {
out := new(TranscriptResult)
err := c.cc.Invoke(ctx, "/backend.Backend/AudioTranscription", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *backendClient) TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) {
out := new(Result)
err := c.cc.Invoke(ctx, "/backend.Backend/TTS", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *backendClient) TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) {
out := new(TokenizationResponse)
err := c.cc.Invoke(ctx, "/backend.Backend/TokenizeString", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *backendClient) Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) {
out := new(StatusResponse)
err := c.cc.Invoke(ctx, "/backend.Backend/Status", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
// BackendServer is the server API for Backend service.
// All implementations must embed UnimplementedBackendServer
// for forward compatibility
type BackendServer interface {
Health(context.Context, *HealthMessage) (*Reply, error)
Predict(context.Context, *PredictOptions) (*Reply, error)
LoadModel(context.Context, *ModelOptions) (*Result, error)
PredictStream(*PredictOptions, Backend_PredictStreamServer) error
Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error)
GenerateImage(context.Context, *GenerateImageRequest) (*Result, error)
AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error)
TTS(context.Context, *TTSRequest) (*Result, error)
TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error)
Status(context.Context, *HealthMessage) (*StatusResponse, error)
mustEmbedUnimplementedBackendServer()
}
// UnimplementedBackendServer must be embedded to have forward compatible implementations.
type UnimplementedBackendServer struct {
}
func (UnimplementedBackendServer) Health(context.Context, *HealthMessage) (*Reply, error) {
return nil, status.Errorf(codes.Unimplemented, "method Health not implemented")
}
func (UnimplementedBackendServer) Predict(context.Context, *PredictOptions) (*Reply, error) {
return nil, status.Errorf(codes.Unimplemented, "method Predict not implemented")
}
func (UnimplementedBackendServer) LoadModel(context.Context, *ModelOptions) (*Result, error) {
return nil, status.Errorf(codes.Unimplemented, "method LoadModel not implemented")
}
func (UnimplementedBackendServer) PredictStream(*PredictOptions, Backend_PredictStreamServer) error {
return status.Errorf(codes.Unimplemented, "method PredictStream not implemented")
}
func (UnimplementedBackendServer) Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error) {
return nil, status.Errorf(codes.Unimplemented, "method Embedding not implemented")
}
func (UnimplementedBackendServer) GenerateImage(context.Context, *GenerateImageRequest) (*Result, error) {
return nil, status.Errorf(codes.Unimplemented, "method GenerateImage not implemented")
}
func (UnimplementedBackendServer) AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error) {
return nil, status.Errorf(codes.Unimplemented, "method AudioTranscription not implemented")
}
func (UnimplementedBackendServer) TTS(context.Context, *TTSRequest) (*Result, error) {
return nil, status.Errorf(codes.Unimplemented, "method TTS not implemented")
}
func (UnimplementedBackendServer) TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error) {
return nil, status.Errorf(codes.Unimplemented, "method TokenizeString not implemented")
}
func (UnimplementedBackendServer) Status(context.Context, *HealthMessage) (*StatusResponse, error) {
return nil, status.Errorf(codes.Unimplemented, "method Status not implemented")
}
func (UnimplementedBackendServer) mustEmbedUnimplementedBackendServer() {}
// UnsafeBackendServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to BackendServer will
// result in compilation errors.
type UnsafeBackendServer interface {
mustEmbedUnimplementedBackendServer()
}
func RegisterBackendServer(s grpc.ServiceRegistrar, srv BackendServer) {
s.RegisterService(&Backend_ServiceDesc, srv)
}
func _Backend_Health_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(HealthMessage)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(BackendServer).Health(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/backend.Backend/Health",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).Health(ctx, req.(*HealthMessage))
}
return interceptor(ctx, in, info, handler)
}
func _Backend_Predict_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(PredictOptions)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(BackendServer).Predict(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/backend.Backend/Predict",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).Predict(ctx, req.(*PredictOptions))
}
return interceptor(ctx, in, info, handler)
}
func _Backend_LoadModel_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(ModelOptions)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(BackendServer).LoadModel(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/backend.Backend/LoadModel",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).LoadModel(ctx, req.(*ModelOptions))
}
return interceptor(ctx, in, info, handler)
}
func _Backend_PredictStream_Handler(srv interface{}, stream grpc.ServerStream) error {
m := new(PredictOptions)
if err := stream.RecvMsg(m); err != nil {
return err
}
return srv.(BackendServer).PredictStream(m, &backendPredictStreamServer{stream})
}
type Backend_PredictStreamServer interface {
Send(*Reply) error
grpc.ServerStream
}
type backendPredictStreamServer struct {
grpc.ServerStream
}
func (x *backendPredictStreamServer) Send(m *Reply) error {
return x.ServerStream.SendMsg(m)
}
func _Backend_Embedding_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(PredictOptions)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(BackendServer).Embedding(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/backend.Backend/Embedding",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).Embedding(ctx, req.(*PredictOptions))
}
return interceptor(ctx, in, info, handler)
}
func _Backend_GenerateImage_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(GenerateImageRequest)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(BackendServer).GenerateImage(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/backend.Backend/GenerateImage",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).GenerateImage(ctx, req.(*GenerateImageRequest))
}
return interceptor(ctx, in, info, handler)
}
func _Backend_AudioTranscription_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(TranscriptRequest)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(BackendServer).AudioTranscription(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/backend.Backend/AudioTranscription",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).AudioTranscription(ctx, req.(*TranscriptRequest))
}
return interceptor(ctx, in, info, handler)
}
func _Backend_TTS_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(TTSRequest)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(BackendServer).TTS(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/backend.Backend/TTS",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).TTS(ctx, req.(*TTSRequest))
}
return interceptor(ctx, in, info, handler)
}
func _Backend_TokenizeString_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(PredictOptions)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(BackendServer).TokenizeString(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/backend.Backend/TokenizeString",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).TokenizeString(ctx, req.(*PredictOptions))
}
return interceptor(ctx, in, info, handler)
}
func _Backend_Status_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(HealthMessage)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(BackendServer).Status(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/backend.Backend/Status",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(BackendServer).Status(ctx, req.(*HealthMessage))
}
return interceptor(ctx, in, info, handler)
}
// Backend_ServiceDesc is the grpc.ServiceDesc for Backend service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
var Backend_ServiceDesc = grpc.ServiceDesc{
ServiceName: "backend.Backend",
HandlerType: (*BackendServer)(nil),
Methods: []grpc.MethodDesc{
{
MethodName: "Health",
Handler: _Backend_Health_Handler,
},
{
MethodName: "Predict",
Handler: _Backend_Predict_Handler,
},
{
MethodName: "LoadModel",
Handler: _Backend_LoadModel_Handler,
},
{
MethodName: "Embedding",
Handler: _Backend_Embedding_Handler,
},
{
MethodName: "GenerateImage",
Handler: _Backend_GenerateImage_Handler,
},
{
MethodName: "AudioTranscription",
Handler: _Backend_AudioTranscription_Handler,
},
{
MethodName: "TTS",
Handler: _Backend_TTS_Handler,
},
{
MethodName: "TokenizeString",
Handler: _Backend_TokenizeString_Handler,
},
{
MethodName: "Status",
Handler: _Backend_Status_Handler,
},
},
Streams: []grpc.StreamDesc{
{
StreamName: "PredictStream",
Handler: _Backend_PredictStream_Handler,
ServerStreams: true,
},
},
Metadata: "backend/backend.proto",
}

View File

@@ -5,6 +5,7 @@ SYSTEM ?= $(HOST_SYSTEM)
TAG_LIB_GRPC?=v1.59.0
GIT_REPO_LIB_GRPC?=https://github.com/grpc/grpc.git
GIT_CLONE_DEPTH?=1
NUM_BUILD_THREADS?=$(shell nproc --ignore=1)
INSTALLED_PACKAGES=installed_packages
GRPC_REPO=grpc_repo
@@ -47,11 +48,11 @@ $(INSTALLED_PACKAGES): grpc_build
$(GRPC_REPO):
git clone --depth $(GIT_CLONE_DEPTH) -b $(TAG_LIB_GRPC) $(GIT_REPO_LIB_GRPC) $(GRPC_REPO)/grpc
cd $(GRPC_REPO)/grpc && git submodule update --jobs 2 --init --recursive --depth $(GIT_CLONE_DEPTH)
cd $(GRPC_REPO)/grpc && git submodule update --init --recursive --depth $(GIT_CLONE_DEPTH)
$(GRPC_BUILD): $(GRPC_REPO)
mkdir -p $(GRPC_BUILD)
cd $(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . && cmake --build . --target install
cd $(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . -- -j ${NUM_BUILD_THREADS} && cmake --build . --target install -- -j ${NUM_BUILD_THREADS}
build: $(INSTALLED_PACKAGES)

View File

@@ -4,44 +4,29 @@ LLAMA_VERSION?=
CMAKE_ARGS?=
BUILD_TYPE?=
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
TARGET?=--target grpc-server
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
# If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
CMAKE_ARGS+=-DGGML_CUDA=ON
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
CMAKE_ARGS+=-DLLAMA_CUBLAS=ON
# If build type is openblas then we set -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
# to CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas)
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
CMAKE_ARGS+=-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
# If build type is clblas (openCL) we set -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),clblas)
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
CMAKE_ARGS+=-DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DGGML_HIPBLAS=ON
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
# But if it's OSX without metal, disable it here
else ifeq ($(OS),Darwin)
ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF
else
CMAKE_ARGS+=-DGGML_METAL=ON
# Until this is tested properly, we disable embedded metal file
# as we already embed it as part of the LocalAI assets
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=OFF
TARGET+=--target ggml-metal
endif
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
# If it's OSX, DO NOT embed the metal library - -DLLAMA_METAL_EMBED_LIBRARY=ON requires further investigation
endif
ifeq ($(BUILD_TYPE),sycl_f16)
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
endif
ifeq ($(BUILD_TYPE),sycl_f32)
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
endif
llama.cpp:
@@ -51,29 +36,37 @@ llama.cpp:
fi
cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1
llama.cpp/examples/grpc-server: llama.cpp
llama.cpp/examples/grpc-server:
mkdir -p llama.cpp/examples/grpc-server
bash prepare.sh
cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
cp -rfv $(abspath ./)/utils.hpp llama.cpp/examples/grpc-server/
echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
## XXX: In some versions of CMake clip wasn't being built before llama.
## This is an hack for now, but it should be fixed in the future.
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
rebuild:
bash prepare.sh
cp -rfv $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
cp -rfv $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
rm -rf grpc-server
$(MAKE) grpc-server
purge:
rm -rf llama.cpp/build
rm -rf llama.cpp/examples/grpc-server
clean:
rm -rf llama.cpp
rm -rf grpc-server
clean: purge
rm -rf llama.cpp
grpc-server: llama.cpp llama.cpp/examples/grpc-server
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
bash -c "source $(ONEAPI_VARS); \
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release"
else
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release
endif
cp llama.cpp/build/bin/grpc-server .

View File

@@ -791,7 +791,7 @@ struct llama_server_context
sampler_names.emplace_back(sampler_name);
}
}
slot->sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
slot->sparams.samplers_sequence = sampler_types_from_names(sampler_names, false);
}
else
{
@@ -886,8 +886,6 @@ struct llama_server_context
{"task_id", slot->task_id},
});
LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
return true;
}
@@ -1148,7 +1146,7 @@ struct llama_server_context
std::vector<std::string> samplers_sequence;
for (const auto &sampler_type : slot.sparams.samplers_sequence)
{
samplers_sequence.emplace_back(llama_sampling_type_to_str(sampler_type));
samplers_sequence.emplace_back(sampler_type_to_name_string(sampler_type));
}
return json {
@@ -2219,12 +2217,6 @@ static void params_parse(const backend::ModelOptions* request,
} else {
params.n_parallel = 1;
}
const char *llama_grpc_servers = std::getenv("LLAMACPP_GRPC_SERVERS");
if (llama_grpc_servers != NULL) {
params.rpc_servers = std::string(llama_grpc_servers);
}
// TODO: Add yarn
if (!request->tensorsplit().empty()) {
@@ -2262,9 +2254,6 @@ static void params_parse(const backend::ModelOptions* request,
}
params.use_mlock = request->mlock();
params.use_mmap = request->mmap();
params.flash_attn = request->flashattention();
params.no_kv_offload = request->nokvoffload();
params.embedding = request->embeddings();
if (request->ropescaling() == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; }
@@ -2343,10 +2332,6 @@ public:
std::string completion_text = result.result_json.value("content", "");
reply.set_message(completion_text);
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
reply.set_tokens(tokens_predicted);
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
reply.set_prompt_tokens(tokens_evaluated);
// Send the reply
writer->Write(reply);
@@ -2372,10 +2357,6 @@ public:
task_result result = llama.queue_results.recv(task_id);
if (!result.error && result.stop) {
completion_text = result.result_json.value("content", "");
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
reply->set_prompt_tokens(tokens_evaluated);
reply->set_tokens(tokens_predicted);
reply->set_message(completion_text);
}
else

View File

@@ -1,20 +0,0 @@
#!/bin/bash
cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
cp -rfv json.hpp llama.cpp/examples/grpc-server/
cp -rfv utils.hpp llama.cpp/examples/grpc-server/
if grep -q "grpc-server" llama.cpp/examples/CMakeLists.txt; then
echo "grpc-server already added"
else
echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
fi
## XXX: In some versions of CMake clip wasn't being built before llama.
## This is an hack for now, but it should be fixed in the future.
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp

View File

@@ -5,7 +5,7 @@ package main
import (
"flag"
grpc "github.com/mudler/LocalAI/pkg/grpc"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (

View File

@@ -3,9 +3,9 @@ package main
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/stablediffusion"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/LocalAI/pkg/stablediffusion"
)
type Image struct {

View File

@@ -5,7 +5,7 @@ package main
import (
"flag"
grpc "github.com/mudler/LocalAI/pkg/grpc"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (

View File

@@ -3,9 +3,9 @@ package main
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/tinydream"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/LocalAI/pkg/tinydream"
)
type Image struct {

View File

@@ -5,8 +5,8 @@ package main
import (
bert "github.com/go-skynet/go-bert.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
)
type Embeddings struct {

View File

@@ -5,7 +5,7 @@ package main
import (
"flag"
grpc "github.com/mudler/LocalAI/pkg/grpc"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (

View File

@@ -5,8 +5,8 @@ package main
import (
"fmt"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
)

View File

@@ -5,7 +5,7 @@ package main
import (
"flag"
grpc "github.com/mudler/LocalAI/pkg/grpc"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (

View File

@@ -4,11 +4,10 @@ package main
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"os"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/langchain"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/LocalAI/pkg/langchain"
)
type LLM struct {
@@ -19,14 +18,9 @@ type LLM struct {
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
var err error
hfToken := os.Getenv("HUGGINGFACEHUB_API_TOKEN")
if hfToken == "" {
return fmt.Errorf("no huggingface token provided")
}
llm.langchain, err = langchain.NewHuggingFace(opts.Model, hfToken)
llm.langchain, _ = langchain.NewHuggingFace(opts.Model)
llm.model = opts.Model
return err
return nil
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {

View File

@@ -5,7 +5,7 @@ package main
import (
"flag"
grpc "github.com/mudler/LocalAI/pkg/grpc"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (

View File

@@ -5,9 +5,9 @@ package main
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/go-llama.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
type LLM struct {

View File

@@ -3,7 +3,7 @@ package main
import (
"flag"
grpc "github.com/mudler/LocalAI/pkg/grpc"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (

View File

@@ -6,9 +6,9 @@ import (
"fmt"
"path/filepath"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/go-llama.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base"
)
type LLM struct {

View File

@@ -7,7 +7,7 @@ package main
import (
"flag"
grpc "github.com/mudler/LocalAI/pkg/grpc"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (

View File

@@ -5,7 +5,7 @@ package main
import (
"flag"
grpc "github.com/mudler/LocalAI/pkg/grpc"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (

View File

@@ -7,8 +7,8 @@ import (
"path/filepath"
"github.com/donomii/go-rwkv.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
)
const tokenizerSuffix = ".tokenizer.json"
@@ -31,7 +31,7 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads()))
if model == nil {
return fmt.Errorf("rwkv could not load model")
return fmt.Errorf("could not load model")
}
llm.rwkv = model
return nil

View File

@@ -1,14 +0,0 @@
//go:build debug
// +build debug
package main
import (
"github.com/rs/zerolog/log"
)
func assert(cond bool, msg string) {
if !cond {
log.Fatal().Stack().Msg(msg)
}
}

View File

@@ -1,26 +0,0 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each store
import (
"flag"
"os"
grpc "github.com/mudler/LocalAI/pkg/grpc"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
flag.Parse()
if err := grpc.StartServer(*addr, NewStore()); err != nil {
panic(err)
}
}

View File

@@ -1,7 +0,0 @@
//go:build !debug
// +build !debug
package main
func assert(cond bool, msg string) {
}

View File

@@ -1,507 +0,0 @@
package main
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"container/heap"
"fmt"
"math"
"slices"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/rs/zerolog/log"
)
type Store struct {
base.SingleThread
// The sorted keys
keys [][]float32
// The sorted values
values [][]byte
// If for every K it holds that ||k||^2 = 1, then we can use the normalized distance functions
// TODO: Should we normalize incoming keys if they are not instead?
keysAreNormalized bool
// The first key decides the length of the keys
keyLen int
}
// TODO: Only used for sorting using Go's builtin implementation. The interfaces are columnar because
// that's theoretically best for memory layout and cache locality, but this isn't optimized yet.
type Pair struct {
Key []float32
Value []byte
}
func NewStore() *Store {
return &Store{
keys: make([][]float32, 0),
values: make([][]byte, 0),
keysAreNormalized: true,
keyLen: -1,
}
}
func compareSlices(k1, k2 []float32) int {
assert(len(k1) == len(k2), fmt.Sprintf("compareSlices: len(k1) = %d, len(k2) = %d", len(k1), len(k2)))
return slices.Compare(k1, k2)
}
func hasKey(unsortedSlice [][]float32, target []float32) bool {
return slices.ContainsFunc(unsortedSlice, func(k []float32) bool {
return compareSlices(k, target) == 0
})
}
func findInSortedSlice(sortedSlice [][]float32, target []float32) (int, bool) {
return slices.BinarySearchFunc(sortedSlice, target, func(k, t []float32) int {
return compareSlices(k, t)
})
}
func isSortedPairs(kvs []Pair) bool {
for i := 1; i < len(kvs); i++ {
if compareSlices(kvs[i-1].Key, kvs[i].Key) > 0 {
return false
}
}
return true
}
func isSortedKeys(keys [][]float32) bool {
for i := 1; i < len(keys); i++ {
if compareSlices(keys[i-1], keys[i]) > 0 {
return false
}
}
return true
}
func sortIntoKeySlicese(keys []*pb.StoresKey) [][]float32 {
ks := make([][]float32, len(keys))
for i, k := range keys {
ks[i] = k.Floats
}
slices.SortFunc(ks, compareSlices)
assert(len(ks) == len(keys), fmt.Sprintf("len(ks) = %d, len(keys) = %d", len(ks), len(keys)))
assert(isSortedKeys(ks), "keys are not sorted")
return ks
}
func (s *Store) Load(opts *pb.ModelOptions) error {
return nil
}
// Sort the incoming kvs and merge them with the existing sorted kvs
func (s *Store) StoresSet(opts *pb.StoresSetOptions) error {
if len(opts.Keys) == 0 {
return fmt.Errorf("no keys to add")
}
if len(opts.Keys) != len(opts.Values) {
return fmt.Errorf("len(keys) = %d, len(values) = %d", len(opts.Keys), len(opts.Values))
}
if s.keyLen == -1 {
s.keyLen = len(opts.Keys[0].Floats)
} else {
if len(opts.Keys[0].Floats) != s.keyLen {
return fmt.Errorf("Try to add key with length %d when existing length is %d", len(opts.Keys[0].Floats), s.keyLen)
}
}
kvs := make([]Pair, len(opts.Keys))
for i, k := range opts.Keys {
if s.keysAreNormalized && !isNormalized(k.Floats) {
s.keysAreNormalized = false
var sample []float32
if len(s.keys) > 5 {
sample = k.Floats[:5]
} else {
sample = k.Floats
}
log.Debug().Msgf("Key is not normalized: %v", sample)
}
kvs[i] = Pair{
Key: k.Floats,
Value: opts.Values[i].Bytes,
}
}
slices.SortFunc(kvs, func(a, b Pair) int {
return compareSlices(a.Key, b.Key)
})
assert(len(kvs) == len(opts.Keys), fmt.Sprintf("len(kvs) = %d, len(opts.Keys) = %d", len(kvs), len(opts.Keys)))
assert(isSortedPairs(kvs), "keys are not sorted")
l := len(kvs) + len(s.keys)
merge_ks := make([][]float32, 0, l)
merge_vs := make([][]byte, 0, l)
i, j := 0, 0
for {
if i+j >= l {
break
}
if i >= len(kvs) {
merge_ks = append(merge_ks, s.keys[j])
merge_vs = append(merge_vs, s.values[j])
j++
continue
}
if j >= len(s.keys) {
merge_ks = append(merge_ks, kvs[i].Key)
merge_vs = append(merge_vs, kvs[i].Value)
i++
continue
}
c := compareSlices(kvs[i].Key, s.keys[j])
if c < 0 {
merge_ks = append(merge_ks, kvs[i].Key)
merge_vs = append(merge_vs, kvs[i].Value)
i++
} else if c > 0 {
merge_ks = append(merge_ks, s.keys[j])
merge_vs = append(merge_vs, s.values[j])
j++
} else {
merge_ks = append(merge_ks, kvs[i].Key)
merge_vs = append(merge_vs, kvs[i].Value)
i++
j++
}
}
assert(len(merge_ks) == l, fmt.Sprintf("len(merge_ks) = %d, l = %d", len(merge_ks), l))
assert(isSortedKeys(merge_ks), "merge keys are not sorted")
s.keys = merge_ks
s.values = merge_vs
return nil
}
func (s *Store) StoresDelete(opts *pb.StoresDeleteOptions) error {
if len(opts.Keys) == 0 {
return fmt.Errorf("no keys to delete")
}
if len(opts.Keys) == 0 {
return fmt.Errorf("no keys to add")
}
if s.keyLen == -1 {
s.keyLen = len(opts.Keys[0].Floats)
} else {
if len(opts.Keys[0].Floats) != s.keyLen {
return fmt.Errorf("Trying to delete key with length %d when existing length is %d", len(opts.Keys[0].Floats), s.keyLen)
}
}
ks := sortIntoKeySlicese(opts.Keys)
l := len(s.keys) - len(ks)
merge_ks := make([][]float32, 0, l)
merge_vs := make([][]byte, 0, l)
tail_ks := s.keys
tail_vs := s.values
for _, k := range ks {
j, found := findInSortedSlice(tail_ks, k)
if found {
merge_ks = append(merge_ks, tail_ks[:j]...)
merge_vs = append(merge_vs, tail_vs[:j]...)
tail_ks = tail_ks[j+1:]
tail_vs = tail_vs[j+1:]
} else {
assert(!hasKey(s.keys, k), fmt.Sprintf("Key exists, but was not found: t=%d, %v", len(tail_ks), k))
}
log.Debug().Msgf("Delete: found = %v, t = %d, j = %d, len(merge_ks) = %d, len(merge_vs) = %d", found, len(tail_ks), j, len(merge_ks), len(merge_vs))
}
merge_ks = append(merge_ks, tail_ks...)
merge_vs = append(merge_vs, tail_vs...)
assert(len(merge_ks) <= len(s.keys), fmt.Sprintf("len(merge_ks) = %d, len(s.keys) = %d", len(merge_ks), len(s.keys)))
s.keys = merge_ks
s.values = merge_vs
assert(len(s.keys) >= l, fmt.Sprintf("len(s.keys) = %d, l = %d", len(s.keys), l))
assert(isSortedKeys(s.keys), "keys are not sorted")
assert(func() bool {
for _, k := range ks {
if _, found := findInSortedSlice(s.keys, k); found {
return false
}
}
return true
}(), "Keys to delete still present")
if len(s.keys) != l {
log.Debug().Msgf("Delete: Some keys not found: len(s.keys) = %d, l = %d", len(s.keys), l)
}
return nil
}
func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error) {
pbKeys := make([]*pb.StoresKey, 0, len(opts.Keys))
pbValues := make([]*pb.StoresValue, 0, len(opts.Keys))
ks := sortIntoKeySlicese(opts.Keys)
if len(s.keys) == 0 {
log.Debug().Msgf("Get: No keys in store")
}
if s.keyLen == -1 {
s.keyLen = len(opts.Keys[0].Floats)
} else {
if len(opts.Keys[0].Floats) != s.keyLen {
return pb.StoresGetResult{}, fmt.Errorf("Try to get a key with length %d when existing length is %d", len(opts.Keys[0].Floats), s.keyLen)
}
}
tail_k := s.keys
tail_v := s.values
for i, k := range ks {
j, found := findInSortedSlice(tail_k, k)
if found {
pbKeys = append(pbKeys, &pb.StoresKey{
Floats: k,
})
pbValues = append(pbValues, &pb.StoresValue{
Bytes: tail_v[j],
})
tail_k = tail_k[j+1:]
tail_v = tail_v[j+1:]
} else {
assert(!hasKey(s.keys, k), fmt.Sprintf("Key exists, but was not found: i=%d, %v", i, k))
}
}
if len(pbKeys) != len(opts.Keys) {
log.Debug().Msgf("Get: Some keys not found: len(pbKeys) = %d, len(opts.Keys) = %d, len(s.Keys) = %d", len(pbKeys), len(opts.Keys), len(s.keys))
}
return pb.StoresGetResult{
Keys: pbKeys,
Values: pbValues,
}, nil
}
func isNormalized(k []float32) bool {
var sum float32
for _, v := range k {
sum += v
}
return sum == 1.0
}
// TODO: This we could replace with handwritten SIMD code
func normalizedCosineSimilarity(k1, k2 []float32) float32 {
assert(len(k1) == len(k2), fmt.Sprintf("normalizedCosineSimilarity: len(k1) = %d, len(k2) = %d", len(k1), len(k2)))
var dot float32
for i := 0; i < len(k1); i++ {
dot += k1[i] * k2[i]
}
assert(dot >= -1 && dot <= 1, fmt.Sprintf("dot = %f", dot))
// 2.0 * (1.0 - dot) would be the Euclidean distance
return dot
}
type PriorityItem struct {
Similarity float32
Key []float32
Value []byte
}
type PriorityQueue []*PriorityItem
func (pq PriorityQueue) Len() int { return len(pq) }
func (pq PriorityQueue) Less(i, j int) bool {
// Inverted because the most similar should be at the top
return pq[i].Similarity < pq[j].Similarity
}
func (pq PriorityQueue) Swap(i, j int) {
pq[i], pq[j] = pq[j], pq[i]
}
func (pq *PriorityQueue) Push(x any) {
item := x.(*PriorityItem)
*pq = append(*pq, item)
}
func (pq *PriorityQueue) Pop() any {
old := *pq
n := len(old)
item := old[n-1]
*pq = old[0 : n-1]
return item
}
func (s *Store) StoresFindNormalized(opts *pb.StoresFindOptions) (pb.StoresFindResult, error) {
tk := opts.Key.Floats
top_ks := make(PriorityQueue, 0, int(opts.TopK))
heap.Init(&top_ks)
for i, k := range s.keys {
sim := normalizedCosineSimilarity(tk, k)
heap.Push(&top_ks, &PriorityItem{
Similarity: sim,
Key: k,
Value: s.values[i],
})
if top_ks.Len() > int(opts.TopK) {
heap.Pop(&top_ks)
}
}
similarities := make([]float32, top_ks.Len())
pbKeys := make([]*pb.StoresKey, top_ks.Len())
pbValues := make([]*pb.StoresValue, top_ks.Len())
for i := top_ks.Len() - 1; i >= 0; i-- {
item := heap.Pop(&top_ks).(*PriorityItem)
similarities[i] = item.Similarity
pbKeys[i] = &pb.StoresKey{
Floats: item.Key,
}
pbValues[i] = &pb.StoresValue{
Bytes: item.Value,
}
}
return pb.StoresFindResult{
Keys: pbKeys,
Values: pbValues,
Similarities: similarities,
}, nil
}
func cosineSimilarity(k1, k2 []float32, mag1 float64) float32 {
assert(len(k1) == len(k2), fmt.Sprintf("cosineSimilarity: len(k1) = %d, len(k2) = %d", len(k1), len(k2)))
var dot, mag2 float64
for i := 0; i < len(k1); i++ {
dot += float64(k1[i] * k2[i])
mag2 += float64(k2[i] * k2[i])
}
sim := float32(dot / (mag1 * math.Sqrt(mag2)))
assert(sim >= -1 && sim <= 1, fmt.Sprintf("sim = %f", sim))
return sim
}
func (s *Store) StoresFindFallback(opts *pb.StoresFindOptions) (pb.StoresFindResult, error) {
tk := opts.Key.Floats
top_ks := make(PriorityQueue, 0, int(opts.TopK))
heap.Init(&top_ks)
var mag1 float64
for _, v := range tk {
mag1 += float64(v * v)
}
mag1 = math.Sqrt(mag1)
for i, k := range s.keys {
dist := cosineSimilarity(tk, k, mag1)
heap.Push(&top_ks, &PriorityItem{
Similarity: dist,
Key: k,
Value: s.values[i],
})
if top_ks.Len() > int(opts.TopK) {
heap.Pop(&top_ks)
}
}
similarities := make([]float32, top_ks.Len())
pbKeys := make([]*pb.StoresKey, top_ks.Len())
pbValues := make([]*pb.StoresValue, top_ks.Len())
for i := top_ks.Len() - 1; i >= 0; i-- {
item := heap.Pop(&top_ks).(*PriorityItem)
similarities[i] = item.Similarity
pbKeys[i] = &pb.StoresKey{
Floats: item.Key,
}
pbValues[i] = &pb.StoresValue{
Bytes: item.Value,
}
}
return pb.StoresFindResult{
Keys: pbKeys,
Values: pbValues,
Similarities: similarities,
}, nil
}
func (s *Store) StoresFind(opts *pb.StoresFindOptions) (pb.StoresFindResult, error) {
tk := opts.Key.Floats
if len(tk) != s.keyLen {
return pb.StoresFindResult{}, fmt.Errorf("Try to find key with length %d when existing length is %d", len(tk), s.keyLen)
}
if opts.TopK < 1 {
return pb.StoresFindResult{}, fmt.Errorf("opts.TopK = %d, must be >= 1", opts.TopK)
}
if s.keyLen == -1 {
s.keyLen = len(opts.Key.Floats)
} else {
if len(opts.Key.Floats) != s.keyLen {
return pb.StoresFindResult{}, fmt.Errorf("Try to add key with length %d when existing length is %d", len(opts.Key.Floats), s.keyLen)
}
}
if s.keysAreNormalized && isNormalized(tk) {
return s.StoresFindNormalized(opts)
} else {
if s.keysAreNormalized {
var sample []float32
if len(s.keys) > 5 {
sample = tk[:5]
} else {
sample = tk
}
log.Debug().Msgf("Trying to compare non-normalized key with normalized keys: %v", sample)
}
return s.StoresFindFallback(opts)
}
}

View File

@@ -5,7 +5,7 @@ package main
import (
"flag"
grpc "github.com/mudler/LocalAI/pkg/grpc"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (

View File

@@ -8,11 +8,11 @@ import (
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
"github.com/go-audio/wav"
"github.com/mudler/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/core/schema"
)
func ffmpegCommand(args []string) (string, error) {
cmd := exec.Command("ffmpeg", args...) // Constrain this to ffmpeg to permit security scanner to see that the command is safe.
func runCommand(command []string) (string, error) {
cmd := exec.Command(command[0], command[1:]...)
cmd.Env = os.Environ()
out, err := cmd.CombinedOutput()
return string(out), err
@@ -21,16 +21,16 @@ func ffmpegCommand(args []string) (string, error) {
// AudioToWav converts audio to wav for transcribe.
// TODO: use https://github.com/mccoyst/ogg?
func audioToWav(src, dst string) error {
commandArgs := []string{"-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
out, err := ffmpegCommand(commandArgs)
command := []string{"ffmpeg", "-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
out, err := runCommand(command)
if err != nil {
return fmt.Errorf("error: %w out: %s", err, out)
}
return nil
}
func Transcript(model whisper.Model, audiopath, language string, translate bool, threads uint) (schema.TranscriptionResult, error) {
res := schema.TranscriptionResult{}
func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.Result, error) {
res := schema.Result{}
dir, err := os.MkdirTemp("", "whisper")
if err != nil {
@@ -75,10 +75,6 @@ func Transcript(model whisper.Model, audiopath, language string, translate bool,
context.SetLanguage("auto")
}
if translate {
context.SetTranslate(true)
}
if err := context.Process(data, nil, nil); err != nil {
return res, err
}

View File

@@ -4,9 +4,9 @@ package main
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
)
type Whisper struct {
@@ -21,6 +21,6 @@ func (sd *Whisper) Load(opts *pb.ModelOptions) error {
return err
}
func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) {
return Transcript(sd.whisper, opts.Dst, opts.Language, opts.Translate, uint(opts.Threads))
func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.Result, error) {
return Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads))
}

View File

@@ -5,7 +5,7 @@ package main
import (
"flag"
grpc "github.com/mudler/LocalAI/pkg/grpc"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (

View File

@@ -7,8 +7,8 @@ import (
"os"
"path/filepath"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
piper "github.com/mudler/go-piper"
)

View File

@@ -1,17 +1,4 @@
.PHONY: autogptq
autogptq: protogen
bash install.sh
autogptq:
$(MAKE) -C ../common-env/transformers
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py
.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py:
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
.PHONY: clean
clean: protogen-clean
rm -rf venv __pycache__

View File

@@ -5,14 +5,12 @@ import signal
import sys
import os
import time
import base64
import grpc
import backend_pb2
import backend_pb2_grpc
from auto_gptq import AutoGPTQForCausalLM
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import AutoTokenizer
from transformers import TextGenerationPipeline
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
@@ -30,18 +28,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.Device != "":
device = request.Device
# support loading local model files
model_path = os.path.join(os.environ.get('MODELS_PATH', './'), request.Model)
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True, trust_remote_code=request.TrustRemoteCode)
tokenizer = AutoTokenizer.from_pretrained(request.Model, use_fast=request.UseFastTokenizer)
# support model `Qwen/Qwen-VL-Chat-Int4`
if "qwen-vl" in request.Model.lower():
self.model_name = "Qwen-VL-Chat"
model = AutoModelForCausalLM.from_pretrained(model_path,
trust_remote_code=request.TrustRemoteCode,
device_map="auto").eval()
else:
model = AutoGPTQForCausalLM.from_quantized(model_path,
model = AutoGPTQForCausalLM.from_quantized(request.Model,
model_basename=request.ModelBaseName,
use_safetensors=True,
trust_remote_code=request.TrustRemoteCode,
@@ -66,11 +55,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.TopP != 0.0:
top_p = request.TopP
prompt_images = self.recompile_vl_prompt(request)
compiled_prompt = prompt_images[0]
print(f"Prompt: {compiled_prompt}", file=sys.stderr)
# Implement Predict RPC
pipeline = TextGenerationPipeline(
model=self.model,
@@ -80,17 +64,10 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
top_p=top_p,
repetition_penalty=penalty,
)
t = pipeline(compiled_prompt)[0]["generated_text"]
print(f"generated_text: {t}", file=sys.stderr)
if compiled_prompt in t:
t = t.replace(compiled_prompt, "")
# house keeping. Remove the image files from /tmp folder
for img_path in prompt_images[1]:
try:
os.remove(img_path)
except Exception as e:
print(f"Error removing image file: {img_path}, {e}", file=sys.stderr)
t = pipeline(request.Prompt)[0]["generated_text"]
# Remove prompt from response if present
if request.Prompt in t:
t = t.replace(request.Prompt, "")
return backend_pb2.Result(message=bytes(t, encoding='utf-8'))
@@ -101,24 +78,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
# Not implemented yet
return self.Predict(request, context)
def recompile_vl_prompt(self, request):
prompt = request.Prompt
image_paths = []
if "qwen-vl" in self.model_name.lower():
# request.Images is an array which contains base64 encoded images. Iterate the request.Images array, decode and save each image to /tmp folder with a random filename.
# Then, save the image file paths to an array "image_paths".
# read "request.Prompt", replace "[img-%d]" with the image file paths in the order they appear in "image_paths". Save the new prompt to "prompt".
for i, img in enumerate(request.Images):
timestamp = str(int(time.time() * 1000)) # Generate timestamp
img_path = f"/tmp/vl-{timestamp}.jpg" # Use timestamp in filename
with open(img_path, "wb") as f:
f.write(base64.b64decode(img))
image_paths.append(img_path)
prompt = prompt.replace(f"[img-{i}]", "<img>" + img_path + "</img>,")
else:
prompt = request.Prompt
return (prompt, image_paths)
def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))

View File

@@ -0,0 +1,86 @@
name: autogptq
channels:
- defaults
dependencies:
- _libgcc_mutex=0.1=main
- _openmp_mutex=5.1=1_gnu
- bzip2=1.0.8=h7b6447c_0
- ca-certificates=2023.08.22=h06a4308_0
- ld_impl_linux-64=2.38=h1181459_1
- libffi=3.4.4=h6a678d5_0
- libgcc-ng=11.2.0=h1234567_1
- libgomp=11.2.0=h1234567_1
- libstdcxx-ng=11.2.0=h1234567_1
- libuuid=1.41.5=h5eee18b_0
- ncurses=6.4=h6a678d5_0
- openssl=3.0.11=h7f8727e_2
- pip=23.2.1=py311h06a4308_0
- python=3.11.5=h955ad1f_0
- readline=8.2=h5eee18b_0
- setuptools=68.0.0=py311h06a4308_0
- sqlite=3.41.2=h5eee18b_0
- tk=8.6.12=h1ccaba5_0
- wheel=0.41.2=py311h06a4308_0
- xz=5.4.2=h5eee18b_0
- zlib=1.2.13=h5eee18b_0
- pip:
- accelerate==0.23.0
- aiohttp==3.8.5
- aiosignal==1.3.1
- async-timeout==4.0.3
- attrs==23.1.0
- auto-gptq==0.4.2
- certifi==2023.7.22
- charset-normalizer==3.3.0
- datasets==2.14.5
- dill==0.3.7
- filelock==3.12.4
- frozenlist==1.4.0
- fsspec==2023.6.0
- grpcio==1.59.0
- huggingface-hub==0.16.4
- idna==3.4
- jinja2==3.1.2
- markupsafe==2.1.3
- mpmath==1.3.0
- multidict==6.0.4
- multiprocess==0.70.15
- networkx==3.1
- numpy==1.26.0
- nvidia-cublas-cu12==12.1.3.1
- nvidia-cuda-cupti-cu12==12.1.105
- nvidia-cuda-nvrtc-cu12==12.1.105
- nvidia-cuda-runtime-cu12==12.1.105
- nvidia-cudnn-cu12==8.9.2.26
- nvidia-cufft-cu12==11.0.2.54
- nvidia-curand-cu12==10.3.2.106
- nvidia-cusolver-cu12==11.4.5.107
- nvidia-cusparse-cu12==12.1.0.106
- nvidia-nccl-cu12==2.18.1
- nvidia-nvjitlink-cu12==12.2.140
- nvidia-nvtx-cu12==12.1.105
- packaging==23.2
- pandas==2.1.1
- peft==0.5.0
- protobuf==4.24.4
- psutil==5.9.5
- pyarrow==13.0.0
- python-dateutil==2.8.2
- pytz==2023.3.post1
- pyyaml==6.0.1
- regex==2023.10.3
- requests==2.31.0
- rouge==1.0.1
- safetensors>=0.3.3
- six==1.16.0
- sympy==1.12
- tokenizers==0.14.0
- torch==2.1.0
- tqdm==4.66.1
- transformers==4.34.0
- triton==2.1.0
- typing-extensions==4.8.0
- tzdata==2023.3
- urllib3==2.0.6
- xxhash==3.4.1
- yarl==1.9.2

View File

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,363 @@
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc
import backend_pb2 as backend__pb2
class BackendStub(object):
"""Missing associated documentation comment in .proto file."""
def __init__(self, channel):
"""Constructor.
Args:
channel: A grpc.Channel.
"""
self.Health = channel.unary_unary(
'/backend.Backend/Health',
request_serializer=backend__pb2.HealthMessage.SerializeToString,
response_deserializer=backend__pb2.Reply.FromString,
)
self.Predict = channel.unary_unary(
'/backend.Backend/Predict',
request_serializer=backend__pb2.PredictOptions.SerializeToString,
response_deserializer=backend__pb2.Reply.FromString,
)
self.LoadModel = channel.unary_unary(
'/backend.Backend/LoadModel',
request_serializer=backend__pb2.ModelOptions.SerializeToString,
response_deserializer=backend__pb2.Result.FromString,
)
self.PredictStream = channel.unary_stream(
'/backend.Backend/PredictStream',
request_serializer=backend__pb2.PredictOptions.SerializeToString,
response_deserializer=backend__pb2.Reply.FromString,
)
self.Embedding = channel.unary_unary(
'/backend.Backend/Embedding',
request_serializer=backend__pb2.PredictOptions.SerializeToString,
response_deserializer=backend__pb2.EmbeddingResult.FromString,
)
self.GenerateImage = channel.unary_unary(
'/backend.Backend/GenerateImage',
request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
response_deserializer=backend__pb2.Result.FromString,
)
self.AudioTranscription = channel.unary_unary(
'/backend.Backend/AudioTranscription',
request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
response_deserializer=backend__pb2.TranscriptResult.FromString,
)
self.TTS = channel.unary_unary(
'/backend.Backend/TTS',
request_serializer=backend__pb2.TTSRequest.SerializeToString,
response_deserializer=backend__pb2.Result.FromString,
)
self.TokenizeString = channel.unary_unary(
'/backend.Backend/TokenizeString',
request_serializer=backend__pb2.PredictOptions.SerializeToString,
response_deserializer=backend__pb2.TokenizationResponse.FromString,
)
self.Status = channel.unary_unary(
'/backend.Backend/Status',
request_serializer=backend__pb2.HealthMessage.SerializeToString,
response_deserializer=backend__pb2.StatusResponse.FromString,
)
class BackendServicer(object):
"""Missing associated documentation comment in .proto file."""
def Health(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def Predict(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def LoadModel(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def PredictStream(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def Embedding(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def GenerateImage(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def AudioTranscription(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def TTS(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def TokenizeString(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def Status(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def add_BackendServicer_to_server(servicer, server):
rpc_method_handlers = {
'Health': grpc.unary_unary_rpc_method_handler(
servicer.Health,
request_deserializer=backend__pb2.HealthMessage.FromString,
response_serializer=backend__pb2.Reply.SerializeToString,
),
'Predict': grpc.unary_unary_rpc_method_handler(
servicer.Predict,
request_deserializer=backend__pb2.PredictOptions.FromString,
response_serializer=backend__pb2.Reply.SerializeToString,
),
'LoadModel': grpc.unary_unary_rpc_method_handler(
servicer.LoadModel,
request_deserializer=backend__pb2.ModelOptions.FromString,
response_serializer=backend__pb2.Result.SerializeToString,
),
'PredictStream': grpc.unary_stream_rpc_method_handler(
servicer.PredictStream,
request_deserializer=backend__pb2.PredictOptions.FromString,
response_serializer=backend__pb2.Reply.SerializeToString,
),
'Embedding': grpc.unary_unary_rpc_method_handler(
servicer.Embedding,
request_deserializer=backend__pb2.PredictOptions.FromString,
response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
),
'GenerateImage': grpc.unary_unary_rpc_method_handler(
servicer.GenerateImage,
request_deserializer=backend__pb2.GenerateImageRequest.FromString,
response_serializer=backend__pb2.Result.SerializeToString,
),
'AudioTranscription': grpc.unary_unary_rpc_method_handler(
servicer.AudioTranscription,
request_deserializer=backend__pb2.TranscriptRequest.FromString,
response_serializer=backend__pb2.TranscriptResult.SerializeToString,
),
'TTS': grpc.unary_unary_rpc_method_handler(
servicer.TTS,
request_deserializer=backend__pb2.TTSRequest.FromString,
response_serializer=backend__pb2.Result.SerializeToString,
),
'TokenizeString': grpc.unary_unary_rpc_method_handler(
servicer.TokenizeString,
request_deserializer=backend__pb2.PredictOptions.FromString,
response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
),
'Status': grpc.unary_unary_rpc_method_handler(
servicer.Status,
request_deserializer=backend__pb2.HealthMessage.FromString,
response_serializer=backend__pb2.StatusResponse.SerializeToString,
),
}
generic_handler = grpc.method_handlers_generic_handler(
'backend.Backend', rpc_method_handlers)
server.add_generic_rpc_handlers((generic_handler,))
# This class is part of an EXPERIMENTAL API.
class Backend(object):
"""Missing associated documentation comment in .proto file."""
@staticmethod
def Health(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
backend__pb2.HealthMessage.SerializeToString,
backend__pb2.Reply.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def Predict(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
backend__pb2.PredictOptions.SerializeToString,
backend__pb2.Reply.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def LoadModel(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
backend__pb2.ModelOptions.SerializeToString,
backend__pb2.Result.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def PredictStream(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
backend__pb2.PredictOptions.SerializeToString,
backend__pb2.Reply.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def Embedding(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
backend__pb2.PredictOptions.SerializeToString,
backend__pb2.EmbeddingResult.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def GenerateImage(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
backend__pb2.GenerateImageRequest.SerializeToString,
backend__pb2.Result.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def AudioTranscription(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
backend__pb2.TranscriptRequest.SerializeToString,
backend__pb2.TranscriptResult.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def TTS(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
backend__pb2.TTSRequest.SerializeToString,
backend__pb2.Result.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def TokenizeString(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
backend__pb2.PredictOptions.SerializeToString,
backend__pb2.TokenizationResponse.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def Status(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
backend__pb2.HealthMessage.SerializeToString,
backend__pb2.StatusResponse.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

View File

@@ -1,14 +0,0 @@
#!/bin/bash
set -e
source $(dirname $0)/../common/libbackend.sh
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi
installRequirements

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch

View File

@@ -1,5 +0,0 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,7 +0,0 @@
accelerate
auto-gptq==0.7.1
grpcio==1.64.0
protobuf
torch
certifi
transformers

View File

@@ -1,4 +1,14 @@
#!/bin/bash
source $(dirname $0)/../common/libbackend.sh
startBackend $@
##
## A bash script wrapper that runs the autogptq server with conda
export PATH=$PATH:/opt/conda/bin
# Activate conda environment
source activate transformers
# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
python $DIR/autogptq.py $@

View File

@@ -1,6 +0,0 @@
#!/bin/bash
set -e
source $(dirname $0)/../common/libbackend.sh
runUnittests

View File

@@ -1,29 +1,15 @@
.PHONY: ttsbark
ttsbark: protogen
bash install.sh
ttsbark:
$(MAKE) -C ../common-env/transformers
.PHONY: run
run: protogen
run:
@echo "Running bark..."
bash run.sh
@echo "bark run."
.PHONY: test
test: protogen
test:
@echo "Testing bark..."
bash test.sh
@echo "bark tested."
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py
.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py:
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
.PHONY: clean
clean: protogen-clean
rm -rf venv __pycache__

Some files were not shown because too many files have changed in this diff Show More