Compare commits

..

1 Commit

Author               SHA1         Message     Date
Ettore Di Giacinto   ca65e9ee9b   Ci testsg   2024-06-13 23:34:11 +02:00
445 changed files with 5901 additions and 20528 deletions

View File

@@ -1,17 +0,0 @@
#!/bin/bash

cd /workspace

# Get the files into the volume without a bind mount
if [ ! -d ".git" ]; then
    git clone https://github.com/mudler/LocalAI.git .
else
    git fetch
fi

echo "Standard Post-Create script completed."

if [ -f "/devcontainer-customization/postcreate.sh" ]; then
    echo "Launching customization postcreate.sh"
    bash "/devcontainer-customization/postcreate.sh"
fi
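The hook at the end means a user can drop their own script into the mounted customization directory; a minimal sketch (the path comes from the check above, the contents are purely illustrative):

```
#!/bin/bash
# Hypothetical /devcontainer-customization/postcreate.sh
# Runs once, after the standard post-create steps above complete.
source "/.devcontainer-scripts/utils.sh"
config_user "Jane Doe" "jane@example.com"   # placeholder identity
```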

View File

@@ -1,16 +0,0 @@
#!/bin/bash

cd /workspace

# Grab the pre-stashed backend assets to avoid build issues
cp -r /build/backend-assets /workspace/backend-assets

# Ensures generated source files are present upon load
make prepare

echo "Standard Post-Start script completed."

if [ -f "/devcontainer-customization/poststart.sh" ]; then
    echo "Launching customization poststart.sh"
    bash "/devcontainer-customization/poststart.sh"
fi

View File

@@ -1,55 +0,0 @@
#!/bin/bash

# This file contains some really simple functions that are useful when building up customization scripts.

# Checks if the git config has a user registered - and sets it up if not.
#
# Param 1: name
# Param 2: email
#
config_user() {
    echo "Configuring git for $1 <$2>"
    local gcn=$(git config --global user.name)
    if [ -z "${gcn}" ]; then
        echo "Setting up git user / remote"
        git config --global user.name "$1"
        git config --global user.email "$2"
    fi
}

# Checks if the git remote is configured - and sets it up if not. Fetches either way.
#
# Param 1: remote name
# Param 2: remote url
#
config_remote() {
    echo "Adding git remote and fetching $2 as $1"
    local gr=$(git remote -v | grep $1)
    if [ -z "${gr}" ]; then
        git remote add $1 $2
    fi
    git fetch $1
}

# Setup special .ssh files
# Prints out lines of text to make things pretty
#
# Param 1: bash array, filenames relative to the customization directory that should be copied to ~/.ssh
setup_ssh() {
    echo "starting ~/.ssh directory setup..."
    mkdir -p "${HOME}/.ssh"
    chmod 0700 "${HOME}/.ssh"
    echo "-----"
    local files=("$@")
    for file in "${files[@]}" ; do
        local cfile="/devcontainer-customization/${file}"
        local hfile="${HOME}/.ssh/${file}"
        if [ ! -f "${hfile}" ]; then
            echo "copying \"${file}\""
            cp "${cfile}" "${hfile}"
            chmod 600 "${hfile}"
        fi
    done
    echo "~/.ssh directory setup complete!"
}

View File

@@ -1,25 +0,0 @@
Place any additional resources your environment requires in this directory.

Script hooks are currently called for:
`postcreate.sh` and `poststart.sh`

If files with those names exist here, they will be called at the end of the normal script.

This is a good place to set things like `git config --global user.name` - and to handle any other files that are mounted via this directory.

To assist in doing so, `source /.devcontainer-scripts/utils.sh` will provide utility functions that may be useful - for example:

```
#!/bin/bash
source "/.devcontainer-scripts/utils.sh"
sshfiles=("config" "key.pub")
setup_ssh "${sshfiles[@]}"
config_user "YOUR NAME" "YOUR EMAIL"
config_remote "REMOTE NAME" "REMOTE URL"
```

View File

@@ -1,24 +0,0 @@
{
    "$schema": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json",
    "name": "LocalAI",
    "workspaceFolder": "/workspace",
    "dockerComposeFile": [ "./docker-compose-devcontainer.yml" ],
    "service": "api",
    "shutdownAction": "stopCompose",
    "customizations": {
        "vscode": {
            "extensions": [
                "golang.go",
                "ms-vscode.makefile-tools",
                "ms-azuretools.vscode-docker",
                "ms-python.python",
                "ms-python.debugpy",
                "wayou.vscode-todo-highlight",
                "waderyan.gitblame"
            ]
        }
    },
    "forwardPorts": [8080, 3000],
    "postCreateCommand": "bash /.devcontainer-scripts/postcreate.sh",
    "postStartCommand": "bash /.devcontainer-scripts/poststart.sh"
}
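As a quick way to exercise this configuration outside VS Code, the devcontainer CLI can bring the container up from the repository root; a sketch, assuming `@devcontainers/cli` is installed:

```
# Build and start the container described by .devcontainer/devcontainer.json
devcontainer up --workspace-folder .
# Run the same prep step the poststart hook performs
devcontainer exec --workspace-folder . make prepare
```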

View File

@@ -1,48 +0,0 @@
services:
  api:
    build:
      context: ..
      dockerfile: Dockerfile
      target: devcontainer
      args:
        - FFMPEG=true
        - IMAGE_TYPE=extras
        - GO_TAGS=stablediffusion p2p tts
    env_file:
      - ../.env
    ports:
      - 8080:8080
    volumes:
      - localai_workspace:/workspace
      - ../models:/host-models
      - ./customization:/devcontainer-customization
    command: /bin/sh -c "while sleep 1000; do :; done"
    cap_add:
      - SYS_PTRACE
    security_opt:
      - seccomp:unconfined
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
    ports:
      - 9090:9090
    restart: unless-stopped
    volumes:
      - ./prometheus:/etc/prometheus
      - prom_data:/prometheus
  grafana:
    image: grafana/grafana
    container_name: grafana
    ports:
      - 3000:3000
    restart: unless-stopped
    environment:
      - GF_SECURITY_ADMIN_USER=admin
      - GF_SECURITY_ADMIN_PASSWORD=grafana
    volumes:
      - ./grafana:/etc/grafana/provisioning/datasources
volumes:
  prom_data:
  localai_workspace:
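The monitoring half of this file also works standalone; a sketch, assuming the compose file sits in `.devcontainer/` next to devcontainer.json:

```
# Start only the monitoring services from the devcontainer stack
docker compose -f .devcontainer/docker-compose-devcontainer.yml up -d prometheus grafana
# Grafana then listens on http://localhost:3000 (admin/grafana, per the environment above)
```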

View File

@@ -1,10 +0,0 @@
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    url: http://prometheus:9090
    isDefault: true
    access: proxy
    editable: true

View File

@@ -1,21 +0,0 @@
global:
  scrape_interval: 15s
  scrape_timeout: 10s
  evaluation_interval: 15s
alerting:
  alertmanagers:
    - static_configs:
        - targets: []
      scheme: http
      timeout: 10s
      api_version: v1
scrape_configs:
  - job_name: prometheus
    honor_timestamps: true
    scrape_interval: 15s
    scrape_timeout: 10s
    metrics_path: /metrics
    scheme: http
    static_configs:
      - targets:
          - localhost:9090
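Once the stack is running, the scrape job above can be verified through Prometheus' HTTP API; a quick check, assuming `jq` is available and port 9090 is published as configured:

```
# List scrape targets and their health
curl -s http://localhost:9090/api/v1/targets \
  | jq '.data.activeTargets[] | {job: .labels.job, health: .health}'
```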

View File

@@ -1,7 +1,6 @@
 .idea
 .github
 .vscode
-.devcontainer
 models
 examples/chatbot-ui/models
 examples/rwkv/models

.env (3 changed lines)
View File

@@ -79,9 +79,6 @@
 ### Enable to run parallel requests
 # LOCALAI_PARALLEL_REQUESTS=true
-# Enable to allow p2p mode
-# LOCALAI_P2P=true
 ### Watchdog settings
 ###
 # Enables watchdog to kill backends that are inactive for too much time

.github/bump_deps.sh (vendored, 13 changed lines)
View File

@@ -6,17 +6,4 @@ VAR=$3
 LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")

-# Read $VAR from Makefile (only first match)
-set +e
-CURRENT_COMMIT="$(grep -m1 "^$VAR?=" Makefile | cut -d'=' -f2)"
-set -e
-
 sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
-
-if [ -z "$CURRENT_COMMIT" ]; then
-    echo "Could not find $VAR in Makefile."
-    exit 0
-fi
-
-echo "Changes: https://github.com/$REPO/compare/${CURRENT_COMMIT}..${LAST_COMMIT}" >> "${VAR}_message.txt"
-echo "${LAST_COMMIT}" >> "${VAR}_commit.txt"

View File

@@ -1,85 +0,0 @@
import hashlib
from huggingface_hub import hf_hub_download, get_paths_info
import requests
import sys
import os

uri = sys.argv[1]
file_name = uri.split('/')[-1]

# Function to parse the URI and determine download method
def parse_uri(uri):
    if uri.startswith('huggingface://'):
        repo_id = uri.split('://')[1]
        return 'huggingface', repo_id.rsplit('/', 1)[0]
    elif 'huggingface.co' in uri:
        parts = uri.split('/resolve/')
        if len(parts) > 1:
            repo_path = parts[0].split('https://huggingface.co/')[-1]
            return 'huggingface', repo_path
    return 'direct', uri

def calculate_sha256(file_path):
    sha256_hash = hashlib.sha256()
    with open(file_path, 'rb') as f:
        for byte_block in iter(lambda: f.read(4096), b''):
            sha256_hash.update(byte_block)
    return sha256_hash.hexdigest()

def manual_safety_check_hf(repo_id):
    scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
    scan = scanResponse.json()
    # Check if 'hasUnsafeFile' exists in the response
    if 'hasUnsafeFile' in scan:
        if scan['hasUnsafeFile']:
            return scan
        else:
            return None
    else:
        return None

download_type, repo_id_or_url = parse_uri(uri)

new_checksum = None
file_path = None

# Decide download method based on URI type
if download_type == 'huggingface':
    # Check if the repo is flagged as dangerous by HF
    hazard = manual_safety_check_hf(repo_id_or_url)
    if hazard is not None:
        print(f'Error: HuggingFace has detected security problems for {repo_id_or_url}: {str(hazard)}', file=sys.stderr)
        sys.exit(5)
    # Use HF API to pull sha
    for file in get_paths_info(repo_id_or_url, [file_name], repo_type='model'):
        try:
            new_checksum = file.lfs.sha256
            break
        except Exception as e:
            print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
            sys.exit(2)
    if new_checksum is None:
        try:
            file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
        except Exception as e:
            print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
            sys.exit(2)
else:
    response = requests.get(repo_id_or_url)
    if response.status_code == 200:
        with open(file_name, 'wb') as f:
            f.write(response.content)
        file_path = file_name
    elif response.status_code == 404:
        print(f'File not found: {response.status_code}', file=sys.stderr)
        sys.exit(2)
    else:
        print(f'Error downloading file: {response.status_code}', file=sys.stderr)
        sys.exit(1)

if new_checksum is None:
    new_checksum = calculate_sha256(file_path)
    print(new_checksum)
    os.remove(file_path)
else:
    print(new_checksum)
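A caller is expected to branch on the script's exit codes (5 for a repo flagged unsafe, 2 for a missing file, 1 for other download errors, per the code above); a sketch with a purely illustrative URI:

```
# Hypothetical invocation; the URI is an example, not a gallery entry
new_checksum=$(python3 .github/check_and_update.py "huggingface://example-org/example-model/model.gguf")
case $? in
  0) echo "checksum: $new_checksum" ;;
  2) echo "file not found upstream" ;;
  5) echo "repository flagged unsafe by Hugging Face" ;;
  *) echo "download error" ;;
esac
```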

View File

@@ -14,14 +14,77 @@ function check_and_update_checksum() {
     idx="$5"

     # Download the file and calculate new checksum using Python
-    new_checksum=$(python3 ./.github/check_and_update.py $uri)
-    result=$?
-
-    if [[ $result -eq 5 ]]; then
-        echo "Contaminated entry detected, deleting entry for $model_name..."
-        yq eval -i "del([$idx])" "$input_yaml"
-        return
-    fi
+    new_checksum=$(python3 -c "
+import hashlib
+from huggingface_hub import hf_hub_download, get_paths_info
+import requests
+import sys
+import os
+
+uri = '$uri'
+file_name = uri.split('/')[-1]
+
+# Function to parse the URI and determine download method
+def parse_uri(uri):
+    if uri.startswith('huggingface://'):
+        repo_id = uri.split('://')[1]
+        return 'huggingface', repo_id.rsplit('/', 1)[0]
+    elif 'huggingface.co' in uri:
+        parts = uri.split('/resolve/')
+        if len(parts) > 1:
+            repo_path = parts[0].split('https://huggingface.co/')[-1]
+            return 'huggingface', repo_path
+    return 'direct', uri
+
+def calculate_sha256(file_path):
+    sha256_hash = hashlib.sha256()
+    with open(file_path, 'rb') as f:
+        for byte_block in iter(lambda: f.read(4096), b''):
+            sha256_hash.update(byte_block)
+    return sha256_hash.hexdigest()
+
+download_type, repo_id_or_url = parse_uri(uri)
+
+new_checksum = None
+
+# Decide download method based on URI type
+if download_type == 'huggingface':
+    # Use HF API to pull sha
+    for file in get_paths_info(repo_id_or_url, [file_name], repo_type='model'):
+        try:
+            new_checksum = file.lfs.sha256
+            break
+        except Exception as e:
+            print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
+            sys.exit(2)
+    if new_checksum is None:
+        try:
+            file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
+        except Exception as e:
+            print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
+            sys.exit(2)
+else:
+    response = requests.get(repo_id_or_url)
+    if response.status_code == 200:
+        with open(file_name, 'wb') as f:
+            f.write(response.content)
+        file_path = file_name
+    elif response.status_code == 404:
+        print(f'File not found: {response.status_code}', file=sys.stderr)
+        sys.exit(2)
+    else:
+        print(f'Error downloading file: {response.status_code}', file=sys.stderr)
+        sys.exit(1)
+
+if new_checksum is None:
+    new_checksum = calculate_sha256(file_path)
+    print(new_checksum)
+    os.remove(file_path)
+else:
+    print(new_checksum)
+")

     if [[ "$new_checksum" == "" ]]; then
         echo "Error calculating checksum for $file_name. Skipping..."
@@ -31,7 +94,7 @@ function check_and_update_checksum() {
     echo "Checksum for $file_name: $new_checksum"

     # Compare and update the YAML file if checksums do not match
+    result=$?
     if [[ $result -eq 2 ]]; then
         echo "File not found, deleting entry for $file_name..."
         # yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\"))" "$input_yaml"
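The `yq` deletions this function relies on can be tried in isolation against a scratch copy of the gallery index; a sketch mirroring the commented-out expression above (index and filename illustrative):

```
# Drop a single file entry from the gallery item at index 3
yq eval -i 'del(.[3].files[] | select(.filename == "model.gguf"))' gallery/index.yaml
```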

View File

@@ -6,7 +6,6 @@ import (
"io/ioutil" "io/ioutil"
"os" "os"
"github.com/microcosm-cc/bluemonday"
"gopkg.in/yaml.v3" "gopkg.in/yaml.v3"
) )
@@ -76,7 +75,7 @@ var modelPageTemplate string = `
<div class="container mx-auto px-4 py-4"> <div class="container mx-auto px-4 py-4">
<div class="flex items-center justify-between"> <div class="flex items-center justify-between">
<div class="flex items-center"> <div class="flex items-center">
<a href="/" class="text-white text-xl font-bold"><img src="https://github.com/mudler/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a> <a href="/" class="text-white text-xl font-bold"><img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
<a href="/" class="text-white text-xl font-bold">LocalAI</a> <a href="/" class="text-white text-xl font-bold">LocalAI</a>
</div> </div>
<!-- Menu button for small screens --> <!-- Menu button for small screens -->
@@ -93,9 +92,9 @@ var modelPageTemplate string = `
 <!-- Collapsible menu for small screens -->
 <div class="hidden lg:hidden" id="mobile-menu">
     <div class="pt-4 pb-3 border-t border-gray-700">
         <a href="https://localai.io" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
     </div>
 </div>
 </div>
@@ -115,17 +114,17 @@ var modelPageTemplate string = `
 <h2 class="text-center text-3xl font-semibold text-gray-100">
-    🖼️ Available {{.AvailableModels}} models</i> <a href="https://localai.io/models/" target="_blank" >
+    🖼️ Available {{.AvailableModels}} models</i> repositories <a href="https://localai.io/models/" target="_blank" >
     <i class="fas fa-circle-info pr-2"></i>
 </a></h2>
 <h3>
-    Refer to the Model gallery <a href="https://localai.io/models/" target="_blank" ><i class="fas fa-circle-info pr-2"></i></a> for more information on how to use the models with LocalAI.<br>
+    Refer to <a href="https://localai.io/models" target=_blank> Model gallery</a> for more information on how to use the models with LocalAI.
     You can install models with the CLI command <code>local-ai models install <model-name></code>. or by using the WebUI.
 </h3>
 <input class="form-control appearance-none block w-full mt-5 px-3 py-2 text-base font-normal text-gray-300 pb-2 mb-5 bg-gray-800 bg-clip-padding border border-solid border-gray-600 rounded transition ease-in-out m-0 focus:text-gray-300 focus:bg-gray-900 focus:border-blue-500 focus:outline-none" type="search"
     id="searchbox" placeholder="Live search keyword..">
 <div class="dark grid grid-cols-1 grid-rows-1 md:grid-cols-3 block rounded-lg shadow-secondary-1 dark:bg-surface-dark">
 {{ range $_, $model := .Models }}
@@ -140,10 +139,10 @@ var modelPageTemplate string = `
 </div>
 <div class="p-6 text-surface dark:text-white">
     <h5 class="mb-2 text-xl font-medium leading-tight">{{$model.Name}}</h5>
     <p class="mb-4 text-base truncate">{{ $model.Description }}</p>
 </div>
 <div class="px-6 pt-4 pb-2">
@@ -179,7 +178,7 @@ var modelPageTemplate string = `
     {{ $model.Description }}
 </p>
 <p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
     To install the model with the CLI, run: <br>
     <code> local-ai models install {{$model.Name}} </code> <br>
@@ -194,7 +193,7 @@ var modelPageTemplate string = `
 <ul>
     {{ range $_, $u := $model.URLs }}
     <li><a href="{{ $u }}" target=_blank><i class="fa-solid fa-link"></i> {{ $u }}</a></li>
     {{ end }}
 </ul>
 </p>
 </div>
@@ -210,7 +209,7 @@ var modelPageTemplate string = `
         </div>
     </div>
 </div>
 {{ end }}
 </div>
 </div>
@@ -222,10 +221,10 @@ var lazyLoadInstance = new LazyLoad({
 });

 let cards = document.querySelectorAll('.box')

 function liveSearch() {
     let search_query = document.getElementById("searchbox").value;
     //Use innerText if all contents are visible
     //Use textContent for including hidden elements
     for (var i = 0; i < cards.length; i++) {
@@ -239,8 +238,8 @@ function liveSearch() {
 }

 //A little delay
 let typingTimer;
 let typeInterval = 500;
 let searchInput = document.getElementById('searchbox');

 searchInput.addEventListener('keyup', () => {
@@ -280,12 +279,6 @@ func main() {
         return
     }

-    // Ensure that all arbitrary text content is sanitized before display
-    for i, m := range models {
-        models[i].Name = bluemonday.StrictPolicy().Sanitize(m.Name)
-        models[i].Description = bluemonday.StrictPolicy().Sanitize(m.Description)
-    }
-
     // render the template
     data := struct {
         Models []*GalleryModel
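The template above tells users to install gallery entries from the CLI; with a model name that appears elsewhere in this changeset, that is:

```
# Install a model from the gallery by name, as the template instructs
local-ai models install hermes-2-theta-llama-3-8b
```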

.github/dependabot.yml (vendored, 108 changed lines)
View File

@@ -1,10 +1,6 @@
 # https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
 version: 2
 updates:
-  - package-ecosystem: "gitsubmodule"
-    directory: "/"
-    schedule:
-      interval: "weekly"
   - package-ecosystem: "gomod"
     directory: "/"
     schedule:
@@ -27,107 +23,3 @@ updates:
     schedule:
       # Check for updates to GitHub Actions every weekday
       interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/autogptq"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/bark"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/common/template"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/coqui"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/diffusers"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/exllama"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/exllama2"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/mamba"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/openvoice"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/parler-tts"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/rerankers"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/sentencetransformers"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/transformers"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/transformers-musicgen"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/vall-e-x"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/vllm"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/examples/chainlit"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/examples/functions"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/examples/langchain/langchainpy-localai-example"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/examples/langchain-chroma"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/examples/streamlit-bot"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "docker"
-    directory: "/examples/k8sgpt"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "docker"
-    directory: "/examples/kubernetes"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "docker"
-    directory: "/examples/langchain"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "gomod"
-    directory: "/examples/semantic-todo"
-    schedule:
-      interval: "weekly"
-  - package-ecosystem: "docker"
-    directory: "/examples/telegram-bot"
-    schedule:
-      interval: "weekly"

.github/release.yml (vendored, 3 changed lines)
View File

@@ -13,9 +13,6 @@ changelog:
     labels:
       - bug
       - regression
-  - title: "🖧 P2P area"
-    labels:
-      - area/p2p
   - title: Exciting New Features 🎉
     labels:
       - Semver-Minor

View File

@@ -9,6 +9,9 @@ jobs:
       fail-fast: false
       matrix:
         include:
+          - repository: "go-skynet/go-llama.cpp"
+            variable: "GOLLAMA_VERSION"
+            branch: "master"
           - repository: "ggerganov/llama.cpp"
             variable: "CPPLLAMA_VERSION"
             branch: "master"
@@ -27,6 +30,9 @@ jobs:
- repository: "go-skynet/bloomz.cpp" - repository: "go-skynet/bloomz.cpp"
variable: "BLOOMZ_VERSION" variable: "BLOOMZ_VERSION"
branch: "main" branch: "main"
- repository: "nomic-ai/gpt4all"
variable: "GPT4ALL_VERSION"
branch: "main"
- repository: "mudler/go-ggllm.cpp" - repository: "mudler/go-ggllm.cpp"
variable: "GOGGLLM_VERSION" variable: "GOGGLLM_VERSION"
branch: "master" branch: "master"
@@ -40,30 +46,17 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Bump dependencies 🔧
-        id: bump
         run: |
           bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
-          {
-            echo 'message<<EOF'
-            cat "${{ matrix.variable }}_message.txt"
-            echo EOF
-          } >> "$GITHUB_OUTPUT"
-          {
-            echo 'commit<<EOF'
-            cat "${{ matrix.variable }}_commit.txt"
-            echo EOF
-          } >> "$GITHUB_OUTPUT"
-          rm -rfv ${{ matrix.variable }}_message.txt
-          rm -rfv ${{ matrix.variable }}_commit.txt
       - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v7
+        uses: peter-evans/create-pull-request@v6
         with:
           token: ${{ secrets.UPDATE_BOT_TOKEN }}
           push-to-fork: ci-forks/LocalAI
           commit-message: ':arrow_up: Update ${{ matrix.repository }}'
-          title: 'chore: :arrow_up: Update ${{ matrix.repository }} to `${{ steps.bump.outputs.commit }}`'
+          title: ':arrow_up: Update ${{ matrix.repository }}'
           branch: "update/${{ matrix.variable }}"
-          body: ${{ steps.bump.outputs.message }}
+          body: Bump of ${{ matrix.repository }} version
           signoff: true
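The removed heredoc-style blocks above are the stock GitHub Actions idiom for passing multiline values between steps; in general form (the output name and file are illustrative):

```
# Expose a multiline step output named "notes"
{
  echo 'notes<<EOF'
  cat notes.txt
  echo EOF
} >> "$GITHUB_OUTPUT"
```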

View File

@@ -17,12 +17,12 @@ jobs:
       run: |
         bash .github/bump_docs.sh ${{ matrix.repository }}
     - name: Create Pull Request
-      uses: peter-evans/create-pull-request@v7
+      uses: peter-evans/create-pull-request@v6
       with:
         token: ${{ secrets.UPDATE_BOT_TOKEN }}
         push-to-fork: ci-forks/LocalAI
         commit-message: ':arrow_up: Update docs version ${{ matrix.repository }}'
-        title: 'docs: :arrow_up: update docs version ${{ matrix.repository }}'
+        title: ':arrow_up: Update docs version ${{ matrix.repository }}'
         branch: "update/docs"
         body: Bump of ${{ matrix.repository }} version inside docs
         signoff: true

View File

@@ -20,12 +20,12 @@ jobs:
       run: |
         sudo apt-get update
         sudo apt-get install -y pip wget
         sudo pip install --upgrade pip
         pip install huggingface_hub
     - name: 'Setup yq'
       uses: dcarbone/install-yq-action@v1.1.1
       with:
-        version: 'v4.44.2'
+        version: 'v4.43.1'
         download-compressed: true
         force: true
@@ -36,12 +36,12 @@ jobs:
         sudo chmod 777 /hf_cache
         bash .github/checksum_checker.sh gallery/index.yaml
     - name: Create Pull Request
-      uses: peter-evans/create-pull-request@v7
+      uses: peter-evans/create-pull-request@v6
       with:
         token: ${{ secrets.UPDATE_BOT_TOKEN }}
         push-to-fork: ci-forks/LocalAI
         commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
-        title: 'chore(model-gallery): :arrow_up: update checksum'
+        title: 'models(gallery): :arrow_up: update checksum'
         branch: "update/checksum"
         body: Updating checksums in gallery/index.yaml
         signoff: true

View File

@@ -14,7 +14,7 @@ jobs:
     steps:
     - name: Dependabot metadata
       id: metadata
-      uses: dependabot/fetch-metadata@v2.2.0
+      uses: dependabot/fetch-metadata@v2.1.0
       with:
         github-token: "${{ secrets.GITHUB_TOKEN }}"
         skip-commit-verification: true

View File

@@ -1,64 +0,0 @@
name: Explorer deployment

on:
  push:
    branches:
      - master
    tags:
      - 'v*'

concurrency:
  group: ci-deploy-${{ github.head_ref || github.ref }}-${{ github.repository }}

jobs:
  build-linux:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          submodules: true
      - uses: actions/setup-go@v5
        with:
          go-version: '1.21.x'
          cache: false
      - name: Dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
          make protogen-go
      - name: Build api
        run: |
          CGO_ENABLED=0 make build-api
      - name: rm
        uses: appleboy/ssh-action@v1.1.0
        with:
          host: ${{ secrets.EXPLORER_SSH_HOST }}
          username: ${{ secrets.EXPLORER_SSH_USERNAME }}
          key: ${{ secrets.EXPLORER_SSH_KEY }}
          port: ${{ secrets.EXPLORER_SSH_PORT }}
          script: |
            sudo rm -rf local-ai/ || true
      - name: copy file via ssh
        uses: appleboy/scp-action@v0.1.7
        with:
          host: ${{ secrets.EXPLORER_SSH_HOST }}
          username: ${{ secrets.EXPLORER_SSH_USERNAME }}
          key: ${{ secrets.EXPLORER_SSH_KEY }}
          port: ${{ secrets.EXPLORER_SSH_PORT }}
          source: "local-ai"
          overwrite: true
          rm: true
          target: ./local-ai
      - name: restarting
        uses: appleboy/ssh-action@v1.1.0
        with:
          host: ${{ secrets.EXPLORER_SSH_HOST }}
          username: ${{ secrets.EXPLORER_SSH_USERNAME }}
          key: ${{ secrets.EXPLORER_SSH_KEY }}
          port: ${{ secrets.EXPLORER_SSH_PORT }}
          script: |
            sudo cp -rfv local-ai/local-ai /usr/bin/local-ai
            sudo systemctl restart local-ai

View File

@@ -1,83 +0,0 @@
name: Comment PRs
on:
  pull_request_target:

jobs:
  comment-pr:
    env:
      MODEL_NAME: hermes-2-theta-llama-3-8b
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
        with:
          ref: "${{ github.event.pull_request.merge_commit_sha }}"
          fetch-depth: 0 # needed to checkout all branches for this Action to work
      - uses: mudler/localai-github-action@v1
        with:
          model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
      # Check the PR diff using the current branch and the base branch of the PR
      - uses: GrantBirki/git-diff-action@v2.7.0
        id: git-diff-action
        with:
          json_diff_file_output: diff.json
          raw_diff_file_output: diff.txt
          file_output_only: "true"
          base_branch: ${{ github.event.pull_request.base.sha }}
      - name: Show diff
        env:
          DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
        run: |
          cat $DIFF
      - name: Summarize
        env:
          DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
        id: summarize
        run: |
          input="$(cat $DIFF)"

          # Define the LocalAI API endpoint
          API_URL="http://localhost:8080/chat/completions"

          # Create a JSON payload using jq to handle special characters
          json_payload=$(jq -n --arg input "$input" '{
            model: "'$MODEL_NAME'",
            messages: [
              {
                role: "system",
                content: "You are LocalAI-bot in Github that helps understanding PRs and assess complexity. Explain what has changed in this PR diff and why"
              },
              {
                role: "user",
                content: $input
              }
            ]
          }')

          # Send the request to LocalAI
          response=$(curl -s -X POST $API_URL \
            -H "Content-Type: application/json" \
            -d "$json_payload")

          # Extract the summary from the response
          summary="$(echo $response | jq -r '.choices[0].message.content')"

          # Print the summary
          # -H "Authorization: Bearer $API_KEY" \
          echo "Summary:"
          echo "$summary"
          echo "payload sent"
          echo "$json_payload"
          {
            echo 'message<<EOF'
            echo "$summary"
            echo EOF
          } >> "$GITHUB_OUTPUT"
          docker logs --tail 10 local-ai
      - uses: mshick/add-pr-comment@v2
        if: always()
        with:
          repo-token: ${{ secrets.UPDATE_BOT_TOKEN }}
          message: ${{ steps.summarize.outputs.message }}
          message-failure: |
            Uh oh! Could not analyze this PR, maybe it's too big?

View File

@@ -75,7 +75,7 @@ jobs:
       uses: actions/checkout@v4
     - name: Cache GRPC
-      uses: docker/build-push-action@v6
+      uses: docker/build-push-action@v5
       with:
         builder: ${{ steps.buildx.outputs.name }}
         # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
@@ -84,11 +84,11 @@ jobs:
         build-args: |
           GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
           GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-          GRPC_VERSION=v1.65.0
+          GRPC_VERSION=v1.64.0
         context: .
         file: ./Dockerfile
         cache-to: type=gha,ignore-error=true
         cache-from: type=gha
         target: grpc
         platforms: ${{ matrix.platforms }}
         push: false

View File

@@ -15,7 +15,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - base-image: intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04
+          - base-image: intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04
             runs-on: 'ubuntu-latest'
             platforms: 'linux/amd64'
     runs-on: ${{matrix.runs-on}}
@@ -46,7 +46,7 @@ jobs:
       uses: actions/checkout@v4
     - name: Cache Intel images
-      uses: docker/build-push-action@v6
+      uses: docker/build-push-action@v5
       with:
         builder: ${{ steps.buildx.outputs.name }}
         build-args: |

View File

@@ -32,22 +32,21 @@ jobs:
     strategy:
       # Pushing with all jobs in parallel
       # eats the bandwidth of all the nodes
-      max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }}
+      max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
       matrix:
         include:
-          # This is basically covered by the AIO test
-          # - build-type: ''
-          #   platforms: 'linux/amd64'
-          #   tag-latest: 'false'
-          #   tag-suffix: '-ffmpeg'
-          #   ffmpeg: 'true'
-          #   image-type: 'extras'
-          #   runs-on: 'arc-runner-set'
-          #   base-image: "ubuntu:22.04"
-          #   makeflags: "--jobs=3 --output-sync=target"
+          - build-type: ''
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-ffmpeg'
+            ffmpeg: 'true'
+            image-type: 'extras'
+            runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
+            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "1"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -56,85 +55,76 @@ jobs:
             runs-on: 'arc-runner-set'
             base-image: "ubuntu:22.04"
             makeflags: "--jobs=3 --output-sync=target"
-          # - build-type: 'hipblas'
-          #   platforms: 'linux/amd64'
-          #   tag-latest: 'false'
-          #   tag-suffix: '-hipblas'
-          #   ffmpeg: 'false'
-          #   image-type: 'extras'
-          #   base-image: "rocm/dev-ubuntu-22.04:6.1"
-          #   grpc-base-image: "ubuntu:22.04"
-          #   runs-on: 'arc-runner-set'
-          #   makeflags: "--jobs=3 --output-sync=target"
-          # - build-type: 'sycl_f16'
-          #   platforms: 'linux/amd64'
-          #   tag-latest: 'false'
-          #   base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-          #   grpc-base-image: "ubuntu:22.04"
-          #   tag-suffix: 'sycl-f16-ffmpeg'
-          #   ffmpeg: 'true'
-          #   image-type: 'extras'
-          #   runs-on: 'arc-runner-set'
-          #   makeflags: "--jobs=3 --output-sync=target"
-  # core-image-build:
-  #   uses: ./.github/workflows/image_build.yml
-  #   with:
-  #     tag-latest: ${{ matrix.tag-latest }}
-  #     tag-suffix: ${{ matrix.tag-suffix }}
-  #     ffmpeg: ${{ matrix.ffmpeg }}
-  #     image-type: ${{ matrix.image-type }}
-  #     build-type: ${{ matrix.build-type }}
-  #     cuda-major-version: ${{ matrix.cuda-major-version }}
-  #     cuda-minor-version: ${{ matrix.cuda-minor-version }}
-  #     platforms: ${{ matrix.platforms }}
-  #     runs-on: ${{ matrix.runs-on }}
-  #     base-image: ${{ matrix.base-image }}
-  #     grpc-base-image: ${{ matrix.grpc-base-image }}
-  #     makeflags: ${{ matrix.makeflags }}
-  #   secrets:
-  #     dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
-  #     dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
-  #     quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-  #     quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-  #   strategy:
-  #     matrix:
-  #       include:
-  #         - build-type: ''
-  #           platforms: 'linux/amd64'
-  #           tag-latest: 'false'
-  #           tag-suffix: '-ffmpeg-core'
-  #           ffmpeg: 'true'
-  #           image-type: 'core'
-  #           runs-on: 'ubuntu-latest'
-  #           base-image: "ubuntu:22.04"
-  #           makeflags: "--jobs=4 --output-sync=target"
-  #         - build-type: 'sycl_f16'
-  #           platforms: 'linux/amd64'
-  #           tag-latest: 'false'
-  #           base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-  #           grpc-base-image: "ubuntu:22.04"
-  #           tag-suffix: 'sycl-f16-ffmpeg-core'
-  #           ffmpeg: 'true'
-  #           image-type: 'core'
-  #           runs-on: 'arc-runner-set'
-  #           makeflags: "--jobs=3 --output-sync=target"
-  #         - build-type: 'cublas'
-  #           cuda-major-version: "12"
-  #           cuda-minor-version: "0"
-  #           platforms: 'linux/amd64'
-  #           tag-latest: 'false'
-  #           tag-suffix: '-cublas-cuda12-ffmpeg-core'
-  #           ffmpeg: 'true'
-  #           image-type: 'core'
-  #           runs-on: 'ubuntu-latest'
-  #           base-image: "ubuntu:22.04"
-  #           makeflags: "--jobs=4 --output-sync=target"
-  #         - build-type: 'vulkan'
-  #           platforms: 'linux/amd64'
-  #           tag-latest: 'false'
-  #           tag-suffix: '-vulkan-ffmpeg-core'
-  #           ffmpeg: 'true'
-  #           image-type: 'core'
-  #           runs-on: 'ubuntu-latest'
-  #           base-image: "ubuntu:22.04"
-  #           makeflags: "--jobs=4 --output-sync=target"
+          - build-type: 'hipblas'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-hipblas'
+            ffmpeg: 'false'
+            image-type: 'extras'
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
+            grpc-base-image: "ubuntu:22.04"
+            runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
+          - build-type: 'sycl_f16'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            grpc-base-image: "ubuntu:22.04"
+            tag-suffix: 'sycl-f16-ffmpeg'
+            ffmpeg: 'true'
+            image-type: 'extras'
+            runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
+  core-image-build:
+    uses: ./.github/workflows/image_build.yml
+    with:
+      tag-latest: ${{ matrix.tag-latest }}
+      tag-suffix: ${{ matrix.tag-suffix }}
+      ffmpeg: ${{ matrix.ffmpeg }}
+      image-type: ${{ matrix.image-type }}
+      build-type: ${{ matrix.build-type }}
+      cuda-major-version: ${{ matrix.cuda-major-version }}
+      cuda-minor-version: ${{ matrix.cuda-minor-version }}
+      platforms: ${{ matrix.platforms }}
+      runs-on: ${{ matrix.runs-on }}
+      base-image: ${{ matrix.base-image }}
+      grpc-base-image: ${{ matrix.grpc-base-image }}
+      makeflags: ${{ matrix.makeflags }}
+    secrets:
+      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
+      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
+      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
+      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
+    strategy:
+      matrix:
+        include:
+          - build-type: ''
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-ffmpeg-core'
+            ffmpeg: 'true'
+            image-type: 'core'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            makeflags: "--jobs=4 --output-sync=target"
+          - build-type: 'sycl_f16'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            grpc-base-image: "ubuntu:22.04"
+            tag-suffix: 'sycl-f16-ffmpeg-core'
+            ffmpeg: 'true'
+            image-type: 'core'
+            runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "1"
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-cublas-cuda12-ffmpeg-core'
+            ffmpeg: 'true'
+            image-type: 'core'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
+            makeflags: "--jobs=4 --output-sync=target"

View File

@@ -13,78 +13,6 @@ concurrency:
   cancel-in-progress: true

 jobs:
-  hipblas-jobs:
-    uses: ./.github/workflows/image_build.yml
-    with:
-      tag-latest: ${{ matrix.tag-latest }}
-      tag-suffix: ${{ matrix.tag-suffix }}
-      ffmpeg: ${{ matrix.ffmpeg }}
-      image-type: ${{ matrix.image-type }}
-      build-type: ${{ matrix.build-type }}
-      cuda-major-version: ${{ matrix.cuda-major-version }}
-      cuda-minor-version: ${{ matrix.cuda-minor-version }}
-      platforms: ${{ matrix.platforms }}
-      runs-on: ${{ matrix.runs-on }}
-      base-image: ${{ matrix.base-image }}
-      grpc-base-image: ${{ matrix.grpc-base-image }}
-      aio: ${{ matrix.aio }}
-      makeflags: ${{ matrix.makeflags }}
-      latest-image: ${{ matrix.latest-image }}
-      latest-image-aio: ${{ matrix.latest-image-aio }}
-    secrets:
-      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
-      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
-      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-    strategy:
-      # Pushing with all jobs in parallel
-      # eats the bandwidth of all the nodes
-      max-parallel: 2
-      matrix:
-        include:
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-hipblas-ffmpeg'
-            ffmpeg: 'true'
-            image-type: 'extras'
-            aio: "-aio-gpu-hipblas"
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            latest-image: 'latest-gpu-hipblas'
-            latest-image-aio: 'latest-aio-gpu-hipblas'
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-hipblas'
-            ffmpeg: 'false'
-            image-type: 'extras'
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-hipblas-ffmpeg-core'
-            ffmpeg: 'true'
-            image-type: 'core'
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
-          - build-type: 'hipblas'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-hipblas-core'
-            ffmpeg: 'false'
-            image-type: 'core'
-            base-image: "rocm/dev-ubuntu-22.04:6.1"
-            grpc-base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
-            makeflags: "--jobs=3 --output-sync=target"
   self-hosted-jobs:
     uses: ./.github/workflows/image_build.yml
     with:
@@ -111,7 +39,7 @@ jobs:
     strategy:
       # Pushing with all jobs in parallel
       # eats the bandwidth of all the nodes
-      max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
+      max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
       matrix:
         include:
           # Extra images
@@ -147,7 +75,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "12" cuda-major-version: "12"
cuda-minor-version: "0" cuda-minor-version: "1"
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'false' tag-latest: 'false'
tag-suffix: '-cublas-cuda12' tag-suffix: '-cublas-cuda12'
@@ -172,7 +100,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas' - build-type: 'cublas'
cuda-major-version: "12" cuda-major-version: "12"
cuda-minor-version: "0" cuda-minor-version: "1"
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
tag-suffix: '-cublas-cuda12-ffmpeg' tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -194,6 +122,29 @@ jobs:
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-hipblas-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
aio: "-aio-gpu-hipblas"
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
latest-image: 'latest-gpu-hipblas'
latest-image-aio: 'latest-aio-gpu-hipblas'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
ffmpeg: 'false'
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16' - build-type: 'sycl_f16'
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
@@ -261,7 +212,27 @@ jobs:
             image-type: 'core'
             runs-on: 'arc-runner-set'
             makeflags: "--jobs=3 --output-sync=target"
+          - build-type: 'hipblas'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-hipblas-ffmpeg-core'
+            ffmpeg: 'true'
+            image-type: 'core'
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
+            grpc-base-image: "ubuntu:22.04"
+            runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
+          - build-type: 'hipblas'
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-hipblas-core'
+            ffmpeg: 'false'
+            image-type: 'core'
+            base-image: "rocm/dev-ubuntu-22.04:6.1"
+            grpc-base-image: "ubuntu:22.04"
+            runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
   core-image-build:
     uses: ./.github/workflows/image_build.yml
     with:
@@ -286,7 +257,6 @@ jobs:
       quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
       quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
     strategy:
-      max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
       matrix:
         include:
           - build-type: ''
@@ -296,7 +266,7 @@ jobs:
             ffmpeg: 'true'
             image-type: 'core'
             base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
+            runs-on: 'ubuntu-latest'
             aio: "-aio-cpu"
             latest-image: 'latest-cpu'
             latest-image-aio: 'latest-aio-cpu'
@@ -310,18 +280,18 @@ jobs:
             ffmpeg: ''
             image-type: 'core'
             base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
+            runs-on: 'ubuntu-latest'
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "1"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-core'
             ffmpeg: ''
             image-type: 'core'
             base-image: "ubuntu:22.04"
-            runs-on: 'arc-runner-set'
+            runs-on: 'ubuntu-latest'
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
@@ -331,27 +301,17 @@ jobs:
             tag-suffix: '-cublas-cuda11-ffmpeg-core'
             ffmpeg: 'true'
             image-type: 'core'
-            runs-on: 'arc-runner-set'
+            runs-on: 'ubuntu-latest'
             base-image: "ubuntu:22.04"
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "1"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-ffmpeg-core'
             ffmpeg: 'true'
             image-type: 'core'
-            runs-on: 'arc-runner-set'
+            runs-on: 'ubuntu-latest'
             base-image: "ubuntu:22.04"
             makeflags: "--jobs=4 --output-sync=target"
-          - build-type: 'vulkan'
-            platforms: 'linux/amd64'
-            tag-latest: 'false'
-            tag-suffix: '-vulkan-ffmpeg-core'
-            latest-image: 'latest-vulkan-ffmpeg-core'
-            ffmpeg: 'true'
-            image-type: 'core'
-            runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
-            makeflags: "--jobs=4 --output-sync=target"

View File

@@ -19,11 +19,11 @@ on:
         type: string
       cuda-major-version:
         description: 'CUDA major version'
-        default: "12"
+        default: "11"
         type: string
       cuda-minor-version:
         description: 'CUDA minor version'
-        default: "4"
+        default: "7"
         type: string
       platforms:
         description: 'Platforms'
@@ -215,7 +215,7 @@ jobs:
           password: ${{ secrets.quayPassword }}
       - name: Build and push
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@v5
         if: github.event_name != 'pull_request'
         with:
           builder: ${{ steps.buildx.outputs.name }}
@@ -232,7 +232,7 @@ jobs:
             BASE_IMAGE=${{ inputs.base-image }}
             GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
             GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.65.0
+            GRPC_VERSION=v1.64.0
             MAKEFLAGS=${{ inputs.makeflags }}
           context: .
           file: ./Dockerfile
@@ -243,7 +243,7 @@ jobs:
           labels: ${{ steps.meta.outputs.labels }}
       ### Start testing image
       - name: Build and push
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@v5
         if: github.event_name == 'pull_request'
         with:
           builder: ${{ steps.buildx.outputs.name }}
@@ -260,7 +260,7 @@ jobs:
             BASE_IMAGE=${{ inputs.base-image }}
             GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
             GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.65.0
+            GRPC_VERSION=v1.64.0
             MAKEFLAGS=${{ inputs.makeflags }}
           context: .
           file: ./Dockerfile
@@ -276,7 +276,7 @@ jobs:
       ## End testing image
       - name: Build and push AIO image
         if: inputs.aio != ''
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@v5
         with:
           builder: ${{ steps.buildx.outputs.name }}
           build-args: |
@@ -291,7 +291,7 @@ jobs:
       - name: Build and push AIO image (dockerhub)
         if: inputs.aio != ''
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@v5
         with:
           builder: ${{ steps.buildx.outputs.name }}
           build-args: |
@@ -324,7 +324,7 @@ jobs:
           docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
           docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
           docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
       - name: job summary
         run: |
           echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY

View File

@@ -1,168 +0,0 @@
name: Notifications for new models
on:
  pull_request:
    types:
      - closed

jobs:
  notify-discord:
    if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
    env:
      MODEL_NAME: hermes-2-theta-llama-3-8b
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0 # needed to checkout all branches for this Action to work
      - uses: mudler/localai-github-action@v1
        with:
          model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
      # Check the PR diff using the current branch and the base branch of the PR
      - uses: GrantBirki/git-diff-action@v2.7.0
        id: git-diff-action
        with:
          json_diff_file_output: diff.json
          raw_diff_file_output: diff.txt
          file_output_only: "true"
      - name: Summarize
        env:
          DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
        id: summarize
        run: |
          input="$(cat $DIFF)"

          # Define the LocalAI API endpoint
          API_URL="http://localhost:8080/chat/completions"

          # Create a JSON payload using jq to handle special characters
          json_payload=$(jq -n --arg input "$input" '{
            model: "'$MODEL_NAME'",
            messages: [
              {
                role: "system",
                content: "You are LocalAI-bot. Write a discord message to notify everyone about the new model from the git diff. Make it informal. An example can include: the URL of the model, the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI and that can be browsed over https://models.localai.io. For example: local-ai run model_name_here"
              },
              {
                role: "user",
                content: $input
              }
            ]
          }')

          # Send the request to LocalAI
          response=$(curl -s -X POST $API_URL \
            -H "Content-Type: application/json" \
            -d "$json_payload")

          # Extract the summary from the response
          summary="$(echo $response | jq -r '.choices[0].message.content')"

          # Print the summary
          # -H "Authorization: Bearer $API_KEY" \
          echo "Summary:"
          echo "$summary"
          echo "payload sent"
          echo "$json_payload"
          {
            echo 'message<<EOF'
            echo "$summary"
            echo EOF
          } >> "$GITHUB_OUTPUT"
          docker logs --tail 10 local-ai
      - name: Discord notification
        env:
          DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL }}
          DISCORD_USERNAME: "LocalAI-Bot"
          DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
        uses: Ilshidur/action-discord@master
        with:
          args: ${{ steps.summarize.outputs.message }}
      - name: Setup tmate session if fails
        if: ${{ failure() }}
        uses: mxschmitt/action-tmate@v3.18
        with:
          detached: true
          connect-timeout-seconds: 180
          limit-access-to-actor: true
  notify-twitter:
    if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
    env:
      MODEL_NAME: hermes-2-theta-llama-3-8b
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0 # needed to checkout all branches for this Action to work
      - name: Start LocalAI
        run: |
          echo "Starting LocalAI..."
          docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
          until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
      # Check the PR diff using the current branch and the base branch of the PR
      - uses: GrantBirki/git-diff-action@v2.7.0
        id: git-diff-action
        with:
          json_diff_file_output: diff.json
          raw_diff_file_output: diff.txt
          file_output_only: "true"
      - name: Summarize
        env:
          DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
        id: summarize
        run: |
          input="$(cat $DIFF)"

          # Define the LocalAI API endpoint
          API_URL="http://localhost:8080/chat/completions"

          # Create a JSON payload using jq to handle special characters
          json_payload=$(jq -n --arg input "$input" '{
            model: "'$MODEL_NAME'",
            messages: [
              {
                role: "system",
                content: "You are LocalAI-bot. Write a twitter message to notify everyone about the new model from the git diff. Make it informal and really short. An example can include: the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI. For example: local-ai run model_name_here"
              },
              {
                role: "user",
                content: $input
              }
            ]
          }')

          # Send the request to LocalAI
          response=$(curl -s -X POST $API_URL \
            -H "Content-Type: application/json" \
            -d "$json_payload")

          # Extract the summary from the response
          summary="$(echo $response | jq -r '.choices[0].message.content')"

          # Print the summary
          # -H "Authorization: Bearer $API_KEY" \
          echo "Summary:"
          echo "$summary"
          echo "payload sent"
          echo "$json_payload"
          {
            echo 'message<<EOF'
            echo "$summary"
            echo EOF
          } >> "$GITHUB_OUTPUT"
          docker logs --tail 10 local-ai
      - uses: Eomm/why-don-t-you-tweet@v2
        with:
          tweet-message: ${{ steps.summarize.outputs.message }}
        env:
          # Get your tokens from https://developer.twitter.com/apps
          TWITTER_CONSUMER_API_KEY: ${{ secrets.TWITTER_APP_KEY }}
          TWITTER_CONSUMER_API_SECRET: ${{ secrets.TWITTER_APP_SECRET }}
          TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
          TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
      - name: Setup tmate session if fails
        if: ${{ failure() }}
        uses: mxschmitt/action-tmate@v3.18
        with:
          detached: true
          connect-timeout-seconds: 180
          limit-access-to-actor: true

View File

@@ -1,63 +0,0 @@
name: Release notifications
on:
  release:
    types:
      - published

jobs:
  notify-discord:
    runs-on: ubuntu-latest
    env:
      RELEASE_BODY: ${{ github.event.release.body }}
      RELEASE_TITLE: ${{ github.event.release.name }}
      RELEASE_TAG_NAME: ${{ github.event.release.tag_name }}
    steps:
      - uses: mudler/localai-github-action@v1
        with:
          model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
      - name: Summarize
        id: summarize
        run: |
          input="$RELEASE_TITLE\b$RELEASE_BODY"

          # Define the LocalAI API endpoint
          API_URL="http://localhost:8080/chat/completions"

          # Create a JSON payload using jq to handle special characters
          json_payload=$(jq -n --arg input "$input" '{
            model: "'$MODEL_NAME'",
            messages: [
              {
                role: "system",
                content: "Write a discord message with a bullet point summary of the release notes."
              },
              {
                role: "user",
                content: $input
              }
            ]
          }')

          # Send the request to LocalAI API
          response=$(curl -s -X POST $API_URL \
            -H "Content-Type: application/json" \
            -d "$json_payload")

          # Extract the summary from the response
          summary=$(echo $response | jq -r '.choices[0].message.content')

          # Print the summary
          # -H "Authorization: Bearer $API_KEY" \
          {
            echo 'message<<EOF'
            echo "$summary"
            echo EOF
          } >> "$GITHUB_OUTPUT"
      - name: Discord notification
        env:
          DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL_RELEASE }}
          DISCORD_USERNAME: "LocalAI-Bot"
          DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
        uses: Ilshidur/action-discord@master
        with:
          args: ${{ steps.summarize.outputs.message }}
@@ -1,28 +0,0 @@
name: Check PR style

on:
  pull_request_target:
    types:
      - opened
      - reopened
      - edited
      - synchronize

jobs:
  title-lint:
    runs-on: ubuntu-latest
    permissions:
      statuses: write
    steps:
      - uses: aslafy-z/conventional-pr-title-action@v3
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  # check-pr-description:
  #   runs-on: ubuntu-latest
  #   steps:
  #   - uses: actions/checkout@v2
  #   - uses: jadrol/pr-description-checker-action@v1.0.0
  #     id: description-checker
  #     with:
  #       repo-token: ${{ secrets.GITHUB_TOKEN }}
  #       exempt-labels: no qa
@@ -1,15 +1,11 @@
 name: Build and Release
 
 on:
-  push:
-    branches:
-      - master
-    tags:
-      - 'v*'
-  pull_request:
+  - push
+  - pull_request
 
 env:
-  GRPC_VERSION: v1.65.0
+  GRPC_VERSION: v1.64.0
 
 permissions:
   contents: write
@@ -31,11 +27,12 @@ jobs:
         with:
           go-version: '1.21.x'
           cache: false
       - name: Dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
-          sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
+          sudo apt-get install build-essential ffmpeg protobuf-compiler ccache
+          sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
       - name: Install CUDA Dependencies
         run: |
           curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
@@ -55,8 +52,7 @@ jobs:
         run: |
           git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
-            cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
-            cd cmake/build && cmake -DgRPC_INSTALL=ON \
+            cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
               -DgRPC_BUILD_TESTS=OFF \
               ../.. && sudo make --jobs 5 --output-sync=target
       - name: Install gRPC
@@ -100,17 +96,11 @@ jobs:
           CROSS_TOOLCHAIN=/usr/$GNU_HOST
           CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
           CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
+          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
           export PATH=$PATH:$GOPATH/bin
           export PATH=/usr/local/cuda/bin:$PATH
-          sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
-          sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
-          sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
-          BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
-          GOOS=linux \
-          GOARCH=arm64 \
-          CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
+          GO_TAGS=p2p GOOS=linux GOARCH=arm64 CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
       - uses: actions/upload-artifact@v4
         with:
           name: LocalAI-linux-arm64
@@ -121,13 +111,7 @@ jobs:
         with:
           files: |
             release/*
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
   build-linux:
     runs-on: arc-runner-set
     steps:
@@ -150,7 +134,7 @@ jobs:
       - name: Dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
+          sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache cmake
       - name: Intel Dependencies
         run: |
           wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
@@ -164,21 +148,21 @@ jobs:
           sudo apt-get update
           sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
         env:
-          CUDA_VERSION: 12-5
+          CUDA_VERSION: 12-3
       - name: "Install Hipblas"
         env:
           ROCM_VERSION: "6.1"
           AMDGPU_VERSION: "6.1"
         run: |
           set -ex
           sudo apt-get update
           sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
           curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -
           printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | sudo tee /etc/apt/sources.list.d/rocm.list
           printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
           printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
           sudo apt-get update
@@ -186,10 +170,10 @@ jobs:
           sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \
             hipblas-dev rocm-dev \
             rocblas-dev
           sudo apt-get clean
           sudo rm -rf /var/lib/apt/lists/*
           sudo ldconfig
       - name: Cache grpc
         id: cache-grpc
         uses: actions/cache@v4
@@ -200,26 +184,22 @@ jobs:
         if: steps.cache-grpc.outputs.cache-hit != 'true'
         run: |
           git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
-            cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
-            cd cmake/build && cmake -DgRPC_INSTALL=ON \
+            cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
               -DgRPC_BUILD_TESTS=OFF \
               ../.. && sudo make --jobs 5 --output-sync=target
       - name: Install gRPC
         run: |
           cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
-      # BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
       - name: Build
         id: build
         run: |
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
+          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
           export PATH=$PATH:$GOPATH/bin
           export PATH=/usr/local/cuda/bin:$PATH
           export PATH=/opt/rocm/bin:$PATH
           source /opt/intel/oneapi/setvars.sh
-          sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
-          BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/x86_64-linux-gnu/libdl.so.2 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/lib/x86_64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
-          make -j4 dist
+          GO_TAGS=p2p make -j4 dist
       - uses: actions/upload-artifact@v4
         with:
           name: LocalAI-linux
@@ -230,13 +210,7 @@ jobs:
         with:
           files: |
             release/*
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
   build-stablediffusion:
     runs-on: ubuntu-latest
     steps:
@@ -251,9 +225,9 @@ jobs:
       - name: Dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
+          sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
+          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
       - name: Build stablediffusion
         run: |
           export PATH=$PATH:$GOPATH/bin
@@ -272,48 +246,6 @@ jobs:
           files: |
             release/*
-  build-macOS-x86_64:
-    runs-on: macos-13
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: actions/setup-go@v5
-        with:
-          go-version: '1.21.x'
-          cache: false
-      - name: Dependencies
-        run: |
-          brew install protobuf grpc
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
-      - name: Build
-        id: build
-        run: |
-          export C_INCLUDE_PATH=/usr/local/include
-          export CPLUS_INCLUDE_PATH=/usr/local/include
-          export PATH=$PATH:$GOPATH/bin
-          export SKIP_GRPC_BACKEND=backend-assets/grpc/whisper
-          make dist
-      - uses: actions/upload-artifact@v4
-        with:
-          name: LocalAI-MacOS-x86_64
-          path: release/
-      - name: Release
-        uses: softprops/action-gh-release@v2
-        if: startsWith(github.ref, 'refs/tags/')
-        with:
-          files: |
-            release/*
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
   build-macOS-arm64:
     runs-on: macos-14
     steps:
@@ -325,19 +257,22 @@ jobs:
         with:
           go-version: '1.21.x'
           cache: false
+      - name: Setup tmate session if tests fail
+        uses: mxschmitt/action-tmate@v3.18
+        with:
+          limit-access-to-actor: true
       - name: Dependencies
         run: |
-          brew install protobuf grpc libomp llvm
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
+          brew install protobuf grpc
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
+          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
       - name: Build
         id: build
         run: |
           export C_INCLUDE_PATH=/usr/local/include
           export CPLUS_INCLUDE_PATH=/usr/local/include
           export PATH=$PATH:$GOPATH/bin
-          export CC=/opt/homebrew/opt/llvm/bin/clang
-          make dist
+          GO_TAGS=p2p make dist
       - uses: actions/upload-artifact@v4
         with:
           name: LocalAI-MacOS-arm64
@@ -348,10 +283,3 @@ jobs:
         with:
           files: |
             release/*
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.18
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
@@ -18,7 +18,7 @@ jobs:
         if: ${{ github.actor != 'dependabot[bot]' }}
       - name: Run Gosec Security Scanner
         if: ${{ github.actor != 'dependabot[bot]' }}
-        uses: securego/gosec@v2.21.4
+        uses: securego/gosec@master
         with:
           # we let the report trigger content trigger a failure using the GitHub Security features.
           args: '-no-fail -fmt sarif -out results.sarif ./...'
@@ -19,7 +19,7 @@ jobs:
     steps:
       - name: Clone
         uses: actions/checkout@v4
         with:
           submodules: true
       - name: Dependencies
         run: |
@@ -29,8 +29,8 @@ jobs:
           curl -LsSf https://astral.sh/uv/install.sh | sh
           sudo apt-get install -y ca-certificates cmake curl patch python3-pip
           sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
+          pip install --user grpcio-tools==1.64.0
       - name: Test transformers
         run: |
           make --jobs=5 --output-sync=target -C backend/python/transformers
@@ -41,7 +41,7 @@ jobs:
     steps:
       - name: Clone
         uses: actions/checkout@v4
         with:
           submodules: true
       - name: Dependencies
         run: |
@@ -51,8 +51,8 @@ jobs:
           curl -LsSf https://astral.sh/uv/install.sh | sh
           sudo apt-get install -y ca-certificates cmake curl patch python3-pip
           sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
+          pip install --user grpcio-tools==1.64.0
       - name: Test sentencetransformers
         run: |
           make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
@@ -64,7 +64,7 @@ jobs:
     steps:
       - name: Clone
         uses: actions/checkout@v4
         with:
           submodules: true
       - name: Dependencies
         run: |
@@ -74,7 +74,7 @@ jobs:
           curl -LsSf https://astral.sh/uv/install.sh | sh
           sudo apt-get install -y ca-certificates cmake curl patch python3-pip
           sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
+          pip install --user grpcio-tools==1.64.0
       - name: Test rerankers
         run: |
@@ -86,7 +86,7 @@ jobs:
     steps:
       - name: Clone
         uses: actions/checkout@v4
         with:
           submodules: true
       - name: Dependencies
         run: |
@@ -96,7 +96,7 @@ jobs:
           sudo apt-get install -y libopencv-dev
           # Install UV
           curl -LsSf https://astral.sh/uv/install.sh | sh
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
+          pip install --user grpcio-tools==1.64.0
       - name: Test diffusers
         run: |
           make --jobs=5 --output-sync=target -C backend/python/diffusers
@@ -107,7 +107,7 @@ jobs:
     steps:
       - name: Clone
         uses: actions/checkout@v4
         with:
           submodules: true
       - name: Dependencies
         run: |
@@ -117,19 +117,19 @@ jobs:
           curl -LsSf https://astral.sh/uv/install.sh | sh
           sudo apt-get install -y ca-certificates cmake curl patch python3-pip
           sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
+          pip install --user grpcio-tools==1.64.0
       - name: Test parler-tts
         run: |
           make --jobs=5 --output-sync=target -C backend/python/parler-tts
           make --jobs=5 --output-sync=target -C backend/python/parler-tts test
   tests-openvoice:
     runs-on: ubuntu-latest
     steps:
       - name: Clone
         uses: actions/checkout@v4
         with:
           submodules: true
       - name: Dependencies
         run: |
@@ -139,7 +139,7 @@ jobs:
           curl -LsSf https://astral.sh/uv/install.sh | sh
           sudo apt-get install -y ca-certificates cmake curl patch python3-pip
           sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
+          pip install --user grpcio-tools==1.64.0
       - name: Test openvoice
         run: |
@@ -151,7 +151,7 @@ jobs:
     steps:
       - name: Clone
         uses: actions/checkout@v4
         with:
           submodules: true
       - name: Dependencies
         run: |
@@ -161,13 +161,39 @@ jobs:
           curl -LsSf https://astral.sh/uv/install.sh | sh
           sudo apt-get install -y ca-certificates cmake curl patch python3-pip
           sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
+          pip install --user grpcio-tools==1.64.0
       - name: Test transformers-musicgen
         run: |
           make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
           make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
+  # tests-petals:
+  #   runs-on: ubuntu-latest
+  #   steps:
+  #   - name: Clone
+  #     uses: actions/checkout@v4
+  #     with:
+  #       submodules: true
+  #   - name: Dependencies
+  #     run: |
+  #       sudo apt-get update
+  #       sudo apt-get install build-essential ffmpeg
+  #       # Install UV
+  #       curl -LsSf https://astral.sh/uv/install.sh | sh
+  #       sudo apt-get install -y ca-certificates cmake curl patch python3-pip
+  #       sudo apt-get install -y libopencv-dev
+  #       pip install --user grpcio-tools==1.64.0
+  #   - name: Test petals
+  #     run: |
+  #       make --jobs=5 --output-sync=target -C backend/python/petals
+  #       make --jobs=5 --output-sync=target -C backend/python/petals test
   # tests-bark:
   #   runs-on: ubuntu-latest
   #   steps:
@@ -213,7 +239,7 @@ jobs:
   #       df -h
   #   - name: Clone
   #     uses: actions/checkout@v4
   #     with:
   #       submodules: true
   #   - name: Dependencies
   #     run: |
@@ -223,14 +249,14 @@ jobs:
   #       curl -LsSf https://astral.sh/uv/install.sh | sh
   #       sudo apt-get install -y ca-certificates cmake curl patch python3-pip
   #       sudo apt-get install -y libopencv-dev
-  #       pip install --user --no-cache-dir grpcio-tools==1.64.1
+  #       pip install --user grpcio-tools==1.64.0
   #   - name: Test bark
   #     run: |
   #       make --jobs=5 --output-sync=target -C backend/python/bark
   #       make --jobs=5 --output-sync=target -C backend/python/bark test
 
   # Below tests needs GPU. Commented out for now
   # TODO: Re-enable as soon as we have GPU nodes
   # tests-vllm:
@@ -238,7 +264,7 @@ jobs:
   #   steps:
   #   - name: Clone
   #     uses: actions/checkout@v4
   #     with:
   #       submodules: true
   #   - name: Dependencies
   #     run: |
@@ -248,7 +274,7 @@ jobs:
   #       curl -LsSf https://astral.sh/uv/install.sh | sh
   #       sudo apt-get install -y ca-certificates cmake curl patch python3-pip
   #       sudo apt-get install -y libopencv-dev
-  #       pip install --user --no-cache-dir grpcio-tools==1.64.1
+  #       pip install --user grpcio-tools==1.64.0
   #   - name: Test vllm
   #     run: |
   #       make --jobs=5 --output-sync=target -C backend/python/vllm
@@ -258,7 +284,7 @@ jobs:
     steps:
       - name: Clone
         uses: actions/checkout@v4
         with:
           submodules: true
       - name: Dependencies
         run: |
@@ -268,7 +294,7 @@ jobs:
           curl -LsSf https://astral.sh/uv/install.sh | sh
           sudo apt-get install -y ca-certificates cmake curl patch python3-pip
           sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
+          pip install --user grpcio-tools==1.64.0
       - name: Test vall-e-x
         run: |
           make --jobs=5 --output-sync=target -C backend/python/vall-e-x
@@ -279,7 +305,7 @@ jobs:
     steps:
       - name: Clone
         uses: actions/checkout@v4
         with:
           submodules: true
       - name: Dependencies
         run: |
@@ -288,8 +314,8 @@ jobs:
           sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
           # Install UV
           curl -LsSf https://astral.sh/uv/install.sh | sh
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
+          pip install --user grpcio-tools==1.64.0
       - name: Test coqui
         run: |
           make --jobs=5 --output-sync=target -C backend/python/coqui
           make --jobs=5 --output-sync=target -C backend/python/coqui test
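
Every job in this workflow repeats the same two-step recipe: build the backend's Makefile target, then run its `test` target. A consolidated sketch of that recipe (the per-backend make targets are the real ones above; running them in one local loop like this is only an illustration, since CI gives each backend its own job):

```
#!/bin/bash
# Illustration only: the shared build-then-test recipe in one place.
set -e
for backend in transformers sentencetransformers rerankers diffusers parler-tts openvoice transformers-musicgen vall-e-x coqui; do
    make --jobs=5 --output-sync=target -C "backend/python/${backend}"
    make --jobs=5 --output-sync=target -C "backend/python/${backend}" test
done
```
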
@@ -10,7 +10,7 @@ on:
       - '*'
 
 env:
-  GRPC_VERSION: v1.65.0
+  GRPC_VERSION: v1.64.0
 
 concurrency:
   group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
@@ -70,8 +70,7 @@ jobs:
       - name: Dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
-          sudo apt-get install -y libgmock-dev
+          sudo apt-get install build-essential curl ffmpeg
           curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
           sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
           gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
@@ -94,8 +93,8 @@ jobs:
           sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
           export CUDACXX=/usr/local/cuda/bin/nvcc
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
 
           # The python3-grpc-tools package in 22.04 is too old
           pip install --user grpcio-tools
@@ -110,7 +109,7 @@ jobs:
           # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
           PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
         env:
-          CUDA_VERSION: 12-4
+          CUDA_VERSION: 12-3
       - name: Cache grpc
         id: cache-grpc
         uses: actions/cache@v4
@@ -121,8 +120,7 @@ jobs:
         if: steps.cache-grpc.outputs.cache-hit != 'true'
         run: |
           git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --jobs 5 --shallow-submodules https://github.com/grpc/grpc && \
-            cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && cd cmake/build && \
-            cmake -DgRPC_INSTALL=ON \
+            cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
               -DgRPC_BUILD_TESTS=OFF \
               ../.. && sudo make --jobs 5
       - name: Install gRPC
@@ -178,22 +176,13 @@ jobs:
         uses: actions/checkout@v4
         with:
           submodules: true
-      - name: Dependencies
-        run: |
-          # Install protoc
-          curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
-            unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
-            rm protoc.zip
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          PATH="$PATH:$HOME/go/bin" make protogen-go
       - name: Build images
         run: |
           docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
           BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
       - name: Test
         run: |
-          PATH="$PATH:$HOME/go/bin" LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
+          LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
             make run-e2e-aio
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
@@ -223,16 +212,15 @@ jobs:
         run: go version
       - name: Dependencies
         run: |
-          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
+          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
+          pip install --user grpcio-tools==1.64.0
       - name: Test
         run: |
           export C_INCLUDE_PATH=/usr/local/include
           export CPLUS_INCLUDE_PATH=/usr/local/include
-          export CC=/opt/homebrew/opt/llvm/bin/clang
           # Used to run the newer GNUMake version from brew that supports --output-sync
           export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
-          BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
+          BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
         uses: mxschmitt/action-tmate@v3.18
@@ -13,19 +13,13 @@ jobs:
       - uses: actions/setup-go@v5
         with:
           go-version: 'stable'
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install protobuf-compiler
       - run: |
           go install github.com/swaggo/swag/cmd/swag@latest
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
       - name: Bump swagger 🔧
         run: |
-          make protogen-go swagger
+          make swagger
       - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v7
+        uses: peter-evans/create-pull-request@v6
         with:
           token: ${{ secrets.UPDATE_BOT_TOKEN }}
           push-to-fork: ci-forks/LocalAI

.gitignore

@@ -54,6 +54,3 @@ docs/static/gallery.html
 # backend virtual environments
 **/venv
 
-# per-developer customization files for the development container
-.devcontainer/customization/*

.vscode/launch.json

@@ -3,12 +3,12 @@
   "configurations": [
     {
       "name": "Python: Current File",
-      "type": "debugpy",
+      "type": "python",
       "request": "launch",
       "program": "${file}",
       "console": "integratedTerminal",
       "justMyCode": false,
-      "cwd": "${fileDirname}",
+      "cwd": "${workspaceFolder}/examples/langchain-chroma",
       "env": {
         "OPENAI_API_BASE": "http://localhost:8080/v1",
         "OPENAI_API_KEY": "abc"
@@ -19,16 +19,15 @@
       "type": "go",
       "request": "launch",
       "mode": "debug",
-      "program": "${workspaceRoot}",
-      "args": [],
+      "program": "${workspaceFolder}/main.go",
+      "args": [
+        "api"
+      ],
       "env": {
-        "LOCALAI_LOG_LEVEL": "debug",
-        "LOCALAI_P2P": "true",
-        "LOCALAI_FEDERATED": "true"
-      },
-      "buildFlags": ["-tags", "stablediffusion p2p tts", "-v"],
-      "envFile": "${workspaceFolder}/.env",
-      "cwd": "${workspaceRoot}"
+        "C_INCLUDE_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
+        "LIBRARY_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
+        "DEBUG": "true"
+      }
     }
   ]
 }
@@ -15,6 +15,8 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time
 - [Documentation](#documentation)
 - [Community and Communication](#community-and-communication)
+
+
 ## Getting Started
 
 ### Prerequisites
@@ -52,7 +54,7 @@ If you find a bug, have a feature request, or encounter any issues, please check
 ## Coding Guidelines
 
-- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like [`golangci-lint`](https://golangci-lint.run) can help you here.
+- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like []`golangci-lint`](https://golangci-lint.run) can help you here.
 
 ## Testing
@@ -82,3 +84,5 @@ We are welcome the contribution of the documents, please open new PR or create a
 - You can reach out via the Github issue tracker.
 - Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
 - Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
+
+---
@@ -8,14 +8,12 @@ FROM ${BASE_IMAGE} AS requirements-core
 USER root
 
-ARG GO_VERSION=1.22.6
-ARG CMAKE_VERSION=3.26.4
-ARG CMAKE_FROM_SOURCE=false
+ARG GO_VERSION=1.22.4
 ARG TARGETARCH
 ARG TARGETVARIANT
 
 ENV DEBIAN_FRONTEND=noninteractive
-ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
+ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
 
 RUN apt-get update && \
@@ -23,48 +21,33 @@ RUN apt-get update && \
         build-essential \
         ccache \
         ca-certificates \
-        curl libssl-dev \
+        cmake \
+        curl \
         git \
-        unzip upx-ucl && \
+        unzip && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-# Install CMake (the version in 22.04 is too old)
-RUN <<EOT bash
-    if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
-        curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
-    else
-        apt-get update && \
-        apt-get install -y \
-            cmake && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
-    fi
-EOT
-
 # Install Go
 RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
-ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
+ENV PATH $PATH:/root/go/bin:/usr/local/go/bin
 
 # Install grpc compilers
-RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
+RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.1 && \
     go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
 
 COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
 RUN update-ca-certificates
 
-RUN test -n "$TARGETARCH" \
-    || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
-
 # Use the variables in subsequent instructions
 RUN echo "Target Architecture: $TARGETARCH"
 RUN echo "Target Variant: $TARGETVARIANT"
 
 # Cuda
-ENV PATH=/usr/local/cuda/bin:${PATH}
+ENV PATH /usr/local/cuda/bin:${PATH}
 
 # HipBLAS requirements
-ENV PATH=/opt/rocm/bin:${PATH}
+ENV PATH /opt/rocm/bin:${PATH}
 
 # OpenBLAS requirements and stable diffusion
 RUN apt-get update && \
@@ -79,6 +62,9 @@ RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
 WORKDIR /build
 
+RUN test -n "$TARGETARCH" \
+    || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
 
 ###################################
 ###################################
@@ -95,7 +81,7 @@ RUN apt-get update && \
     espeak \
     python3-pip \
     python-is-python3 \
-    python3-dev llvm \
+    python3-dev \
     python3-venv && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/* && \
@@ -112,39 +98,43 @@ RUN pip install --user grpcio-tools
 FROM requirements-${IMAGE_TYPE} AS requirements-drivers
 
 ARG BUILD_TYPE
-ARG CUDA_MAJOR_VERSION=12
-ARG CUDA_MINOR_VERSION=0
+ARG CUDA_MAJOR_VERSION=11
+ARG CUDA_MINOR_VERSION=8
 
 ENV BUILD_TYPE=${BUILD_TYPE}
 
-# Vulkan requirements
-RUN <<EOT bash
-    if [ "${BUILD_TYPE}" = "vulkan" ]; then
-        apt-get update && \
-        apt-get install -y --no-install-recommends \
-            software-properties-common pciutils wget gpg-agent && \
-        wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
-        wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
-        apt-get update && \
-        apt-get install -y \
-            vulkan-sdk && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
-    fi
-EOT
-
 # CuBLAS requirements
 RUN <<EOT bash
     if [ "${BUILD_TYPE}" = "cublas" ]; then
         apt-get update && \
         apt-get install -y --no-install-recommends \
             software-properties-common pciutils
         if [ "amd64" = "$TARGETARCH" ]; then
             curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
         fi
         if [ "arm64" = "$TARGETARCH" ]; then
             curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
         fi
-        dpkg -i cuda-keyring_1.1-1_all.deb && \
-        rm -f cuda-keyring_1.1-1_all.deb && \
-        apt-get update && \
-        apt-get install -y --no-install-recommends \
-            cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-            libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-            libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-            libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-            libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-            libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
-    fi
-EOT
+RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
+    apt-get update && \
+    apt-get install -y --no-install-recommends \
+        software-properties-common pciutils && \
+        curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
     dpkg -i cuda-keyring_1.1-1_all.deb && \
     rm -f cuda-keyring_1.1-1_all.deb && \
     apt-get update && \
@@ -156,9 +146,8 @@ RUN <<EOT bash
         libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
         libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
     apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-    fi
-EOT
+    rm -rf /var/lib/apt/lists/* \
+    ; fi
 
 # If we are building with clblas support, we need the libraries for the builds
 RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
@@ -201,9 +190,7 @@ FROM ${GRPC_BASE_IMAGE} AS grpc
 # This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
 ARG GRPC_MAKEFLAGS="-j4 -Otarget"
-ARG GRPC_VERSION=v1.65.0
-ARG CMAKE_FROM_SOURCE=false
-ARG CMAKE_VERSION=3.26.4
+ARG GRPC_VERSION=v1.64.2
 
 ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
@@ -212,31 +199,18 @@ WORKDIR /build
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         ca-certificates \
-        build-essential curl libssl-dev \
+        build-essential \
+        cmake \
         git && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-# Install CMake (the version in 22.04 is too old)
-RUN <<EOT bash
-    if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
-        curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
-    else
-        apt-get update && \
-        apt-get install -y \
-            cmake && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
-    fi
-EOT
-
 # We install GRPC to a different prefix here so that we can copy in only the build artifacts later
 # saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
 # and running make install in the target container
 RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
     mkdir -p /build/grpc/cmake/build && \
     cd /build/grpc/cmake/build && \
-    sed -i "216i\ TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt" && \
     cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
     make && \
     make install && \
@@ -245,14 +219,13 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
 ###################################
 ###################################
 
-# The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer
-FROM requirements-drivers AS builder-base
+# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
+# Adjustments to the build process should likely be made here.
+FROM requirements-drivers AS builder
 
 ARG GO_TAGS="stablediffusion tts p2p"
 ARG GRPC_BACKENDS
 ARG MAKEFLAGS
-ARG LD_FLAGS="-s -w"
 
 ENV GRPC_BACKENDS=${GRPC_BACKENDS}
 ENV GO_TAGS=${GO_TAGS}
@@ -260,12 +233,14 @@
 ENV MAKEFLAGS=${MAKEFLAGS}
 
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
 ENV NVIDIA_VISIBLE_DEVICES=all
-ENV LD_FLAGS=${LD_FLAGS}
-
-RUN echo "GO_TAGS: $GO_TAGS" && echo "TARGETARCH: $TARGETARCH"
 
 WORKDIR /build
 
+COPY . .
+COPY .git .
+RUN echo "GO_TAGS: $GO_TAGS"
+RUN make prepare
 
 # We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below
 # but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
@@ -283,56 +258,15 @@ RUN <<EOT bash
     fi
 EOT
 
-###################################
-###################################
-
-# This first portion of builder holds the layers specifically used to build backend-assets/grpc/stablediffusion
-# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
-FROM builder-base AS builder-sd
-
-# stablediffusion does not tolerate a newer version of abseil, copy only over enough elements to build it
-COPY Makefile .
-COPY go.mod .
-COPY go.sum .
-COPY backend/backend.proto ./backend/backend.proto
-COPY backend/go/image/stablediffusion ./backend/go/image/stablediffusion
-COPY pkg/grpc ./pkg/grpc
-COPY pkg/stablediffusion ./pkg/stablediffusion
-RUN git init
-RUN make sources/go-stable-diffusion
-RUN touch prepare-sources
-
-# Actually build the backend
-RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make backend-assets/grpc/stablediffusion
-
-###################################
-###################################
-
-# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
-# Adjustments to the build process should likely be made here.
-FROM builder-sd AS builder
+# stablediffusion does not tolerate a newer version of abseil, build it first
+RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
 
 # Install the pre-built GRPC
 COPY --from=grpc /opt/grpc /usr/local
 
 # Rebuild with defaults backends
 WORKDIR /build
 
-COPY . .
-COPY .git .
-RUN make prepare
-
-## Build the binary
-## If it's CUDA or hipblas, we want to skip some of the llama-compat backends to save space
-## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
-## (both will use CUDA or hipblas for the actual computation)
-RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
-        SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
-    else \
-        make build; \
-    fi
+RUN make build
 
 RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
     mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
@@ -342,40 +276,6 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
 ###################################
 ###################################
 
-# The devcontainer target is not used on CI. It is a target for developers to use locally -
-# rather than copying files it mounts them locally and leaves building to the developer
-FROM builder-base AS devcontainer
-
-ARG FFMPEG
-
-COPY --from=grpc /opt/grpc /usr/local
-
-COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion /build/backend-assets/grpc/stablediffusion
-
-COPY .devcontainer-scripts /.devcontainer-scripts
-
-# Add FFmpeg
-RUN if [ "${FFMPEG}" = "true" ]; then \
-    apt-get update && \
-    apt-get install -y --no-install-recommends \
-        ffmpeg && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/* \
-    ; fi
-
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        ssh less wget
-# For the devcontainer, leave apt functional in case additional devtools are needed at runtime.
-
-RUN go install github.com/go-delve/delve/cmd/dlv@latest
-RUN go install github.com/mikefarah/yq/v4@latest
-
-###################################
-###################################
-
 # This is the final target. The result of this target will be the image uploaded to the registry.
 # If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
 FROM requirements-drivers
@@ -392,7 +292,7 @@
 ENV REBUILD=false
 ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
 ENV MAKEFLAGS=${MAKEFLAGS}
 
-ARG CUDA_MAJOR_VERSION=12
+ARG CUDA_MAJOR_VERSION=11
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
 ENV NVIDIA_VISIBLE_DEVICES=all
@@ -426,7 +326,7 @@ COPY --from=builder /build/local-ai ./
 COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
 
 # do not let stablediffusion rebuild (requires an older version of absl)
-COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
+COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
 
 # Change the shell to bash so we can use [[ tests below
 SHELL ["/bin/bash", "-c"]
@@ -445,6 +345,9 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/transformers-musicgen \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama1" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/exllama \
; fi
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
@@ -453,6 +356,9 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$I
if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/openvoice \ make -C backend/python/openvoice \
; fi && \ ; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "petals" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/petals \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/sentencetransformers \ make -C backend/python/sentencetransformers \
; fi && \ ; fi && \
Makefile
View File
@@ -3,45 +3,39 @@ GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=96776405a17034dcfd53d3ddf5d142d34bdbb657
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=963552903f51043ee947a8deeaaa7ec00bc3f1a4
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8
# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=fdbfb460ed546452a5d53611bba66d10d842e719
WHISPER_CPP_VERSION?=420b6abc54008ab634f5887dc45bd77122c2f320
# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
# go-piper version
PIPER_REPO?=https://github.com/mudler/go-piper
PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759
# stablediffusion version
STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
# tinydream version
TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
export BUILD_TYPE?=
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
export CMAKE_ARGS?=
export BACKEND_LIBS?=
CGO_LDFLAGS?=
CGO_LDFLAGS_WHISPER?=
CGO_LDFLAGS_WHISPER+=-lggml
CUDA_LIBPATH?=/usr/local/cuda/lib64/
GO_TAGS?=
BUILD_ID?=
@@ -54,13 +48,13 @@ RANDOM := $(shell bash -c 'echo $$RANDOM')
VERSION?=$(shell git describe --always --tags || echo "dev" )
# go tool nm ./local-ai | grep Commit
LD_FLAGS?=-s -w
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)"
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
LD_FLAGS?=
override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Version=$(VERSION)"
override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
OPTIONAL_TARGETS?=
export OS := $(shell uname -s)
OS := $(shell uname -s)
ARCH := $(shell uname -m)
GREEN := $(shell tput -Txterm setaf 2)
YELLOW := $(shell tput -Txterm setaf 3)
@@ -68,14 +62,6 @@ WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)
UPX?=
# check if upx exists
ifeq (, $(shell which upx))
UPX=
else
UPX=$(shell which upx)
endif
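A rough shell equivalent of the `ifeq (, $(shell which upx))` probe above, shown only to illustrate what the Makefile evaluates; it is not part of the repository:

```bash
# Mirror of the Makefile's UPX auto-detection.
if command -v upx >/dev/null 2>&1; then
    UPX="$(command -v upx)"   # binaries get compressed later in the build
else
    UPX=""                    # an empty UPX disables the compression steps
fi
echo "UPX=${UPX:-(disabled)}"
```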
# Default Docker bridge IP
E2E_BRIDGE_IP?=172.17.0.1
@@ -94,42 +80,29 @@ ifeq ($(OS),Darwin)
BUILD_TYPE=metal
# disable metal if on Darwin and any other value is explicitly passed.
else ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF
export GGML_NO_ACCELERATE=1
export GGML_NO_METAL=1
CMAKE_ARGS+=-DLLAMA_METAL=OFF
export LLAMA_NO_ACCELERATE=1
endif
ifeq ($(BUILD_TYPE),metal)
# -lcblas removed: it seems to always be listed as a duplicate flag.
CGO_LDFLAGS += -framework Accelerate
endif
else
CGO_LDFLAGS_WHISPER+=-lgomp
endif
ifeq ($(BUILD_TYPE),openblas)
CGO_LDFLAGS+=-lopenblas
export GGML_OPENBLAS=1
export WHISPER_OPENBLAS=1
endif
ifeq ($(BUILD_TYPE),cublas)
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
export GGML_CUDA=1
export WHISPER_CUDA=1
export LLAMA_CUBLAS=1
CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft
endif
ifeq ($(BUILD_TYPE),vulkan)
CMAKE_ARGS+=-DGGML_VULKAN=1
endif
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
export GGML_SYCL=1
endif
ifeq ($(BUILD_TYPE),sycl_f16)
export GGML_SYCL_F16=1
endif
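Each branch above maps a single BUILD_TYPE value onto the matching compiler and linker flags. Some illustrative invocations, as a sketch using the values these conditionals handle:

```bash
# Select an acceleration backend at build time.
make BUILD_TYPE=openblas build
make BUILD_TYPE=cublas CUDA_LIBPATH=/usr/local/cuda/lib64/ build
make BUILD_TYPE=vulkan build
make BUILD_TYPE=sycl_f16 build
```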
ifeq ($(BUILD_TYPE),hipblas)
ROCM_HOME ?= /opt/rocm
ROCM_PATH ?= /opt/rocm
@@ -138,26 +111,27 @@ ifeq ($(BUILD_TYPE),hipblas)
export CC=$(ROCM_HOME)/llvm/bin/clang
# llama-ggml has no hipblas support, so override it here.
export STABLE_BUILD_TYPE=
export GGML_HIPBLAS=1
export WHISPER_HIPBLAS=1
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
endif
ifeq ($(BUILD_TYPE),metal)
CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
export GGML_METAL=1
export LLAMA_METAL=1
export WHISPER_METAL=1
endif
ifeq ($(BUILD_TYPE),clblas)
CGO_LDFLAGS+=-lOpenCL -lclblast
export GGML_OPENBLAS=1
export WHISPER_CLBLAST=1
endif
# glibc-static or glibc-devel-static required
ifeq ($(STATIC),true)
LD_FLAGS+=-linkmode external -extldflags -static
LD_FLAGS=-linkmode external -extldflags -static
endif
ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
@@ -186,12 +160,11 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
# Use filter-out to remove the specified backends
ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
TEST_PATHS?=./api/... ./pkg/... ./core/...
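SKIP_GRPC_BACKEND (consumed by the filter-out above) and a direct GRPC_BACKENDS override give two ways to trim the backend list; for example, with backend paths as listed above:

```bash
# Build only chosen gRPC backends, or exclude specific ones.
make GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2 backend-assets/grpc/whisper" build
make SKIP_GRPC_BACKEND="backend-assets/grpc/rwkv" build
```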
@@ -211,97 +184,69 @@ all: help
## BERT embeddings
sources/go-bert.cpp:
mkdir -p sources/go-bert.cpp
cd sources/go-bert.cpp && \
git init && \
git remote add origin $(BERT_REPO) && \
git fetch origin && \
git checkout $(BERT_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert.cpp
cd sources/go-bert.cpp && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
$(MAKE) -C sources/go-bert.cpp libgobert.a
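All of the sources/ targets in this hunk replace a branch clone with the same init/fetch/checkout sequence, which pins an exact commit. The shared pattern, extracted as a sketch (REPO and COMMIT are placeholders):

```bash
# Pin a dependency to an exact commit without cloning a whole branch.
REPO=https://github.com/example/dependency                 # placeholder
COMMIT=0000000000000000000000000000000000000000            # placeholder
mkdir -p sources/dependency && cd sources/dependency
git init
git remote add origin "$REPO"
git fetch origin
git checkout "$COMMIT"
git submodule update --init --recursive --depth 1 --single-branch
```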
## go-llama.cpp
sources/go-llama.cpp:
mkdir -p sources/go-llama.cpp
cd sources/go-llama.cpp && \
git init && \
git remote add origin $(GOLLAMA_REPO) && \
git fetch origin && \
git checkout $(GOLLAMA_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama.cpp
cd sources/go-llama.cpp && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
## go-piper
sources/go-piper:
mkdir -p sources/go-piper
cd sources/go-piper && \
git init && \
git remote add origin $(PIPER_REPO) && \
git fetch origin && \
git checkout $(PIPER_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules https://github.com/mudler/go-piper sources/go-piper
cd sources/go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
sources/go-piper/libpiper_binding.a: sources/go-piper
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
## GPT4ALL
sources/gpt4all:
git clone --recurse-submodules $(GPT4ALL_REPO) sources/gpt4all
cd sources/gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
## RWKV
sources/go-rwkv.cpp:
mkdir -p sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && \
git init && \
git remote add origin $(RWKV_REPO) && \
git fetch origin && \
git checkout $(RWKV_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
## stable diffusion
sources/go-stable-diffusion:
mkdir -p sources/go-stable-diffusion
cd sources/go-stable-diffusion && \
git init && \
git remote add origin $(STABLEDIFFUSION_REPO) && \
git fetch origin && \
git checkout $(STABLEDIFFUSION_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion sources/go-stable-diffusion
cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
## tiny-dream
sources/go-tiny-dream:
mkdir -p sources/go-tiny-dream
cd sources/go-tiny-dream && \
git init && \
git remote add origin $(TINYDREAM_REPO) && \
git fetch origin && \
git checkout $(TINYDREAM_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone --recurse-submodules https://github.com/M0Rf30/go-tiny-dream sources/go-tiny-dream
cd sources/go-tiny-dream && git checkout -b build $(TINYDREAM_VERSION) && git submodule update --init --recursive --depth 1
sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
$(MAKE) -C sources/go-tiny-dream libtinydream.a
## whisper
sources/whisper.cpp:
mkdir -p sources/whisper.cpp
cd sources/whisper.cpp && \
git init && \
git remote add origin $(WHISPER_REPO) && \
git fetch origin && \
git checkout $(WHISPER_CPP_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
git clone https://github.com/ggerganov/whisper.cpp sources/whisper.cpp
cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
cd sources/whisper.cpp && $(MAKE) libwhisper.a
get-sources: sources/go-llama.cpp sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream
replace:
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
@@ -311,6 +256,7 @@ replace:
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
dropreplace:
@@ -321,6 +267,7 @@ dropreplace:
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
$(GOCMD) mod edit -dropreplace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
prepare-sources: get-sources replace
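The replace/dropreplace pair toggles local module overrides in go.mod. What a single entry does, spelled out with a module path taken from the targets above:

```bash
# Point the Go module at the vendored checkout, then undo it.
go mod edit -replace github.com/donomii/go-rwkv.cpp=$PWD/sources/go-rwkv.cpp
go mod edit -dropreplace github.com/donomii/go-rwkv.cpp
```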
@@ -330,6 +277,7 @@ prepare-sources: get-sources replace
rebuild: ## Rebuilds the project
$(GOCMD) clean -cache
$(MAKE) -C sources/go-llama.cpp clean
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
$(MAKE) -C sources/go-rwkv.cpp clean
$(MAKE) -C sources/whisper.cpp clean
$(MAKE) -C sources/go-stable-diffusion clean
@@ -359,49 +307,31 @@ clean-tests:
rm -rf test-dir
rm -rf core/http/backend-assets
clean-dc: clean
cp -r /build/backend-assets /workspace/backend-assets
## Build:
build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
ifneq ($(BACKEND_LIBS),)
$(MAKE) backend-assets/lib
cp -f $(BACKEND_LIBS) backend-assets/lib/
endif
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
build-minimal:
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=none $(MAKE) build
build-api:
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=p2p $(MAKE) build
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
backend-assets/lib:
mkdir -p backend-assets/lib
dist:
$(MAKE) backend-assets/grpc/llama-cpp-avx2 STATIC=true
$(MAKE) backend-assets/grpc/llama-cpp-avx2
ifeq ($(DETECT_LIBS),true)
scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2
endif
ifeq ($(OS),Darwin)
BUILD_TYPE=none $(MAKE) backend-assets/grpc/llama-cpp-fallback
$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
else
$(MAKE) backend-assets/grpc/llama-cpp-cuda
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f16
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f32
endif
GO_TAGS="tts p2p" $(MAKE) build
$(MAKE) build
ifeq ($(DETECT_LIBS),true)
scripts/prepare-libs.sh backend-assets/grpc/piper
endif
GO_TAGS="tts p2p" STATIC=true $(MAKE) build
mkdir -p release
# if BUILD_ID is empty, then we don't append it to the binary name
ifeq ($(BUILD_ID),)
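Typical invocations of the build targets above, as a sketch; BUILD_ID is optional, per the comment:

```bash
# Minimal single-backend build, API-only build, and a release build with an ID.
make build-minimal
make build-api
BUILD_ID=$(git rev-parse --short HEAD) make dist
```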
@@ -412,9 +342,9 @@ else
shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH).sha256
endif
dist-cross-linux-arm64:
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" GO_TAGS="p2p" \
STATIC=true $(MAKE) build
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
$(MAKE) build
mkdir -p release
# if BUILD_ID is empty, then we don't append it to the binary name
ifeq ($(BUILD_ID),)
@@ -452,7 +382,8 @@ test: prepare test-models/testmodel.ggml grpcs
export GO_TAGS="tts stablediffusion debug"
$(MAKE) prepare-test
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
$(MAKE) test-gpt4all
$(MAKE) test-llama
$(MAKE) test-llama-gguf
$(MAKE) test-tts
@@ -462,46 +393,50 @@ prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests .
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
run-e2e-aio: protogen-go
run-e2e-aio:
@echo 'Running e2e AIO tests'
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
test-e2e:
@echo 'Running e2e tests'
BUILD_TYPE=$(BUILD_TYPE) \
LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e
teardown-e2e:
rm -rf $(TEST_DIR) || true
docker stop $$(docker ps -q --filter ancestor=localai-tests)
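The e2e targets are meant to run in sequence; a plausible session, assuming Docker with GPU access (per the --gpus flag above):

```bash
# Build the test image, start it, run the suite, then clean up.
make prepare-e2e
make run-e2e-image
make test-e2e
make teardown-e2e
```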
test-gpt4all: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r $(TEST_PATHS)
test-llama: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS)
test-llama-gguf: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r $(TEST_PATHS)
test-tts: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts 1 -v -r $(TEST_PATHS)
test-stablediffusion: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts 1 -v -r $(TEST_PATHS)
test-stores: backend-assets/grpc/local-store
mkdir -p tests/integration/backend-assets/grpc
cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts $(TEST_FLAKES) -v -r tests/integration
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts 1 -v -r tests/integration
test-container:
docker build --target requirements -t local-ai-test-container .
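The recurring change in this hunk swaps hard-coded ginkgo --flake-attempts counts for the TEST_FLAKES variable, so retry behaviour can be chosen per run, e.g.:

```bash
# Retry flaky suites a chosen number of times.
make test-llama TEST_FLAKES=5
make test-tts TEST_FLAKES=1
```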
@@ -537,10 +472,10 @@ protogen-go-clean:
$(RM) bin/*
.PHONY: protogen-python
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
.PHONY: protogen-python-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
.PHONY: autogptq-protogen
autogptq-protogen:
@@ -574,6 +509,14 @@ diffusers-protogen:
diffusers-protogen-clean:
$(MAKE) -C backend/python/diffusers protogen-clean
.PHONY: exllama-protogen
exllama-protogen:
$(MAKE) -C backend/python/exllama protogen
.PHONY: exllama-protogen-clean
exllama-protogen-clean:
$(MAKE) -C backend/python/exllama protogen-clean
.PHONY: exllama2-protogen
exllama2-protogen:
$(MAKE) -C backend/python/exllama2 protogen
@@ -590,6 +533,14 @@ mamba-protogen:
mamba-protogen-clean:
$(MAKE) -C backend/python/mamba protogen-clean
.PHONY: petals-protogen
petals-protogen:
$(MAKE) -C backend/python/petals protogen
.PHONY: petals-protogen-clean
petals-protogen-clean:
$(MAKE) -C backend/python/petals protogen-clean
.PHONY: rerankers-protogen
rerankers-protogen:
$(MAKE) -C backend/python/rerankers protogen
@@ -670,6 +621,8 @@ prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/parler-tts
$(MAKE) -C backend/python/vall-e-x
$(MAKE) -C backend/python/openvoice
$(MAKE) -C backend/python/exllama
$(MAKE) -C backend/python/petals
$(MAKE) -C backend/python/exllama2
prepare-test-extra: protogen-python
@@ -690,21 +643,25 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
mkdir -p backend-assets/espeak-ng-data
@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
mkdir -p backend-assets/gpt4all
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
backend-assets/grpc: protogen-go replace
mkdir -p backend-assets/grpc
backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/bert-embeddings
endif
backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
backend-assets/grpc/huggingface: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/huggingface
endif
backend/cpp/llama/llama.cpp:
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
@@ -732,71 +689,71 @@ else
endif
# This target is for manually building a variant with auto-detected flags
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-cpp
$(MAKE) -C backend/cpp/llama-cpp purge
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
$(MAKE) VARIANT="llama-cpp" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-cpp/grpc-server backend-assets/grpc/llama-cpp
backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-avx2
$(MAKE) -C backend/cpp/llama-avx2 purge
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-avx
$(MAKE) -C backend/cpp/llama-avx purge
$(info ${GREEN}I llama-cpp build info:avx${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-fallback
$(MAKE) -C backend/cpp/llama-fallback purge
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
# TODO: every binary should have its own folder instead, so can have different metal implementations
ifeq ($(BUILD_TYPE),metal)
cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
endif
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-cuda
$(MAKE) -C backend/cpp/llama-cuda purge
$(info ${GREEN}I llama-cpp build info:cuda${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
$(MAKE) -C backend/cpp/llama-hipblas purge
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f16
$(MAKE) -C backend/cpp/llama-sycl_f16 purge
$(info ${GREEN}I llama-cpp build info:sycl_f16${RESET})
BUILD_TYPE="sycl_f16" $(MAKE) VARIANT="llama-sycl_f16" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-sycl_f16/grpc-server backend-assets/grpc/llama-cpp-sycl_f16
backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f32
$(MAKE) -C backend/cpp/llama-sycl_f32 purge
$(info ${GREEN}I llama-cpp build info:sycl_f32${RESET})
BUILD_TYPE="sycl_f32" $(MAKE) VARIANT="llama-sycl_f32" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-sycl_f32/grpc-server backend-assets/grpc/llama-cpp-sycl_f32
backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.cpp
backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-grpc
$(MAKE) -C backend/cpp/llama-grpc purge
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_RPC=ON -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
@@ -806,50 +763,29 @@ backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/llama-ggml
endif
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/piper
endif
backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/rwkv
endif
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/stablediffusion
endif
backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/tinydream
endif
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/whisper
endif
backend-assets/grpc/local-store: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/local-store
endif
grpcs: prepare $(GRPC_BACKENDS)
@@ -867,17 +803,6 @@ docker:
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
-t $(DOCKER_IMAGE) .
docker-cuda11:
docker build \
--build-arg CUDA_MAJOR_VERSION=11 \
--build-arg CUDA_MINOR_VERSION=8 \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
-t $(DOCKER_IMAGE)-cuda11 .
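The docker targets are thin wrappers over docker build; example usages, with image tags as placeholders:

```bash
# Build the standard image and the CUDA 11 variant defined above.
make docker DOCKER_IMAGE=localai/localai:dev
make docker-cuda11 DOCKER_IMAGE=localai/localai:dev
```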
docker-aio:
@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
docker build \
@@ -891,7 +816,7 @@ docker-aio-all:
docker-image-intel:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
@@ -899,7 +824,7 @@ docker-image-intel:
docker-image-intel-xpu:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
@@ -914,7 +839,7 @@ gen-assets:
$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets
## Documentation
docs/layouts/_default:
mkdir -p docs/layouts/_default
docs/static/gallery.html: docs/layouts/_default
@@ -929,4 +854,4 @@ docs-clean:
.PHONY: docs
docs: docs/static/gallery.html
cd docs && hugo serve
View File
@@ -40,7 +40,7 @@
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
>
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/go-skynet/LocalAI/tree/master/examples/)
> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)
@@ -48,13 +48,6 @@
![screen](https://github.com/mudler/LocalAI/assets/2420543/20b5ccd2-8393-44f0-aaf6-87a23806381e)
Run the installer script:
```bash
curl https://localai.io/install.sh | sh
```
Or run with docker:
```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
# Alternative images:
@@ -68,33 +61,25 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
[💻 Getting started](https://localai.io/basics/getting_started/index.html)
## 📰 Latest project news
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628
- May 2024: 🔥🔥 Decentralized P2P llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
- May 2024: 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
- May 2024: 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
- May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
## 🔥🔥 Hot topics / Roadmap
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
- 🔥🔥 Decentralized llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
- 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
- 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
- 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
- Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
- Reranker API: https://github.com/mudler/LocalAI/pull/2121
## 🔥🔥 Hot topics (looking for help):
Hot topics (looking for contributors):
- Multimodal with vLLM and Video understanding: https://github.com/mudler/LocalAI/pull/3729
- Realtime API https://github.com/mudler/LocalAI/issues/3714
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
- Assistant API: https://github.com/mudler/LocalAI/issues/1273
- Moderation endpoint: https://github.com/mudler/LocalAI/issues/999
- Vulkan: https://github.com/mudler/LocalAI/issues/1647
- Anthropic API: https://github.com/mudler/LocalAI/issues/1808
If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22
@@ -111,7 +96,6 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
 - 🥽 [Vision API](https://localai.io/features/gpt-vision/)
 - 📈 [Reranker API](https://localai.io/features/reranker/)
 - 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
-- 🌍 Integrated WebUI!
 ## 💻 Usage
@@ -140,7 +124,6 @@ Other:
 - Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
 - Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
 - Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
-- Github Actions: https://github.com/marketplace/actions/start-localai
 - Examples: https://github.com/mudler/LocalAI/tree/master/examples/
@@ -154,7 +137,6 @@ Other:
 ## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
-- [Run Visual studio code with LocalAI (SUSE)](https://www.suse.com/c/running-ai-locally/)
 - 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/)
 - [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
 - [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)

View File

@@ -2,7 +2,7 @@ backend: llama-cpp
 context_size: 4096
 f16: true
 mmap: true
-name: gpt-4o
+name: gpt-4-vision-preview
 roles:
   user: "USER:"

View File

@@ -2,7 +2,7 @@ backend: llama-cpp
 context_size: 4096
 f16: true
 mmap: true
-name: gpt-4o
+name: gpt-4-vision-preview
 roles:
   user: "USER:"

View File

@@ -1,6 +1,6 @@
 name: stablediffusion
 parameters:
-  model: Lykon/dreamshaper-8
+  model: runwayml/stable-diffusion-v1-5
 backend: diffusers
 step: 25
 f16: true

View File

@@ -2,7 +2,7 @@ backend: llama-cpp
 context_size: 4096
 mmap: false
 f16: false
-name: gpt-4o
+name: gpt-4-vision-preview
 roles:
   user: "USER:"

View File

@@ -16,7 +16,6 @@ service Backend {
   rpc GenerateImage(GenerateImageRequest) returns (Result) {}
   rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
   rpc TTS(TTSRequest) returns (Result) {}
-  rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
   rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
   rpc Status(HealthMessage) returns (StatusResponse) {}
@@ -26,19 +25,6 @@ service Backend {
   rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
   rpc Rerank(RerankRequest) returns (RerankResult) {}
-  rpc GetMetrics(MetricsRequest) returns (MetricsResponse);
-}
-// Define the empty request
-message MetricsRequest {}
-message MetricsResponse {
-  int32 slot_id = 1;
-  string prompt_json_for_slot = 2; // Stores the prompt as a JSON string.
-  float tokens_per_second = 3;
-  int32 tokens_generated = 4;
-  int32 prompt_tokens_processed = 5;
 }
 message RerankRequest {
@@ -147,9 +133,6 @@ message PredictOptions {
   repeated string Images = 42;
   bool UseTokenizerTemplate = 43;
   repeated Message Messages = 44;
-  repeated string Videos = 45;
-  repeated string Audios = 46;
-  string CorrelationId = 47;
 }
 // The response message containing the result
@@ -247,7 +230,6 @@ message TranscriptRequest {
   string dst = 2;
   string language = 3;
   uint32 threads = 4;
-  bool translate = 5;
 }
 message TranscriptResult {
@@ -287,17 +269,6 @@ message TTSRequest {
   optional string language = 5;
 }
-message SoundGenerationRequest {
-  string text = 1;
-  string model = 2;
-  string dst = 3;
-  optional float duration = 4;
-  optional float temperature = 5;
-  optional bool sample = 6;
-  optional string src = 7;
-  optional int32 src_divisor = 8;
-}
 message TokenizationResponse {
   int32 length = 1;
   repeated int32 tokens = 2;
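For context on the surface this hunk removes: `SoundGeneration` and `GetMetrics` were client-facing RPCs on the `Backend` service. Below is a minimal sketch of how a client could have exercised them through the generated Go stubs, assuming stubs generated from this proto at `github.com/mudler/LocalAI/pkg/grpc/proto` (the import path used on the removed side of this diff) and a backend listening on `localhost:50051`, the default address used by the wrapper `main`s further down; the model name is hypothetical.

```go
package main

import (
	"context"
	"fmt"
	"log"

	pb "github.com/mudler/LocalAI/pkg/grpc/proto" // assumed: stubs generated from backend.proto
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

func main() {
	conn, err := grpc.Dial("localhost:50051", grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()
	client := pb.NewBackendClient(conn)

	// SoundGeneration: the proto3 optional fields become pointers in Go.
	duration := float32(10.0)
	if _, err := client.SoundGeneration(context.Background(), &pb.SoundGenerationRequest{
		Text:     "a dog barking",
		Model:    "musicgen", // hypothetical model name
		Dst:      "/tmp/out.wav",
		Duration: &duration,
	}); err != nil {
		log.Fatal(err)
	}

	// GetMetrics takes the empty MetricsRequest and reports per-slot throughput.
	m, err := client.GetMetrics(context.Background(), &pb.MetricsRequest{})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("slot %d: %.2f tokens/s (%d generated)\n",
		m.GetSlotId(), m.GetTokensPerSecond(), m.GetTokensGenerated())
}
```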

View File

@@ -46,14 +46,9 @@ endif
 $(INSTALLED_PACKAGES): grpc_build
 $(GRPC_REPO):
-	mkdir -p $(GRPC_REPO)/grpc
-	cd $(GRPC_REPO)/grpc && \
-	git init && \
-	git remote add origin $(GIT_REPO_LIB_GRPC) && \
-	git fetch origin && \
-	git checkout $(TAG_LIB_GRPC) && \
-	git submodule update --init --recursive --depth 1 --single-branch
+	git clone --depth $(GIT_CLONE_DEPTH) -b $(TAG_LIB_GRPC) $(GIT_REPO_LIB_GRPC) $(GRPC_REPO)/grpc
+	cd $(GRPC_REPO)/grpc && git submodule update --jobs 2 --init --recursive --depth $(GIT_CLONE_DEPTH)
 $(GRPC_BUILD): $(GRPC_REPO)
 	mkdir -p $(GRPC_BUILD)
 	cd $(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . && cmake --build . --target install

View File

@@ -1,58 +1,45 @@
 LLAMA_VERSION?=
-LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
 CMAKE_ARGS?=
 BUILD_TYPE?=
 ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
-TARGET?=--target grpc-server
-# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
-CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
-# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
+# If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
 ifeq ($(BUILD_TYPE),cublas)
-	CMAKE_ARGS+=-DGGML_CUDA=ON
+	CMAKE_ARGS+=-DLLAMA_CUBLAS=ON
-# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
+# If build type is openblas then we set -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
 # to CMAKE_ARGS automatically
 else ifeq ($(BUILD_TYPE),openblas)
-	CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
+	CMAKE_ARGS+=-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
-# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
+# If build type is clblas (openCL) we set -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
 else ifeq ($(BUILD_TYPE),clblas)
-	CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
+	CMAKE_ARGS+=-DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
 # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
 else ifeq ($(BUILD_TYPE),hipblas)
-	CMAKE_ARGS+=-DGGML_HIPBLAS=ON
+	CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
-# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
+# If it's OSX, DO NOT embed the metal library - -DLLAMA_METAL_EMBED_LIBRARY=ON requires further investigation
 # But if it's OSX without metal, disable it here
-else ifeq ($(OS),Darwin)
+else ifeq ($(OS),darwin)
 	ifneq ($(BUILD_TYPE),metal)
-		CMAKE_ARGS+=-DGGML_METAL=OFF
+		CMAKE_ARGS+=-DLLAMA_METAL=OFF
-	else
-		CMAKE_ARGS+=-DGGML_METAL=ON
-		# Until this is tested properly, we disable embedded metal file
-		# as we already embed it as part of the LocalAI assets
-		CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=OFF
-		TARGET+=--target ggml-metal
 	endif
 endif
 ifeq ($(BUILD_TYPE),sycl_f16)
-	CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
+	CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
 endif
 ifeq ($(BUILD_TYPE),sycl_f32)
-	CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+	CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
 endif
 llama.cpp:
-	mkdir -p llama.cpp
-	cd llama.cpp && \
-	git init && \
-	git remote add origin $(LLAMA_REPO) && \
-	git fetch origin && \
-	git checkout -b build $(LLAMA_VERSION) && \
-	git submodule update --init --recursive --depth 1 --single-branch
+	git clone --recurse-submodules https://github.com/ggerganov/llama.cpp llama.cpp
+	if [ -z "$(LLAMA_VERSION)" ]; then \
+		exit 1; \
+	fi
+	cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1
 llama.cpp/examples/grpc-server: llama.cpp
 	mkdir -p llama.cpp/examples/grpc-server
@@ -74,9 +61,9 @@ clean: purge
 grpc-server: llama.cpp llama.cpp/examples/grpc-server
 	@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
 ifneq (,$(findstring sycl,$(BUILD_TYPE)))
-	+bash -c "source $(ONEAPI_VARS); \
-		cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
+	bash -c "source $(ONEAPI_VARS); \
+		cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)"
 else
-	+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
+	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)
 endif
 	cp llama.cpp/build/bin/grpc-server .

View File

@@ -13,15 +13,15 @@
 #include <getopt.h>
 #include "clip.h"
 #include "llava.h"
-#include "log.h"
 #include "stb_image.h"
 #include "common.h"
 #include "json.hpp"
 #include "llama.h"
+#include "grammar-parser.h"
 #include "backend.pb.h"
 #include "backend.grpc.pb.h"
 #include "utils.hpp"
-#include "sampling.h"
 // include std::regex
 #include <cstddef>
 #include <thread>
@@ -113,7 +113,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
     std::string ret;
     for (; begin != end; ++begin)
     {
-        ret += common_token_to_piece(ctx, *begin);
+        ret += llama_token_to_piece(ctx, *begin);
     }
     return ret;
 }
@@ -121,7 +121,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
 // format incomplete utf-8 multibyte character for output
 static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
 {
-    std::string out = token == -1 ? "" : common_token_to_piece(ctx, token);
+    std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
     // if the size is 1 and first bit is 1, meaning it's a partial character
     // (size > 1 meaning it's already a known token)
     if (out.size() == 1 && (out[0] & 0x80) == 0x80)
@@ -203,8 +203,8 @@ struct llama_client_slot
     std::string stopping_word;
     // sampling
-    struct common_sampler_params sparams;
-    common_sampler *ctx_sampling = nullptr;
+    struct llama_sampling_params sparams;
+    llama_sampling_context *ctx_sampling = nullptr;
     int32_t ga_i = 0; // group-attention state
     int32_t ga_n = 1; // group-attention factor
@@ -257,7 +257,7 @@ struct llama_client_slot
         images.clear();
     }
-    bool has_budget(common_params &global_params) {
+    bool has_budget(gpt_params &global_params) {
         if (params.n_predict == -1 && global_params.n_predict == -1)
         {
             return true; // limitless
@@ -398,7 +398,7 @@ struct llama_server_context
     clip_ctx *clp_ctx = nullptr;
-    common_params params;
+    gpt_params params;
     llama_batch batch;
@@ -441,7 +441,7 @@ struct llama_server_context
         }
     }
-    bool load_model(const common_params &params_)
+    bool load_model(const gpt_params &params_)
     {
         params = params_;
         if (!params.mmproj.empty()) {
@@ -449,7 +449,7 @@ struct llama_server_context
             LOG_INFO("Multi Modal Mode Enabled", {});
             clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1);
             if(clp_ctx == nullptr) {
-                LOG_ERR("unable to load clip model: %s", params.mmproj.c_str());
+                LOG_ERROR("unable to load clip model", {{"model", params.mmproj}});
                 return false;
             }
@@ -458,12 +458,10 @@ struct llama_server_context
             }
         }
-        common_init_result common_init = common_init_from_params(params);
-        model = common_init.model;
-        ctx = common_init.context;
+        std::tie(model, ctx) = llama_init_from_gpt_params(params);
         if (model == nullptr)
         {
-            LOG_ERR("unable to load model: %s", params.model.c_str());
+            LOG_ERROR("unable to load model", {{"model", params.model}});
             return false;
         }
@@ -471,7 +469,7 @@ struct llama_server_context
             const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
             const int n_embd_llm = llama_n_embd(model);
             if (n_embd_clip != n_embd_llm) {
-                LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
+                LOG_TEE("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
                 llama_free(ctx);
                 llama_free_model(model);
                 return false;
@@ -480,7 +478,7 @@ struct llama_server_context
         n_ctx = llama_n_ctx(ctx);
-        add_bos_token = llama_add_bos_token(model);
+        add_bos_token = llama_should_add_bos_token(model);
         return true;
     }
@@ -490,21 +488,11 @@ struct llama_server_context
             std::vector<char> buf(1);
             int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
             if (res < 0) {
-                LOG_ERR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", __func__);
+                LOG_ERROR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", {});
                 sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template
             }
     }
-    llama_client_slot* get_active_slot() {
-        for (llama_client_slot& slot : slots) {
-            // Check if the slot is currently processing
-            if (slot.is_processing()) {
-                return &slot; // Return the active slot
-            }
-        }
-        return nullptr; // No active slot found
-    }
     void initialize() {
         // create slots
         all_slots_are_idle = true;
@@ -578,12 +566,12 @@ struct llama_server_context
                 std::vector<llama_token> p;
                 if (first)
                 {
-                    p = common_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
+                    p = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
                     first = false;
                 }
                 else
                 {
-                    p = common_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
+                    p = ::llama_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
                 }
                 prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end());
             }
@@ -600,7 +588,7 @@ struct llama_server_context
         else
         {
             auto s = json_prompt.template get<std::string>();
-            prompt_tokens = common_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
+            prompt_tokens = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
         }
         return prompt_tokens;
@@ -629,7 +617,7 @@ struct llama_server_context
     bool launch_slot_with_data(llama_client_slot* &slot, json data) {
         slot_params default_params;
-        common_sampler_params default_sparams;
+        llama_sampling_params default_sparams;
         slot->params.stream = json_value(data, "stream", false);
         slot->params.cache_prompt = json_value(data, "cache_prompt", false);
@@ -638,7 +626,7 @@ struct llama_server_context
         slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
         slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
         slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
-        slot->sparams.typ_p = json_value(data, "typical_p", default_sparams.typ_p);
+        slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p);
         slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
         slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
         slot->sparams.dynatemp_exponent = json_value(data, "dynatemp_exponent", default_sparams.dynatemp_exponent);
@@ -651,7 +639,7 @@ struct llama_server_context
         slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
         slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
         slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
-        slot->sparams.seed = json_value(data, "seed", default_sparams.seed);
+        slot->params.seed = json_value(data, "seed", default_params.seed);
         slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
         slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
         slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
@@ -675,7 +663,6 @@ struct llama_server_context
             slot->params.input_prefix = "";
         }
-
         if (data.count("input_suffix") != 0)
         {
             slot->params.input_suffix = data["input_suffix"];
@@ -694,10 +681,6 @@ struct llama_server_context
             slot->prompt = "";
         }
-        if (json_value(data, "ignore_eos", false)) {
-            slot->sparams.logit_bias.push_back({llama_token_eos(model), -INFINITY});
-        }
-        /*
         slot->sparams.penalty_prompt_tokens.clear();
         slot->sparams.use_penalty_prompt_tokens = false;
         const auto &penalty_prompt = data.find("penalty_prompt");
@@ -733,10 +716,14 @@ struct llama_server_context
                 slot->sparams.use_penalty_prompt_tokens = true;
             }
         }
-        */
         slot->sparams.logit_bias.clear();
+        if (json_value(data, "ignore_eos", false))
+        {
+            slot->sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
+        }
         const auto &logit_bias = data.find("logit_bias");
         if (logit_bias != data.end() && logit_bias->is_array())
         {
@@ -764,21 +751,21 @@ struct llama_server_context
                     llama_token tok = el[0].get<llama_token>();
                     if (tok >= 0 && tok < n_vocab)
                     {
-                        slot->sparams.logit_bias.push_back({tok, bias});
+                        slot->sparams.logit_bias[tok] = bias;
                     }
                 }
                 else if (el[0].is_string())
                 {
-                    auto toks = common_tokenize(model, el[0].get<std::string>(), false);
+                    auto toks = llama_tokenize(model, el[0].get<std::string>(), false);
                     for (auto tok : toks)
                     {
-                        slot->sparams.logit_bias.push_back({tok, bias});
+                        slot->sparams.logit_bias[tok] = bias;
                     }
                 }
             }
         }
         slot->params.antiprompt.clear();
         const auto &stop = data.find("stop");
@@ -792,22 +779,24 @@ struct llama_server_context
                 }
             }
         }
-        const auto & samplers = data.find("samplers");
-        if (samplers != data.end() && samplers->is_array()) {
-            std::vector<std::string> sampler_names;
-            for (const auto & name : *samplers) {
-                if (name.is_string()) {
-                    sampler_names.emplace_back(name);
-                }
-            }
-            slot->sparams.samplers = common_sampler_types_from_names(sampler_names, false);
-        }
+        const auto &samplers_sequence = data.find("samplers");
+        if (samplers_sequence != data.end() && samplers_sequence->is_array())
+        {
+            std::vector<std::string> sampler_names;
+            for (const auto &sampler_name : *samplers_sequence)
+            {
+                if (sampler_name.is_string())
+                {
+                    sampler_names.emplace_back(sampler_name);
+                }
+            }
+            slot->sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
+        }
         else
         {
-            slot->sparams.samplers = default_sparams.samplers;
+            slot->sparams.samplers_sequence = default_sparams.samplers_sequence;
         }
         if (multimodal)
         {
@@ -823,11 +812,10 @@ struct llama_server_context
                     img_sl.img_data = clip_image_u8_init();
                     if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
                     {
-                        LOG_ERR("%s: failed to load image, slot_id: %d, img_sl_id: %d",
-                            __func__,
-                            slot->id,
-                            img_sl.id
-                        );
+                        LOG_ERROR("failed to load image", {
+                            {"slot_id", slot->id},
+                            {"img_sl_id", img_sl.id}
+                        });
                         return false;
                     }
                     LOG_VERBOSE("image loaded", {
@@ -865,12 +853,12 @@ struct llama_server_context
                         }
                     }
                     if (!found) {
-                        LOG("ERROR: Image with id: %i, not found.\n", img_id);
+                        LOG_TEE("ERROR: Image with id: %i, not found.\n", img_id);
                         slot->images.clear();
                         return false;
                     }
                 } catch (const std::invalid_argument& e) {
-                    LOG("Invalid image number id in prompt\n");
+                    LOG_TEE("Invalid image number id in prompt\n");
                     slot->images.clear();
                     return false;
                 }
@@ -885,10 +873,10 @@ struct llama_server_context
         if (slot->ctx_sampling != nullptr)
         {
-            common_sampler_free(slot->ctx_sampling);
+            llama_sampling_free(slot->ctx_sampling);
         }
-        slot->ctx_sampling = common_sampler_init(model, slot->sparams);
-        //llama_set_rng_seed(ctx, slot->params.seed);
+        slot->ctx_sampling = llama_sampling_init(slot->sparams);
+        llama_set_rng_seed(ctx, slot->params.seed);
         slot->command = LOAD_PROMPT;
         all_slots_are_idle = false;
@@ -898,8 +886,6 @@ struct llama_server_context
             {"task_id", slot->task_id},
         });
-        // LOG("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
         return true;
     }
@@ -914,13 +900,13 @@ struct llama_server_context
         system_tokens.clear();
         if (!system_prompt.empty()) {
-            system_tokens = common_tokenize(ctx, system_prompt, add_bos_token);
+            system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token);
-            common_batch_clear(batch);
+            llama_batch_clear(batch);
             for (int i = 0; i < (int)system_tokens.size(); ++i)
             {
-                common_batch_add(batch, system_tokens[i], i, { 0 }, false);
+                llama_batch_add(batch, system_tokens[i], i, { 0 }, false);
             }
             for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += params.n_batch)
@@ -938,7 +924,7 @@ struct llama_server_context
                 };
                 if (llama_decode(ctx, batch_view) != 0)
                 {
-                    LOG("%s: llama_decode() failed\n", __func__);
+                    LOG_TEE("%s: llama_decode() failed\n", __func__);
                     return;
                 }
             }
@@ -950,7 +936,7 @@ struct llama_server_context
             }
         }
-        LOG("system prompt updated\n");
+        LOG_TEE("system prompt updated\n");
         system_need_update = false;
     }
@@ -1009,20 +995,18 @@ struct llama_server_context
     bool process_token(completion_token_output &result, llama_client_slot &slot) {
         // remember which tokens were sampled - used for repetition penalties during sampling
-        const std::string token_str = common_token_to_piece(ctx, result.tok);
+        const std::string token_str = llama_token_to_piece(ctx, result.tok);
         slot.sampled = result.tok;
         // search stop word and delete it
         slot.generated_text += token_str;
         slot.has_next_token = true;
-        /*
         if (slot.ctx_sampling->params.use_penalty_prompt_tokens && result.tok != -1)
         {
             // we can change penalty_prompt_tokens because it is always created from scratch each request
             slot.ctx_sampling->params.penalty_prompt_tokens.push_back(result.tok);
         }
-        */
         // check if there is incomplete UTF-8 character at the end
         bool incomplete = false;
@@ -1131,8 +1115,8 @@ struct llama_server_context
                 continue;
             }
-            if (!llava_image_embed_make_with_clip_img(clp_ctx, params.cpuparams.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
-                LOG("Error processing the given image");
+            if (!llava_image_embed_make_with_clip_img(clp_ctx, params.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
+                LOG_TEE("Error processing the given image");
                 return false;
             }
@@ -1144,7 +1128,7 @@ struct llama_server_context
     void send_error(task_server& task, const std::string &error)
     {
-        LOG("task %i - error: %s\n", task.id, error.c_str());
+        LOG_TEE("task %i - error: %s\n", task.id, error.c_str());
         task_result res;
         res.id = task.id;
         res.multitask_id = task.multitask_id;
@@ -1156,11 +1140,13 @@ struct llama_server_context
     json get_formated_generation(llama_client_slot &slot)
     {
-        std::vector<std::string> samplers;
-        samplers.reserve(slot.sparams.samplers.size());
-        for (const auto & sampler : slot.sparams.samplers)
+        const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
+        const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
+                                eos_bias->second < 0.0f && std::isinf(eos_bias->second);
+        std::vector<std::string> samplers_sequence;
+        for (const auto &sampler_type : slot.sparams.samplers_sequence)
         {
-            samplers.emplace_back(common_sampler_type_to_str(sampler));
+            samplers_sequence.emplace_back(llama_sampling_type_to_str(sampler_type));
         }
         return json {
@@ -1175,11 +1161,13 @@ struct llama_server_context
             {"top_p", slot.sparams.top_p},
             {"min_p", slot.sparams.min_p},
             {"tfs_z", slot.sparams.tfs_z},
-            {"typical_p", slot.sparams.typ_p},
+            {"typical_p", slot.sparams.typical_p},
             {"repeat_last_n", slot.sparams.penalty_last_n},
             {"repeat_penalty", slot.sparams.penalty_repeat},
             {"presence_penalty", slot.sparams.penalty_present},
             {"frequency_penalty", slot.sparams.penalty_freq},
+            {"penalty_prompt_tokens", slot.sparams.penalty_prompt_tokens},
+            {"use_penalty_prompt_tokens", slot.sparams.use_penalty_prompt_tokens},
             {"mirostat", slot.sparams.mirostat},
             {"mirostat_tau", slot.sparams.mirostat_tau},
             {"mirostat_eta", slot.sparams.mirostat_eta},
@@ -1187,13 +1175,13 @@ struct llama_server_context
             {"stop", slot.params.antiprompt},
             {"n_predict", slot.params.n_predict},
             {"n_keep", params.n_keep},
-            {"ignore_eos", slot.sparams.ignore_eos},
+            {"ignore_eos", ignore_eos},
             {"stream", slot.params.stream},
-            // {"logit_bias", slot.sparams.logit_bias},
+            {"logit_bias", slot.sparams.logit_bias},
             {"n_probs", slot.sparams.n_probs},
             {"min_keep", slot.sparams.min_keep},
             {"grammar", slot.sparams.grammar},
-            {"samplers", samplers}
+            {"samplers", samplers_sequence}
         };
     }
@@ -1216,7 +1204,7 @@ struct llama_server_context
         if (slot.sparams.n_probs > 0)
         {
             std::vector<completion_token_output> probs_output = {};
-            const std::vector<llama_token> to_send_toks = common_tokenize(ctx, tkn.text_to_send, false);
+            const std::vector<llama_token> to_send_toks = llama_tokenize(ctx, tkn.text_to_send, false);
             size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size());
             size_t probs_stop_pos = std::min(slot.sent_token_probs_index + to_send_toks.size(), slot.generated_token_probs.size());
             if (probs_pos < probs_stop_pos)
@@ -1268,7 +1256,7 @@ struct llama_server_context
         std::vector<completion_token_output> probs = {};
         if (!slot.params.stream && slot.stopped_word)
         {
-            const std::vector<llama_token> stop_word_toks = common_tokenize(ctx, slot.stopping_word, false);
+            const std::vector<llama_token> stop_word_toks = llama_tokenize(ctx, slot.stopping_word, false);
             probs = std::vector<completion_token_output>(slot.generated_token_probs.begin(), slot.generated_token_probs.end() - stop_word_toks.size());
         }
         else
@@ -1383,7 +1371,7 @@ struct llama_server_context
             };
             if (llama_decode(ctx, batch_view))
             {
-                LOG("%s : failed to eval\n", __func__);
+                LOG_TEE("%s : failed to eval\n", __func__);
                 return false;
             }
         }
@@ -1401,14 +1389,14 @@ struct llama_server_context
                 llama_batch batch_img = { n_eval, nullptr, (img.image_embedding + i * n_embd), nullptr, nullptr, nullptr, nullptr, slot.n_past, 1, 0, };
                 if (llama_decode(ctx, batch_img))
                 {
-                    LOG("%s : failed to eval image\n", __func__);
+                    LOG_TEE("%s : failed to eval image\n", __func__);
                    return false;
                 }
                 slot.n_past += n_eval;
             }
             image_idx++;
-            common_batch_clear(batch);
+            llama_batch_clear(batch);
             // append prefix of next image
             const auto json_prompt = (image_idx >= (int) slot.images.size()) ?
@@ -1418,7 +1406,7 @@ struct llama_server_context
             std::vector<llama_token> append_tokens = tokenize(json_prompt, false); // has next image
             for (int i = 0; i < (int) append_tokens.size(); ++i)
             {
-                common_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true);
+                llama_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true);
                 slot.n_past += 1;
             }
         }
@@ -1550,7 +1538,7 @@ struct llama_server_context
             update_system_prompt();
         }
-        common_batch_clear(batch);
+        llama_batch_clear(batch);
         if (all_slots_are_idle)
         {
@@ -1584,7 +1572,7 @@ struct llama_server_context
                     slot.n_past = 0;
                     slot.truncated = false;
                     slot.has_next_token = true;
-                    LOG("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
+                    LOG_TEE("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
                     continue;
                     // END LOCALAI changes
@@ -1628,7 +1616,7 @@ struct llama_server_context
                 // TODO: we always have to take into account the "system_tokens"
                 // this is not great and needs to be improved somehow
-                common_batch_add(batch, slot.sampled, system_tokens.size() + slot_npast, { slot.id }, true);
+                llama_batch_add(batch, slot.sampled, system_tokens.size() + slot_npast, { slot.id }, true);
                 slot.n_past += 1;
             }
@@ -1722,7 +1710,7 @@ struct llama_server_context
                     if (!slot.params.cache_prompt)
                     {
-                        common_sampler_reset(slot.ctx_sampling);
+                        llama_sampling_reset(slot.ctx_sampling);
                         slot.n_past = 0;
                         slot.n_past_se = 0;
@@ -1734,7 +1722,7 @@ struct llama_server_context
                         // push the prompt into the sampling context (do not apply grammar)
                         for (auto &token : prompt_tokens)
                         {
-                            common_sampler_accept(slot.ctx_sampling, token, false);
+                            llama_sampling_accept(slot.ctx_sampling, ctx, token, false);
                         }
                         slot.n_past = common_part(slot.cache_tokens, prompt_tokens);
@@ -1826,17 +1814,16 @@ struct llama_server_context
                             ga_i += ga_w/ga_n;
                         }
                     }
-                    common_batch_add(batch, prefix_tokens[slot.n_past], system_tokens.size() + slot_npast, {slot.id }, false);
+                    llama_batch_add(batch, prefix_tokens[slot.n_past], system_tokens.size() + slot_npast, {slot.id }, false);
                     slot_npast++;
                 }
                 if (has_images && !ingest_images(slot, n_batch))
                 {
-                    LOG_ERR("%s: failed processing images Slot id : %d, Task id: %d",
-                        __func__,
-                        slot.id,
-                        slot.task_id
-                    );
+                    LOG_ERROR("failed processing images", {
+                        "slot_id", slot.id,
+                        "task_id", slot.task_id,
+                    });
                     // FIXME @phymbert: to be properly tested
                     // early returning without changing the slot state will block the slot for ever
                     // no one at the moment is checking the return value
@@ -1876,10 +1863,10 @@ struct llama_server_context
                     const int bd = (slot.ga_w / slot.ga_n) * (slot.ga_n - 1);
                     const int dd = (slot.ga_w / slot.ga_n) - ib * bd - slot.ga_w;
-                    LOG("\n");
-                    LOG("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd);
-                    LOG("div:   [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n);
-                    LOG("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd);
+                    LOG_TEE("\n");
+                    LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd);
+                    LOG_TEE("div:   [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n);
+                    LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd);
                     llama_kv_cache_seq_add(ctx, slot.id, slot.ga_i, slot.n_past_se, ib * bd);
                     llama_kv_cache_seq_div(ctx, slot.id, slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n);
@@ -1889,7 +1876,7 @@ struct llama_server_context
                     slot.ga_i += slot.ga_w / slot.ga_n;
-                    LOG("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i);
+                    LOG_TEE("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i);
                 }
                 slot.n_past_se += n_tokens;
             }
@@ -1914,11 +1901,11 @@ struct llama_server_context
                 if (n_batch == 1 || ret < 0)
                 {
                     // if you get here, it means the KV cache is full - try increasing it via the context size
-                    LOG("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
+                    LOG_TEE("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
                     return false;
                 }
-                LOG("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
+                LOG_TEE("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
                 // retry with half the batch size to try to find a free slot in the KV cache
                 n_batch /= 2;
@@ -1943,9 +1930,9 @@ struct llama_server_context
                 }
                 completion_token_output result;
-                const llama_token id = common_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i);
+                const llama_token id = llama_sampling_sample(slot.ctx_sampling, ctx, NULL, slot.i_batch - i);
-                common_sampler_accept(slot.ctx_sampling, id, true);
+                llama_sampling_accept(slot.ctx_sampling, ctx, id, true);
                 slot.n_decoded += 1;
                 if (slot.n_decoded == 1)
@@ -1955,14 +1942,19 @@ struct llama_server_context
                     metrics.on_prompt_eval(slot);
                 }
+                llama_token_data_array cur_p = { slot.ctx_sampling->cur.data(), slot.ctx_sampling->cur.size(), false };
                 result.tok = id;
-                const auto * cur_p = common_sampler_get_candidates(slot.ctx_sampling);
-                for (size_t i = 0; i < (size_t) slot.sparams.n_probs; ++i) {
-                    result.probs.push_back({
-                        cur_p->data[i].id,
-                        i >= cur_p->size ? 0.0f : cur_p->data[i].p,
-                    });
-                }
+                const int32_t n_probs = slot.sparams.n_probs;
+                if (slot.sparams.temp <= 0 && n_probs > 0)
+                {
+                    // for llama_sample_token_greedy we need to sort candidates
+                    llama_sample_softmax(ctx, &cur_p);
+                }
+                for (size_t i = 0; i < std::min(cur_p.size, (size_t)n_probs); ++i)
+                {
+                    result.probs.push_back({cur_p.data[i].id, cur_p.data[i].p});
+                }
                 if (!process_token(result, slot))
@@ -2009,7 +2001,7 @@ static json format_partial_response(
 struct token_translator
 {
     llama_context * ctx;
-    std::string operator()(llama_token tok)                    const { return common_token_to_piece(ctx, tok); }
+    std::string operator()(llama_token tok)                    const { return llama_token_to_piece(ctx, tok); }
    std::string operator()(const completion_token_output &cto) const { return (*this)(cto.tok); }
 };
@@ -2114,10 +2106,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
     data["grammar"] = predict->grammar();
     data["prompt"] = predict->prompt();
     data["ignore_eos"] = predict->ignoreeos();
-    data["embeddings"] = predict->embeddings();
-    // Add the correlationid to json data
-    data["correlation_id"] = predict->correlationid();
     // for each image in the request, add the image data
     //
@@ -2203,7 +2191,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
 // }
 static void params_parse(const backend::ModelOptions* request,
-                                common_params & params) {
+                                gpt_params & params) {
     // this is comparable to: https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L1809
@@ -2217,7 +2205,7 @@ static void params_parse(const backend::ModelOptions* request,
     params.model_alias = request->modelfile();
     params.n_ctx = request->contextsize();
     //params.memory_f16 = request->f16memory();
-    params.cpuparams.n_threads = request->threads();
+    params.n_threads = request->threads();
     params.n_gpu_layers = request->ngpulayers();
     params.n_batch = request->nbatch();
     // Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1
@@ -2267,7 +2255,8 @@ static void params_parse(const backend::ModelOptions* request,
     }
         // get the directory of modelfile
         std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
-        params.lora_adapters.push_back({ model_dir + "/"+request->loraadapter(), scale_factor });
+        params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor));
+        params.lora_base = model_dir + "/"+request->lorabase();
     }
     params.use_mlock = request->mlock();
     params.use_mmap = request->mmap();
@@ -2311,7 +2300,7 @@ public:
     grpc::Status LoadModel(ServerContext* context, const backend::ModelOptions* request, backend::Result* result) {
         // Implement LoadModel RPC
-        common_params params;
+        gpt_params params;
         params_parse(request, params);
         llama_backend_init();
@@ -2357,11 +2346,6 @@ public:
             int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
             reply.set_prompt_tokens(tokens_evaluated);
-            // Log Request Correlation Id
-            LOG_VERBOSE("correlation:", {
-                { "id", data["correlation_id"] }
-            });
             // Send the reply
             writer->Write(reply);
@@ -2385,12 +2369,6 @@ public:
         std::string completion_text;
         task_result result = llama.queue_results.recv(task_id);
         if (!result.error && result.stop) {
-            // Log Request Correlation Id
-            LOG_VERBOSE("correlation:", {
-                { "id", data["correlation_id"] }
-            });
             completion_text = result.result_json.value("content", "");
             int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
             int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
@@ -2405,56 +2383,6 @@ public:
         return grpc::Status::OK;
     }
-    /// https://github.com/ggerganov/llama.cpp/blob/aa2341298924ac89778252015efcb792f2df1e20/examples/server/server.cpp#L2969
-    grpc::Status Embedding(ServerContext* context, const backend::PredictOptions* request, backend::EmbeddingResult* embeddingResult) {
-        json data = parse_options(false, request, llama);
-        const int task_id = llama.queue_tasks.get_new_id();
-        llama.queue_results.add_waiting_task_id(task_id);
-        llama.request_completion(task_id, { {"prompt", data["embeddings"]}, { "n_predict", 0}, {"image_data", ""} }, false, true, -1);
-        // get the result
-        task_result result = llama.queue_results.recv(task_id);
-        //std::cout << "Embedding result JSON" << result.result_json.dump() << std::endl;
-        llama.queue_results.remove_waiting_task_id(task_id);
-        if (!result.error && result.stop) {
-            std::vector<float> embeddings = result.result_json.value("embedding", std::vector<float>());
-            // loop the vector and set the embeddings results
-            for (int i = 0; i < embeddings.size(); i++) {
-                embeddingResult->add_embeddings(embeddings[i]);
-            }
-        }
-        else
-        {
-            return grpc::Status::OK;
-        }
-        return grpc::Status::OK;
-    }
-    grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
-        llama_client_slot* active_slot = llama.get_active_slot();
-        if (active_slot != nullptr) {
-            // Calculate the tokens per second using existing logic
-            double tokens_per_second = 1e3 / active_slot->t_token_generation * active_slot->n_decoded;
-            // Populate the response with metrics
-            response->set_slot_id(active_slot->id);
-            response->set_prompt_json_for_slot(active_slot->prompt.dump());
-            response->set_tokens_per_second(tokens_per_second);
-            response->set_tokens_generated(active_slot->n_decoded);
-            response->set_prompt_tokens_processed(active_slot->num_prompt_tokens_processed);
-        } else {
-            // Handle case when no active slot exists
-            response->set_slot_id(0);
-            response->set_prompt_json_for_slot("");
-            response->set_tokens_per_second(0);
-            response->set_tokens_generated(0);
-            response->set_prompt_tokens_processed(0);
-        }
-        return grpc::Status::OK;
-    }
 };
 void RunServer(const std::string& server_address) {
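A note on the removed GetMetrics handler above: it derives throughput from the active slot's timings, where `t_token_generation` is the time spent generating in milliseconds and `n_decoded` the number of tokens produced, so `1e3 / t * n` equals `n / (t / 1000)`. A hedged Go restatement of that arithmetic (names here are illustrative, not the generated API):

```go
package metrics

// tokensPerSecond mirrors the removed C++ handler's computation:
// tTokenGenerationMs is milliseconds spent generating, nDecoded the
// number of tokens produced, so 1e3 / t * n == n / (t / 1000).
func tokensPerSecond(tTokenGenerationMs float64, nDecoded int32) float64 {
	if tTokenGenerationMs <= 0 {
		return 0 // the handler likewise reports zeros when no slot is active
	}
	return 1e3 / tTokenGenerationMs * float64(nDecoded)
}
```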

View File

@@ -1,13 +0,0 @@
-diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
-index 342042ff..224db9b5 100644
---- a/examples/llava/clip.cpp
-+++ b/examples/llava/clip.cpp
-@@ -2419,7 +2419,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
-     struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
-     int* patches_data = (int*)malloc(ggml_nbytes(patches));
-     for (int i = 0; i < num_patches; i++) {
--        patches_data[i] = i + 1;
-+        patches_data[i] = i;
-     }
-     ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
-     free(patches_data);

View File

@@ -1,12 +1,5 @@
 #!/bin/bash
-## Patches
-## Apply patches from the `patches` directory
-for patch in $(ls patches); do
-    echo "Applying patch $patch"
-    patch -d llama.cpp/ -p1 < patches/$patch
-done
 cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
 cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
 cp -rfv json.hpp llama.cpp/examples/grpc-server/

View File

@@ -480,4 +480,31 @@ static inline std::vector<uint8_t> base64_decode(const std::string & encoded_str
     }
     return ret;
 }
+
+//
+// random string / id
+//
+
+static std::string random_string()
+{
+    static const std::string str("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
+
+    std::random_device rd;
+    std::mt19937 generator(rd());
+
+    std::string result(32, ' ');
+
+    for (int i = 0; i < 32; ++i) {
+        result[i] = str[generator() % str.size()];
+    }
+
+    return result;
+}
+
+static std::string gen_chatcmplid()
+{
+    std::stringstream chatcmplid;
+    chatcmplid << "chatcmpl-" << random_string();
+    return chatcmplid.str();
+}

View File

@@ -5,7 +5,7 @@ package main
 import (
 	"flag"
-	grpc "github.com/mudler/LocalAI/pkg/grpc"
+	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
 var (

View File

@@ -3,9 +3,9 @@ package main
 // This is a wrapper to statisfy the GRPC service interface
 // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
 import (
-	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
-	"github.com/mudler/LocalAI/pkg/stablediffusion"
+	"github.com/go-skynet/LocalAI/pkg/grpc/base"
+	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	"github.com/go-skynet/LocalAI/pkg/stablediffusion"
 )
 type Image struct {

View File

@@ -5,7 +5,7 @@ package main
 import (
 	"flag"
-	grpc "github.com/mudler/LocalAI/pkg/grpc"
+	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
 var (

View File

@@ -3,9 +3,9 @@ package main
 // This is a wrapper to statisfy the GRPC service interface
 // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
 import (
-	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
-	"github.com/mudler/LocalAI/pkg/tinydream"
+	"github.com/go-skynet/LocalAI/pkg/grpc/base"
+	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	"github.com/go-skynet/LocalAI/pkg/tinydream"
 )
 type Image struct {

View File

@@ -5,8 +5,8 @@ package main
 import (
 	bert "github.com/go-skynet/go-bert.cpp"
-	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+	"github.com/go-skynet/LocalAI/pkg/grpc/base"
+	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
 )
 type Embeddings struct {

View File

@@ -5,7 +5,7 @@ package main
 import (
 	"flag"
-	grpc "github.com/mudler/LocalAI/pkg/grpc"
+	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
 var (

View File

@@ -0,0 +1,62 @@
package main
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
)
type LLM struct {
base.SingleThread
gpt4all *gpt4all.Model
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
model, err := gpt4all.New(opts.ModelFile,
gpt4all.SetThreads(int(opts.Threads)),
gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath))
llm.gpt4all = model
return err
}
func buildPredictOptions(opts *pb.PredictOptions) []gpt4all.PredictOption {
predictOptions := []gpt4all.PredictOption{
gpt4all.SetTemperature(float64(opts.Temperature)),
gpt4all.SetTopP(float64(opts.TopP)),
gpt4all.SetTopK(int(opts.TopK)),
gpt4all.SetTokens(int(opts.Tokens)),
}
if opts.Batch != 0 {
predictOptions = append(predictOptions, gpt4all.SetBatch(int(opts.Batch)))
}
return predictOptions
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
return llm.gpt4all.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
predictOptions := buildPredictOptions(opts)
go func() {
llm.gpt4all.SetTokenCallback(func(token string) bool {
results <- token
return true
})
_, err := llm.gpt4all.Predict(opts.Prompt, predictOptions...)
if err != nil {
fmt.Println("err: ", err)
}
llm.gpt4all.SetTokenCallback(nil)
close(results)
}()
return nil
}
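The new gpt4all wrapper above streams tokens by installing a callback around a blocking `Predict` call and feeding a channel. A minimal sketch of how a caller in the same package might drive it — the model path and library path are hypothetical, and only the fields the wrapper actually reads (`ModelFile`, `Threads`, `LibrarySearchPath`, plus the prediction knobs consumed by `buildPredictOptions`) matter:

```go
// Hypothetical driver for the LLM wrapper defined above (same package main).
func runExample() {
	llm := &LLM{}
	if err := llm.Load(&pb.ModelOptions{
		ModelFile:         "ggml-gpt4all-j.bin", // hypothetical model path
		Threads:           4,
		LibrarySearchPath: "/usr/lib/gpt4all", // hypothetical bindings location
	}); err != nil {
		panic(err)
	}

	results := make(chan string)
	if err := llm.PredictStream(&pb.PredictOptions{
		Prompt:      "Hello",
		Temperature: 0.7,
		TopP:        0.9,
		TopK:        40,
		Tokens:      64,
	}, results); err != nil {
		panic(err)
	}
	// PredictStream closes the channel once generation finishes.
	for token := range results {
		fmt.Print(token)
	}
}
```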

View File

@@ -0,0 +1,21 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
panic(err)
}
}

View File

@@ -6,9 +6,9 @@ import (
 	"fmt"
 	"os"
-	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
-	"github.com/mudler/LocalAI/pkg/langchain"
+	"github.com/go-skynet/LocalAI/pkg/grpc/base"
+	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	"github.com/go-skynet/LocalAI/pkg/langchain"
 )
 type LLM struct {

View File

@@ -5,7 +5,7 @@ package main
import ( import (
"flag" "flag"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/go-skynet/LocalAI/pkg/grpc"
) )
var ( var (

View File

@@ -5,9 +5,9 @@ package main
import ( import (
"fmt" "fmt"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/go-llama.cpp" "github.com/go-skynet/go-llama.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
) )
type LLM struct { type LLM struct {

View File

@@ -3,7 +3,7 @@ package main
import ( import (
"flag" "flag"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/go-skynet/LocalAI/pkg/grpc"
) )
var ( var (

View File

@@ -6,9 +6,9 @@ import (
"fmt" "fmt"
"path/filepath" "path/filepath"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/go-llama.cpp" "github.com/go-skynet/go-llama.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
) )
type LLM struct { type LLM struct {

View File

@@ -7,7 +7,7 @@ package main
import ( import (
"flag" "flag"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/go-skynet/LocalAI/pkg/grpc"
) )
var ( var (

View File

@@ -5,7 +5,7 @@ package main
import ( import (
"flag" "flag"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/go-skynet/LocalAI/pkg/grpc"
) )
var ( var (

View File

@@ -7,8 +7,8 @@ import (
"path/filepath" "path/filepath"
"github.com/donomii/go-rwkv.cpp" "github.com/donomii/go-rwkv.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base" "github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto" pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
) )
const tokenizerSuffix = ".tokenizer.json" const tokenizerSuffix = ".tokenizer.json"
@@ -31,7 +31,7 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads())) model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads()))
if model == nil { if model == nil {
return fmt.Errorf("rwkv could not load model") return fmt.Errorf("could not load model")
} }
llm.rwkv = model llm.rwkv = model
return nil return nil

View File

@@ -6,7 +6,7 @@ import (
"flag" "flag"
"os" "os"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/go-skynet/LocalAI/pkg/grpc"
"github.com/rs/zerolog" "github.com/rs/zerolog"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
) )

View File

@@ -8,8 +8,8 @@ import (
"math" "math"
"slices" "slices"
"github.com/mudler/LocalAI/pkg/grpc/base" "github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto" pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
) )

View File

@@ -5,7 +5,7 @@ package main
import ( import (
"flag" "flag"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/go-skynet/LocalAI/pkg/grpc"
) )
var ( var (

View File

@@ -0,0 +1,100 @@
package main
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
"github.com/go-audio/wav"
"github.com/go-skynet/LocalAI/core/schema"
)
func ffmpegCommand(args []string) (string, error) {
cmd := exec.Command("ffmpeg", args...) // Constrain this to ffmpeg to permit security scanner to see that the command is safe.
cmd.Env = os.Environ()
out, err := cmd.CombinedOutput()
return string(out), err
}
// audioToWav converts the source audio to 16 kHz mono WAV for transcription.
// TODO: use https://github.com/mccoyst/ogg?
func audioToWav(src, dst string) error {
commandArgs := []string{"-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
out, err := ffmpegCommand(commandArgs)
if err != nil {
return fmt.Errorf("error: %w out: %s", err, out)
}
return nil
}
func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.TranscriptionResult, error) {
res := schema.TranscriptionResult{}
dir, err := os.MkdirTemp("", "whisper")
if err != nil {
return res, err
}
defer os.RemoveAll(dir)
convertedPath := filepath.Join(dir, "converted.wav")
if err := audioToWav(audiopath, convertedPath); err != nil {
return res, err
}
// Open samples
fh, err := os.Open(convertedPath)
if err != nil {
return res, err
}
defer fh.Close()
// Read samples
d := wav.NewDecoder(fh)
buf, err := d.FullPCMBuffer()
if err != nil {
return res, err
}
data := buf.AsFloat32Buffer().Data
// Process samples
context, err := model.NewContext()
if err != nil {
return res, err
}
context.SetThreads(threads)
if language != "" {
context.SetLanguage(language)
} else {
context.SetLanguage("auto")
}
if err := context.Process(data, nil, nil); err != nil {
return res, err
}
for {
s, err := context.NextSegment()
if err != nil {
break
}
var tokens []int
for _, t := range s.Tokens {
tokens = append(tokens, t.Id)
}
segment := schema.Segment{Id: s.Num, Text: s.Text, Start: s.Start, End: s.End, Tokens: tokens}
res.Segments = append(res.Segments, segment)
res.Text += s.Text
}
return res, nil
}
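Taken together, Transcript shells out to ffmpeg (effectively ffmpeg -i src -format s16le -ar 16000 -ac 1 -acodec pcm_s16le dst), decodes the resulting WAV into float32 samples, and iterates whisper segments until NextSegment returns an error. A minimal driver sketch (runTranscription, the model path, and the audio path are hypothetical; whisper.New comes from the whisper.cpp Go bindings imported above, and fmt is assumed imported):

// runTranscription is a hypothetical driver for the Transcript helper above.
func runTranscription() error {
	model, err := whisper.New("/models/ggml-base.bin") // placeholder path
	if err != nil {
		return err
	}
	defer model.Close()

	res, err := Transcript(model, "/tmp/input.ogg", "en", 4)
	if err != nil {
		return err
	}
	for _, s := range res.Segments {
		fmt.Println(s.Id, s.Text) // per-segment transcript
	}
	fmt.Println(res.Text) // full concatenated transcript
	return nil
}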

View File

@@ -0,0 +1,26 @@
package main
// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
)
type Whisper struct {
base.SingleThread
whisper whisper.Model
}
func (sd *Whisper) Load(opts *pb.ModelOptions) error {
// Note: the Model here is a path to a directory containing the model files
w, err := whisper.New(opts.ModelFile)
sd.whisper = w
return err
}
func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) {
return Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads))
}

View File

@@ -1,105 +0,0 @@
package main
// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"os"
"path/filepath"
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
"github.com/go-audio/wav"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/utils"
)
type Whisper struct {
base.SingleThread
whisper whisper.Model
}
func (sd *Whisper) Load(opts *pb.ModelOptions) error {
// Note: the Model here is a path to a directory containing the model files
w, err := whisper.New(opts.ModelFile)
sd.whisper = w
return err
}
func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (pb.TranscriptResult, error) {
dir, err := os.MkdirTemp("", "whisper")
if err != nil {
return pb.TranscriptResult{}, err
}
defer os.RemoveAll(dir)
convertedPath := filepath.Join(dir, "converted.wav")
if err := utils.AudioToWav(opts.Dst, convertedPath); err != nil {
return pb.TranscriptResult{}, err
}
// Open samples
fh, err := os.Open(convertedPath)
if err != nil {
return pb.TranscriptResult{}, err
}
defer fh.Close()
// Read samples
d := wav.NewDecoder(fh)
buf, err := d.FullPCMBuffer()
if err != nil {
return pb.TranscriptResult{}, err
}
data := buf.AsFloat32Buffer().Data
// Process samples
context, err := sd.whisper.NewContext()
if err != nil {
return pb.TranscriptResult{}, err
}
context.SetThreads(uint(opts.Threads))
if opts.Language != "" {
context.SetLanguage(opts.Language)
} else {
context.SetLanguage("auto")
}
if opts.Translate {
context.SetTranslate(true)
}
if err := context.Process(data, nil, nil); err != nil {
return pb.TranscriptResult{}, err
}
segments := []*pb.TranscriptSegment{}
text := ""
for {
s, err := context.NextSegment()
if err != nil {
break
}
var tokens []int32
for _, t := range s.Tokens {
tokens = append(tokens, int32(t.Id))
}
segment := &pb.TranscriptSegment{Id: int32(s.Num), Text: s.Text, Start: int64(s.Start), End: int64(s.End), Tokens: tokens}
segments = append(segments, segment)
text += s.Text
}
return pb.TranscriptResult{
Segments: segments,
Text: text,
}, nil
}

View File

@@ -5,7 +5,7 @@ package main
import ( import (
"flag" "flag"
grpc "github.com/mudler/LocalAI/pkg/grpc" grpc "github.com/go-skynet/LocalAI/pkg/grpc"
) )
var ( var (

View File

@@ -7,8 +7,8 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"github.com/mudler/LocalAI/pkg/grpc/base" "github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto" pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
piper "github.com/mudler/go-piper" piper "github.com/mudler/go-piper"
) )

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch intel-extension-for-pytorch
torch torch
optimum[openvino] optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,7 @@
accelerate accelerate
auto-gptq==0.7.1 auto-gptq==0.7.1
grpcio==1.66.2 grpcio==1.64.0
protobuf protobuf
torch
certifi certifi
transformers transformers

View File

@@ -1,4 +0,0 @@
transformers
accelerate
torch
torchaudio

View File

@@ -1,5 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio
transformers
accelerate

View File

@@ -1,4 +0,0 @@
torch
torchaudio
transformers
accelerate

View File

@@ -1,5 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0 --extra-index-url https://download.pytorch.org/whl/rocm6.0
torch torch
torchaudio torchaudio
transformers
accelerate

View File

@@ -3,6 +3,4 @@ intel-extension-for-pytorch
torch torch
torchaudio torchaudio
optimum[openvino] optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
transformers
accelerate

View File

@@ -1,4 +1,6 @@
accelerate
bark==0.1.5 bark==0.1.5
grpcio==1.66.2 grpcio==1.64.0
protobuf protobuf
certifi certifi
transformers

View File

@@ -18,23 +18,10 @@
# source $(dirname $0)/../common/libbackend.sh # source $(dirname $0)/../common/libbackend.sh
# #
function init() { function init() {
# Name of the backend (directory name)
BACKEND_NAME=${PWD##*/} BACKEND_NAME=${PWD##*/}
# Path where all backend files are
MY_DIR=$(realpath `dirname $0`) MY_DIR=$(realpath `dirname $0`)
# Build type
BUILD_PROFILE=$(getBuildProfile) BUILD_PROFILE=$(getBuildProfile)
# Environment directory
EDIR=${MY_DIR}
# Allow specifying a custom env dir for shared environments
if [ "x${ENV_DIR}" != "x" ]; then
EDIR=${ENV_DIR}
fi
# If a backend has defined a list of valid build profiles... # If a backend has defined a list of valid build profiles...
if [ ! -z "${LIMIT_TARGETS}" ]; then if [ ! -z "${LIMIT_TARGETS}" ]; then
isValidTarget=$(checkTargets ${LIMIT_TARGETS}) isValidTarget=$(checkTargets ${LIMIT_TARGETS})
@@ -87,14 +74,13 @@ function getBuildProfile() {
# This function is idempotent, so you can call it as many times as you want and it will # This function is idempotent, so you can call it as many times as you want and it will
# always result in an activated virtual environment # always result in an activated virtual environment
function ensureVenv() { function ensureVenv() {
if [ ! -d "${EDIR}/venv" ]; then if [ ! -d "${MY_DIR}/venv" ]; then
uv venv ${EDIR}/venv uv venv ${MY_DIR}/venv
echo "virtualenv created" echo "virtualenv created"
fi fi
# Source if we are not already in a Virtual env if [ "x${VIRTUAL_ENV}" != "x${MY_DIR}/venv" ]; then
if [ "x${VIRTUAL_ENV}" != "x${EDIR}/venv" ]; then source ${MY_DIR}/venv/bin/activate
source ${EDIR}/venv/bin/activate
echo "virtualenv activated" echo "virtualenv activated"
fi fi
@@ -127,24 +113,13 @@ function installRequirements() {
# These are the requirements files we will attempt to install, in order # These are the requirements files we will attempt to install, in order
declare -a requirementFiles=( declare -a requirementFiles=(
"${EDIR}/requirements-install.txt" "${MY_DIR}/requirements-install.txt"
"${EDIR}/requirements.txt" "${MY_DIR}/requirements.txt"
"${EDIR}/requirements-${BUILD_TYPE}.txt" "${MY_DIR}/requirements-${BUILD_TYPE}.txt"
) )
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}.txt") requirementFiles+=("${MY_DIR}/requirements-${BUILD_PROFILE}.txt")
fi
# if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements
if [ "x${BUILD_TYPE}" == "x" ]; then
requirementFiles+=("${EDIR}/requirements-cpu.txt")
fi
requirementFiles+=("${EDIR}/requirements-after.txt")
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}-after.txt")
fi fi
for reqFile in ${requirementFiles[@]}; do for reqFile in ${requirementFiles[@]}; do
@@ -173,13 +148,13 @@ function startBackend() {
ensureVenv ensureVenv
if [ ! -z ${BACKEND_FILE} ]; then if [ ! -z ${BACKEND_FILE} ]; then
exec python ${BACKEND_FILE} $@ python ${BACKEND_FILE} $@
elif [ -e "${MY_DIR}/server.py" ]; then elif [ -e "${MY_DIR}/server.py" ]; then
exec python ${MY_DIR}/server.py $@ python ${MY_DIR}/server.py $@
elif [ -e "${MY_DIR}/backend.py" ]; then elif [ -e "${MY_DIR}/backend.py" ]; then
exec python ${MY_DIR}/backend.py $@ python ${MY_DIR}/backend.py $@
elif [ -e "${MY_DIR}/${BACKEND_NAME}.py" ]; then elif [ -e "${MY_DIR}/${BACKEND_NAME}.py" ]; then
exec python ${MY_DIR}/${BACKEND_NAME}.py $@ python ${MY_DIR}/${BACKEND_NAME}.py $@
fi fi
} }
@@ -235,4 +210,4 @@ function checkTargets() {
echo false echo false
} }
init init

View File

@@ -1,2 +1,2 @@
grpcio==1.66.2 grpcio==1.64.0
protobuf protobuf

View File

@@ -1,3 +0,0 @@
transformers
accelerate
torch

View File

@@ -1,5 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio
transformers
accelerate

View File

@@ -1,4 +0,0 @@
torch
torchaudio
transformers
accelerate

View File

@@ -1,5 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0 --extra-index-url https://download.pytorch.org/whl/rocm6.0
torch torch
torchaudio torchaudio
transformers
accelerate

View File

@@ -3,6 +3,4 @@ intel-extension-for-pytorch
torch torch
torchaudio torchaudio
optimum[openvino] optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
transformers
accelerate

View File

@@ -1,4 +1,6 @@
coqui-tts accelerate
grpcio==1.66.2 TTS==0.22.0
grpcio==1.64.0
protobuf protobuf
certifi certifi
transformers

View File

@@ -19,7 +19,7 @@ class TestBackendServicer(unittest.TestCase):
This method sets up the gRPC service by starting the server This method sets up the gRPC service by starting the server
""" """
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
time.sleep(30) time.sleep(10)
def tearDown(self) -> None: def tearDown(self) -> None:
""" """

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from concurrent import futures from concurrent import futures
import traceback
import argparse import argparse
from collections import defaultdict from collections import defaultdict
from enum import Enum from enum import Enum
@@ -17,39 +17,35 @@ import backend_pb2_grpc
import grpc import grpc
from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \ from diffusers import StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
from diffusers.pipelines.stable_diffusion import safety_checker from diffusers.pipelines.stable_diffusion import safety_checker
from diffusers.utils import load_image, export_to_video from diffusers.utils import load_image,export_to_video
from compel import Compel, ReturnedEmbeddingsType from compel import Compel, ReturnedEmbeddingsType
from optimum.quanto import freeze, qfloat8, quantize
from transformers import CLIPTextModel, T5EncoderModel from transformers import CLIPTextModel
from safetensors.torch import load_file from safetensors.torch import load_file
_ONE_DAY_IN_SECONDS = 60 * 60 * 24 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
COMPEL = os.environ.get("COMPEL", "0") == "1" COMPEL=os.environ.get("COMPEL", "0") == "1"
XPU = os.environ.get("XPU", "0") == "1" XPU=os.environ.get("XPU", "0") == "1"
CLIPSKIP = os.environ.get("CLIPSKIP", "1") == "1" CLIPSKIP=os.environ.get("CLIPSKIP", "1") == "1"
SAFETENSORS = os.environ.get("SAFETENSORS", "1") == "1" SAFETENSORS=os.environ.get("SAFETENSORS", "1") == "1"
CHUNK_SIZE = os.environ.get("CHUNK_SIZE", "8") CHUNK_SIZE=os.environ.get("CHUNK_SIZE", "8")
FPS = os.environ.get("FPS", "7") FPS=os.environ.get("FPS", "7")
DISABLE_CPU_OFFLOAD = os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1" DISABLE_CPU_OFFLOAD=os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
FRAMES = os.environ.get("FRAMES", "64") FRAMES=os.environ.get("FRAMES", "64")
if XPU: if XPU:
import intel_extension_for_pytorch as ipex import intel_extension_for_pytorch as ipex
print(ipex.xpu.get_device_name(0)) print(ipex.xpu.get_device_name(0))
# If MAX_WORKERS is specified in the environment, use it; otherwise default to 1 # If MAX_WORKERS is specified in the environment, use it; otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
# https://github.com/CompVis/stable-diffusion/issues/239#issuecomment-1627615287 # https://github.com/CompVis/stable-diffusion/issues/239#issuecomment-1627615287
def sc(self, clip_input, images): return images, [False for i in images] def sc(self, clip_input, images) : return images, [False for i in images]
# edit the StableDiffusionSafetyChecker class so that, when called, it just returns the images and an array of False values (nothing flagged) # edit the StableDiffusionSafetyChecker class so that, when called, it just returns the images and an array of False values (nothing flagged)
safety_checker.StableDiffusionSafetyChecker.forward = sc safety_checker.StableDiffusionSafetyChecker.forward = sc
@@ -66,8 +62,6 @@ from diffusers.schedulers import (
PNDMScheduler, PNDMScheduler,
UniPCMultistepScheduler, UniPCMultistepScheduler,
) )
# The scheduler list mapping was taken from here: https://github.com/neggles/animatediff-cli/blob/6f336f5f4b5e38e85d7f06f1744ef42d0a45f2a7/src/animatediff/schedulers.py#L39 # The scheduler list mapping was taken from here: https://github.com/neggles/animatediff-cli/blob/6f336f5f4b5e38e85d7f06f1744ef42d0a45f2a7/src/animatediff/schedulers.py#L39
# Credits to https://github.com/neggles # Credits to https://github.com/neggles
# See https://github.com/huggingface/diffusers/issues/4167 for more details on sched mapping from A1111 # See https://github.com/huggingface/diffusers/issues/4167 for more details on sched mapping from A1111
@@ -142,12 +136,10 @@ def get_scheduler(name: str, config: dict = {}):
return sched_class.from_config(config) return sched_class.from_config(config)
# Implement the BackendServicer class with the service methods # Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer): class BackendServicer(backend_pb2_grpc.BackendServicer):
def Health(self, request, context): def Health(self, request, context):
return backend_pb2.Reply(message=bytes("OK", 'utf-8')) return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
def LoadModel(self, request, context): def LoadModel(self, request, context):
try: try:
print(f"Loading model {request.Model}...", file=sys.stderr) print(f"Loading model {request.Model}...", file=sys.stderr)
@@ -157,48 +149,46 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.F16Memory: if request.F16Memory:
torchType = torch.float16 torchType = torch.float16
variant = "fp16" variant="fp16"
local = False local = False
modelFile = request.Model modelFile = request.Model
self.cfg_scale = 7 self.cfg_scale = 7
self.PipelineType = request.PipelineType
if request.CFGScale != 0: if request.CFGScale != 0:
self.cfg_scale = request.CFGScale self.cfg_scale = request.CFGScale
clipmodel = "Lykon/dreamshaper-8" clipmodel = "runwayml/stable-diffusion-v1-5"
if request.CLIPModel != "": if request.CLIPModel != "":
clipmodel = request.CLIPModel clipmodel = request.CLIPModel
clipsubfolder = "text_encoder" clipsubfolder = "text_encoder"
if request.CLIPSubfolder != "": if request.CLIPSubfolder != "":
clipsubfolder = request.CLIPSubfolder clipsubfolder = request.CLIPSubfolder
# Check if ModelFile exists # Check if ModelFile exists
if request.ModelFile != "": if request.ModelFile != "":
if os.path.exists(request.ModelFile): if os.path.exists(request.ModelFile):
local = True local = True
modelFile = request.ModelFile modelFile = request.ModelFile
fromSingleFile = request.Model.startswith("http") or request.Model.startswith("/") or local fromSingleFile = request.Model.startswith("http") or request.Model.startswith("/") or local
self.img2vid = False self.img2vid=False
self.txt2vid = False self.txt2vid=False
## img2img ## img2img
if (request.PipelineType == "StableDiffusionImg2ImgPipeline") or (request.IMG2IMG and request.PipelineType == ""): if (request.PipelineType == "StableDiffusionImg2ImgPipeline") or (request.IMG2IMG and request.PipelineType == ""):
if fromSingleFile: if fromSingleFile:
self.pipe = StableDiffusionImg2ImgPipeline.from_single_file(modelFile, self.pipe = StableDiffusionImg2ImgPipeline.from_single_file(modelFile,
torch_dtype=torchType) torch_dtype=torchType)
else: else:
self.pipe = StableDiffusionImg2ImgPipeline.from_pretrained(request.Model, self.pipe = StableDiffusionImg2ImgPipeline.from_pretrained(request.Model,
torch_dtype=torchType) torch_dtype=torchType)
elif request.PipelineType == "StableDiffusionDepth2ImgPipeline": elif request.PipelineType == "StableDiffusionDepth2ImgPipeline":
self.pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(request.Model, self.pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(request.Model,
torch_dtype=torchType) torch_dtype=torchType)
## img2vid ## img2vid
elif request.PipelineType == "StableVideoDiffusionPipeline": elif request.PipelineType == "StableVideoDiffusionPipeline":
self.img2vid = True self.img2vid=True
self.pipe = StableVideoDiffusionPipeline.from_pretrained( self.pipe = StableVideoDiffusionPipeline.from_pretrained(
request.Model, torch_dtype=torchType, variant=variant request.Model, torch_dtype=torchType, variant=variant
) )
@@ -207,87 +197,53 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
## text2img ## text2img
elif request.PipelineType == "AutoPipelineForText2Image" or request.PipelineType == "": elif request.PipelineType == "AutoPipelineForText2Image" or request.PipelineType == "":
self.pipe = AutoPipelineForText2Image.from_pretrained(request.Model, self.pipe = AutoPipelineForText2Image.from_pretrained(request.Model,
torch_dtype=torchType, torch_dtype=torchType,
use_safetensors=SAFETENSORS, use_safetensors=SAFETENSORS,
variant=variant) variant=variant)
elif request.PipelineType == "StableDiffusionPipeline": elif request.PipelineType == "StableDiffusionPipeline":
if fromSingleFile: if fromSingleFile:
self.pipe = StableDiffusionPipeline.from_single_file(modelFile, self.pipe = StableDiffusionPipeline.from_single_file(modelFile,
torch_dtype=torchType) torch_dtype=torchType)
else: else:
self.pipe = StableDiffusionPipeline.from_pretrained(request.Model, self.pipe = StableDiffusionPipeline.from_pretrained(request.Model,
torch_dtype=torchType) torch_dtype=torchType)
elif request.PipelineType == "DiffusionPipeline": elif request.PipelineType == "DiffusionPipeline":
self.pipe = DiffusionPipeline.from_pretrained(request.Model, self.pipe = DiffusionPipeline.from_pretrained(request.Model,
torch_dtype=torchType) torch_dtype=torchType)
elif request.PipelineType == "VideoDiffusionPipeline": elif request.PipelineType == "VideoDiffusionPipeline":
self.txt2vid = True self.txt2vid=True
self.pipe = DiffusionPipeline.from_pretrained(request.Model, self.pipe = DiffusionPipeline.from_pretrained(request.Model,
torch_dtype=torchType) torch_dtype=torchType)
elif request.PipelineType == "StableDiffusionXLPipeline": elif request.PipelineType == "StableDiffusionXLPipeline":
if fromSingleFile: if fromSingleFile:
self.pipe = StableDiffusionXLPipeline.from_single_file(modelFile, self.pipe = StableDiffusionXLPipeline.from_single_file(modelFile,
torch_dtype=torchType, torch_dtype=torchType,
use_safetensors=True) use_safetensors=True)
else: else:
self.pipe = StableDiffusionXLPipeline.from_pretrained( self.pipe = StableDiffusionXLPipeline.from_pretrained(
request.Model, request.Model,
torch_dtype=torchType, torch_dtype=torchType,
use_safetensors=True, use_safetensors=True,
variant=variant) variant=variant)
elif request.PipelineType == "StableDiffusion3Pipeline":
if fromSingleFile:
self.pipe = StableDiffusion3Pipeline.from_single_file(modelFile,
torch_dtype=torchType,
use_safetensors=True)
else:
self.pipe = StableDiffusion3Pipeline.from_pretrained(
request.Model,
torch_dtype=torchType,
use_safetensors=True,
variant=variant)
elif request.PipelineType == "FluxPipeline":
self.pipe = FluxPipeline.from_pretrained(
request.Model,
torch_dtype=torch.bfloat16)
if request.LowVRAM:
self.pipe.enable_model_cpu_offload()
elif request.PipelineType == "FluxTransformer2DModel":
dtype = torch.bfloat16
# specify from environment or default to "ChuckMcSneed/FLUX.1-dev"
bfl_repo = os.environ.get("BFL_REPO", "ChuckMcSneed/FLUX.1-dev")
transformer = FluxTransformer2DModel.from_single_file(modelFile, torch_dtype=dtype)
quantize(transformer, weights=qfloat8)
freeze(transformer)
text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype)
quantize(text_encoder_2, weights=qfloat8)
freeze(text_encoder_2)
self.pipe = FluxPipeline.from_pretrained(bfl_repo, transformer=None, text_encoder_2=None, torch_dtype=dtype)
self.pipe.transformer = transformer
self.pipe.text_encoder_2 = text_encoder_2
if request.LowVRAM:
self.pipe.enable_model_cpu_offload()
if CLIPSKIP and request.CLIPSkip != 0: if CLIPSKIP and request.CLIPSkip != 0:
self.clip_skip = request.CLIPSkip self.clip_skip = request.CLIPSkip
else: else:
self.clip_skip = 0 self.clip_skip = 0
# torch_dtype needs to be customized. float16 for GPU, float32 for CPU # torch_dtype needs to be customized. float16 for GPU, float32 for CPU
# TODO: this needs to be customized # TODO: this needs to be customized
if request.SchedulerType != "": if request.SchedulerType != "":
self.pipe.scheduler = get_scheduler(request.SchedulerType, self.pipe.scheduler.config) self.pipe.scheduler = get_scheduler(request.SchedulerType, self.pipe.scheduler.config)
if COMPEL: if COMPEL:
self.compel = Compel( self.compel = Compel(
tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2], tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2 ],
text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2], text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
requires_pooled=[False, True] requires_pooled=[False, True]
) )
if request.ControlNet: if request.ControlNet:
self.controlnet = ControlNetModel.from_pretrained( self.controlnet = ControlNetModel.from_pretrained(
@@ -296,6 +252,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
self.pipe.controlnet = self.controlnet self.pipe.controlnet = self.controlnet
else: else:
self.controlnet = None self.controlnet = None
if request.CUDA:
self.pipe.to('cuda')
if self.controlnet:
self.controlnet.to('cuda')
if XPU:
self.pipe = self.pipe.to("xpu")
# Assume directory from request.ModelFile. # Assume directory from request.ModelFile.
# Only if request.LoraAdapter it's not an absolute path # Only if request.LoraAdapter it's not an absolute path
if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter: if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter:
@@ -308,17 +271,10 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.LoraAdapter: if request.LoraAdapter:
# Check if it's a local file and not a directory (we load lora differently for a safetensor file) # Check if it's a local file and not a directory (we load lora differently for a safetensor file)
if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter): if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter):
# self.load_lora_weights(request.LoraAdapter, 1, device, torchType) self.load_lora_weights(request.LoraAdapter, 1, device, torchType)
self.pipe.load_lora_weights(request.LoraAdapter)
else: else:
self.pipe.unet.load_attn_procs(request.LoraAdapter) self.pipe.unet.load_attn_procs(request.LoraAdapter)
if request.CUDA:
self.pipe.to('cuda')
if self.controlnet:
self.controlnet.to('cuda')
if XPU:
self.pipe = self.pipe.to("xpu")
except Exception as err: except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
# Implement your logic here for the LoadModel service # Implement your logic here for the LoadModel service
@@ -391,9 +347,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
# create a dictionary of values for the parameters # create a dictionary of values for the parameters
options = { options = {
"negative_prompt": request.negative_prompt, "negative_prompt": request.negative_prompt,
"width": request.width, "width": request.width,
"height": request.height, "height": request.height,
"num_inference_steps": steps, "num_inference_steps": steps,
} }
@@ -405,7 +361,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
options["image"] = pose_image options["image"] = pose_image
if CLIPSKIP and self.clip_skip != 0: if CLIPSKIP and self.clip_skip != 0:
options["clip_skip"] = self.clip_skip options["clip_skip"]=self.clip_skip
# Get the keys that we will build the args for our pipe for # Get the keys that we will build the args for our pipe for
keys = options.keys() keys = options.keys()
@@ -425,13 +381,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
request.seed request.seed
) )
if self.PipelineType == "FluxPipeline":
kwargs["max_sequence_length"] = 256
if self.PipelineType == "FluxTransformer2DModel":
kwargs["output_type"] = "pil"
kwargs["generator"] = torch.Generator("cpu").manual_seed(0)
if self.img2vid: if self.img2vid:
# Load the conditioning image # Load the conditioning image
image = load_image(request.src) image = load_image(request.src)
@@ -456,21 +405,20 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
image = self.pipe( image = self.pipe(
guidance_scale=self.cfg_scale, guidance_scale=self.cfg_scale,
**kwargs **kwargs
).images[0] ).images[0]
else: else:
# pass the kwargs dictionary to the self.pipe method # pass the kwargs dictionary to the self.pipe method
image = self.pipe( image = self.pipe(
prompt, prompt,
guidance_scale=self.cfg_scale, guidance_scale=self.cfg_scale,
**kwargs **kwargs
).images[0] ).images[0]
# save the result # save the result
image.save(request.dst) image.save(request.dst)
return backend_pb2.Result(message="Media generated", success=True) return backend_pb2.Result(message="Media generated", success=True)
def serve(address): def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
@@ -494,7 +442,6 @@ def serve(address):
except KeyboardInterrupt: except KeyboardInterrupt:
server.stop(0) server.stop(0)
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the gRPC server.") parser = argparse.ArgumentParser(description="Run the gRPC server.")
parser.add_argument( parser.add_argument(
@@ -502,4 +449,4 @@ if __name__ == "__main__":
) )
args = parser.parse_args() args = parser.parse_args()
serve(args.addr) serve(args.addr)

View File

@@ -1,9 +0,0 @@
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
torch
optimum-quanto

View File

@@ -1,10 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto

View File

@@ -1,9 +0,0 @@
torch
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto

View File

@@ -1,11 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0 --extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.3.1+rocm6.0 torch
torchvision==0.18.1+rocm6.0 torchvision
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto

Some files were not shown because too many files have changed in this diff.