mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-07 21:22:58 -05:00
Compare commits
1 Commits
v3.0.0
...
functions_
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ac47aeaddd |
@@ -1,17 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
cd /workspace
|
|
||||||
|
|
||||||
# Get the files into the volume without a bind mount
|
|
||||||
if [ ! -d ".git" ]; then
|
|
||||||
git clone https://github.com/mudler/LocalAI.git .
|
|
||||||
else
|
|
||||||
git fetch
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Standard Post-Create script completed."
|
|
||||||
|
|
||||||
if [ -f "/devcontainer-customization/postcreate.sh" ]; then
|
|
||||||
echo "Launching customization postcreate.sh"
|
|
||||||
bash "/devcontainer-customization/postcreate.sh"
|
|
||||||
fi
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
cd /workspace
|
|
||||||
|
|
||||||
# Grab the pre-stashed backend assets to avoid build issues
|
|
||||||
cp -r /build/backend-assets /workspace/backend-assets
|
|
||||||
|
|
||||||
# Ensures generated source files are present upon load
|
|
||||||
make prepare
|
|
||||||
|
|
||||||
echo "Standard Post-Start script completed."
|
|
||||||
|
|
||||||
if [ -f "/devcontainer-customization/poststart.sh" ]; then
|
|
||||||
echo "Launching customization poststart.sh"
|
|
||||||
bash "/devcontainer-customization/poststart.sh"
|
|
||||||
fi
|
|
||||||
@@ -1,55 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# This file contains some really simple functions that are useful when building up customization scripts.
|
|
||||||
|
|
||||||
|
|
||||||
# Checks if the git config has a user registered - and sets it up if not.
|
|
||||||
#
|
|
||||||
# Param 1: name
|
|
||||||
# Param 2: email
|
|
||||||
#
|
|
||||||
config_user() {
|
|
||||||
echo "Configuring git for $1 <$2>"
|
|
||||||
local gcn=$(git config --global user.name)
|
|
||||||
if [ -z "${gcn}" ]; then
|
|
||||||
echo "Setting up git user / remote"
|
|
||||||
git config --global user.name "$1"
|
|
||||||
git config --global user.email "$2"
|
|
||||||
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# Checks if the git remote is configured - and sets it up if not. Fetches either way.
|
|
||||||
#
|
|
||||||
# Param 1: remote name
|
|
||||||
# Param 2: remote url
|
|
||||||
#
|
|
||||||
config_remote() {
|
|
||||||
echo "Adding git remote and fetching $2 as $1"
|
|
||||||
local gr=$(git remote -v | grep $1)
|
|
||||||
if [ -z "${gr}" ]; then
|
|
||||||
git remote add $1 $2
|
|
||||||
fi
|
|
||||||
git fetch $1
|
|
||||||
}
|
|
||||||
|
|
||||||
# Setup special .ssh files
|
|
||||||
# Prints out lines of text to make things pretty
|
|
||||||
# Param 1: bash array, filenames relative to the customization directory that should be copied to ~/.ssh
|
|
||||||
setup_ssh() {
|
|
||||||
echo "starting ~/.ssh directory setup..."
|
|
||||||
mkdir -p "${HOME}.ssh"
|
|
||||||
chmod 0700 "${HOME}/.ssh"
|
|
||||||
echo "-----"
|
|
||||||
local files=("$@")
|
|
||||||
for file in "${files[@]}" ; do
|
|
||||||
local cfile="/devcontainer-customization/${file}"
|
|
||||||
local hfile="${HOME}/.ssh/${file}"
|
|
||||||
if [ ! -f "${hfile}" ]; then
|
|
||||||
echo "copying \"${file}\""
|
|
||||||
cp "${cfile}" "${hfile}"
|
|
||||||
chmod 600 "${hfile}"
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
echo "~/.ssh directory setup complete!"
|
|
||||||
}
|
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
Place any additional resources your environment requires in this directory
|
|
||||||
|
|
||||||
Script hooks are currently called for:
|
|
||||||
`postcreate.sh` and `poststart.sh`
|
|
||||||
|
|
||||||
If files with those names exist here, they will be called at the end of the normal script.
|
|
||||||
|
|
||||||
This is a good place to set things like `git config --global user.name` are set - and to handle any other files that are mounted via this directory.
|
|
||||||
|
|
||||||
To assist in doing so, `source /.devcontainer-scripts/utils.sh` will provide utility functions that may be useful - for example:
|
|
||||||
|
|
||||||
```
|
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
source "/.devcontainer-scripts/utils.sh"
|
|
||||||
|
|
||||||
sshfiles=("config", "key.pub")
|
|
||||||
|
|
||||||
setup_ssh "${sshfiles[@]}"
|
|
||||||
|
|
||||||
config_user "YOUR NAME" "YOUR EMAIL"
|
|
||||||
|
|
||||||
config_remote "REMOTE NAME" "REMOTE URL"
|
|
||||||
|
|
||||||
```
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
{
|
|
||||||
"$schema": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json",
|
|
||||||
"name": "LocalAI",
|
|
||||||
"workspaceFolder": "/workspace",
|
|
||||||
"dockerComposeFile": [ "./docker-compose-devcontainer.yml" ],
|
|
||||||
"service": "api",
|
|
||||||
"shutdownAction": "stopCompose",
|
|
||||||
"customizations": {
|
|
||||||
"vscode": {
|
|
||||||
"extensions": [
|
|
||||||
"golang.go",
|
|
||||||
"ms-vscode.makefile-tools",
|
|
||||||
"ms-azuretools.vscode-docker",
|
|
||||||
"ms-python.python",
|
|
||||||
"ms-python.debugpy",
|
|
||||||
"wayou.vscode-todo-highlight",
|
|
||||||
"waderyan.gitblame"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"forwardPorts": [8080, 3000],
|
|
||||||
"postCreateCommand": "bash /.devcontainer-scripts/postcreate.sh",
|
|
||||||
"postStartCommand": "bash /.devcontainer-scripts/poststart.sh"
|
|
||||||
}
|
|
||||||
@@ -1,48 +0,0 @@
|
|||||||
services:
|
|
||||||
api:
|
|
||||||
build:
|
|
||||||
context: ..
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
target: devcontainer
|
|
||||||
args:
|
|
||||||
- FFMPEG=true
|
|
||||||
- IMAGE_TYPE=extras
|
|
||||||
- GO_TAGS=p2p tts
|
|
||||||
env_file:
|
|
||||||
- ../.env
|
|
||||||
ports:
|
|
||||||
- 8080:8080
|
|
||||||
volumes:
|
|
||||||
- localai_workspace:/workspace
|
|
||||||
- ../models:/host-models
|
|
||||||
- ./customization:/devcontainer-customization
|
|
||||||
command: /bin/sh -c "while sleep 1000; do :; done"
|
|
||||||
cap_add:
|
|
||||||
- SYS_PTRACE
|
|
||||||
security_opt:
|
|
||||||
- seccomp:unconfined
|
|
||||||
prometheus:
|
|
||||||
image: prom/prometheus
|
|
||||||
container_name: prometheus
|
|
||||||
command:
|
|
||||||
- '--config.file=/etc/prometheus/prometheus.yml'
|
|
||||||
ports:
|
|
||||||
- 9090:9090
|
|
||||||
restart: unless-stopped
|
|
||||||
volumes:
|
|
||||||
- ./prometheus:/etc/prometheus
|
|
||||||
- prom_data:/prometheus
|
|
||||||
grafana:
|
|
||||||
image: grafana/grafana
|
|
||||||
container_name: grafana
|
|
||||||
ports:
|
|
||||||
- 3000:3000
|
|
||||||
restart: unless-stopped
|
|
||||||
environment:
|
|
||||||
- GF_SECURITY_ADMIN_USER=admin
|
|
||||||
- GF_SECURITY_ADMIN_PASSWORD=grafana
|
|
||||||
volumes:
|
|
||||||
- ./grafana:/etc/grafana/provisioning/datasources
|
|
||||||
volumes:
|
|
||||||
prom_data:
|
|
||||||
localai_workspace:
|
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
|
|
||||||
apiVersion: 1
|
|
||||||
|
|
||||||
datasources:
|
|
||||||
- name: Prometheus
|
|
||||||
type: prometheus
|
|
||||||
url: http://prometheus:9090
|
|
||||||
isDefault: true
|
|
||||||
access: proxy
|
|
||||||
editable: true
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
global:
|
|
||||||
scrape_interval: 15s
|
|
||||||
scrape_timeout: 10s
|
|
||||||
evaluation_interval: 15s
|
|
||||||
alerting:
|
|
||||||
alertmanagers:
|
|
||||||
- static_configs:
|
|
||||||
- targets: []
|
|
||||||
scheme: http
|
|
||||||
timeout: 10s
|
|
||||||
api_version: v1
|
|
||||||
scrape_configs:
|
|
||||||
- job_name: prometheus
|
|
||||||
honor_timestamps: true
|
|
||||||
scrape_interval: 15s
|
|
||||||
scrape_timeout: 10s
|
|
||||||
metrics_path: /metrics
|
|
||||||
scheme: http
|
|
||||||
static_configs:
|
|
||||||
- targets:
|
|
||||||
- localhost:9090
|
|
||||||
@@ -1,7 +1,6 @@
|
|||||||
.idea
|
.idea
|
||||||
.github
|
.github
|
||||||
.vscode
|
.vscode
|
||||||
.devcontainer
|
|
||||||
models
|
models
|
||||||
examples/chatbot-ui/models
|
examples/chatbot-ui/models
|
||||||
examples/rwkv/models
|
examples/rwkv/models
|
||||||
|
|||||||
26
.env
26
.env
@@ -29,9 +29,6 @@
|
|||||||
## Enable/Disable single backend (useful if only one GPU is available)
|
## Enable/Disable single backend (useful if only one GPU is available)
|
||||||
# LOCALAI_SINGLE_ACTIVE_BACKEND=true
|
# LOCALAI_SINGLE_ACTIVE_BACKEND=true
|
||||||
|
|
||||||
# Forces shutdown of the backends if busy (only if LOCALAI_SINGLE_ACTIVE_BACKEND is set)
|
|
||||||
# LOCALAI_FORCE_BACKEND_SHUTDOWN=true
|
|
||||||
|
|
||||||
## Specify a build type. Available: cublas, openblas, clblas.
|
## Specify a build type. Available: cublas, openblas, clblas.
|
||||||
## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
|
## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
|
||||||
## OpenBLAS: This is an open-source implementation of the BLAS library that aims to provide highly optimized code for various platforms. It includes support for multi-threading and can be compiled to use hardware-specific features for additional performance. OpenBLAS can run on many kinds of hardware, including CPUs from Intel, AMD, and ARM.
|
## OpenBLAS: This is an open-source implementation of the BLAS library that aims to provide highly optimized code for various platforms. It includes support for multi-threading and can be compiled to use hardware-specific features for additional performance. OpenBLAS can run on many kinds of hardware, including CPUs from Intel, AMD, and ARM.
|
||||||
@@ -41,12 +38,12 @@
|
|||||||
## Uncomment and set to true to enable rebuilding from source
|
## Uncomment and set to true to enable rebuilding from source
|
||||||
# REBUILD=true
|
# REBUILD=true
|
||||||
|
|
||||||
## Enable go tags, available: p2p, tts
|
## Enable go tags, available: stablediffusion, tts
|
||||||
## p2p: enable distributed inferencing
|
## stablediffusion: image generation with stablediffusion
|
||||||
## tts: enables text-to-speech with go-piper
|
## tts: enables text-to-speech with go-piper
|
||||||
## (requires REBUILD=true)
|
## (requires REBUILD=true)
|
||||||
#
|
#
|
||||||
# GO_TAGS=p2p
|
# GO_TAGS=stablediffusion
|
||||||
|
|
||||||
## Path where to store generated images
|
## Path where to store generated images
|
||||||
# LOCALAI_IMAGE_PATH=/tmp/generated/images
|
# LOCALAI_IMAGE_PATH=/tmp/generated/images
|
||||||
@@ -74,26 +71,9 @@
|
|||||||
### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
|
### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
|
||||||
# LLAMACPP_PARALLEL=1
|
# LLAMACPP_PARALLEL=1
|
||||||
|
|
||||||
### Define a list of GRPC Servers for llama-cpp workers to distribute the load
|
|
||||||
# https://github.com/ggerganov/llama.cpp/pull/6829
|
|
||||||
# https://github.com/ggerganov/llama.cpp/blob/master/tools/rpc/README.md
|
|
||||||
# LLAMACPP_GRPC_SERVERS=""
|
|
||||||
|
|
||||||
### Enable to run parallel requests
|
### Enable to run parallel requests
|
||||||
# LOCALAI_PARALLEL_REQUESTS=true
|
# LOCALAI_PARALLEL_REQUESTS=true
|
||||||
|
|
||||||
# Enable to allow p2p mode
|
|
||||||
# LOCALAI_P2P=true
|
|
||||||
|
|
||||||
# Enable to use federated mode
|
|
||||||
# LOCALAI_FEDERATED=true
|
|
||||||
|
|
||||||
# Enable to start federation server
|
|
||||||
# FEDERATED_SERVER=true
|
|
||||||
|
|
||||||
# Define to use federation token
|
|
||||||
# TOKEN=""
|
|
||||||
|
|
||||||
### Watchdog settings
|
### Watchdog settings
|
||||||
###
|
###
|
||||||
# Enables watchdog to kill backends that are inactive for too much time
|
# Enables watchdog to kill backends that are inactive for too much time
|
||||||
|
|||||||
1
.gitattributes
vendored
1
.gitattributes
vendored
@@ -1,2 +1 @@
|
|||||||
*.sh text eol=lf
|
*.sh text eol=lf
|
||||||
backend/cpp/llama/*.hpp linguist-vendored
|
|
||||||
13
.github/bump_deps.sh
vendored
13
.github/bump_deps.sh
vendored
@@ -6,17 +6,4 @@ VAR=$3
|
|||||||
|
|
||||||
LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
|
LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
|
||||||
|
|
||||||
# Read $VAR from Makefile (only first match)
|
|
||||||
set +e
|
|
||||||
CURRENT_COMMIT="$(grep -m1 "^$VAR?=" Makefile | cut -d'=' -f2)"
|
|
||||||
set -e
|
|
||||||
|
|
||||||
sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
|
sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
|
||||||
|
|
||||||
if [ -z "$CURRENT_COMMIT" ]; then
|
|
||||||
echo "Could not find $VAR in Makefile."
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Changes: https://github.com/$REPO/compare/${CURRENT_COMMIT}..${LAST_COMMIT}" >> "${VAR}_message.txt"
|
|
||||||
echo "${LAST_COMMIT}" >> "${VAR}_commit.txt"
|
|
||||||
85
.github/check_and_update.py
vendored
85
.github/check_and_update.py
vendored
@@ -1,85 +0,0 @@
|
|||||||
import hashlib
|
|
||||||
from huggingface_hub import hf_hub_download, get_paths_info
|
|
||||||
import requests
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
|
|
||||||
uri = sys.argv[1]
|
|
||||||
file_name = uri.split('/')[-1]
|
|
||||||
|
|
||||||
# Function to parse the URI and determine download method
|
|
||||||
def parse_uri(uri):
|
|
||||||
if uri.startswith('huggingface://'):
|
|
||||||
repo_id = uri.split('://')[1]
|
|
||||||
return 'huggingface', repo_id.rsplit('/', 1)[0]
|
|
||||||
elif 'huggingface.co' in uri:
|
|
||||||
parts = uri.split('/resolve/')
|
|
||||||
if len(parts) > 1:
|
|
||||||
repo_path = parts[0].split('https://huggingface.co/')[-1]
|
|
||||||
return 'huggingface', repo_path
|
|
||||||
return 'direct', uri
|
|
||||||
|
|
||||||
def calculate_sha256(file_path):
|
|
||||||
sha256_hash = hashlib.sha256()
|
|
||||||
with open(file_path, 'rb') as f:
|
|
||||||
for byte_block in iter(lambda: f.read(4096), b''):
|
|
||||||
sha256_hash.update(byte_block)
|
|
||||||
return sha256_hash.hexdigest()
|
|
||||||
|
|
||||||
def manual_safety_check_hf(repo_id):
|
|
||||||
scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
|
|
||||||
scan = scanResponse.json()
|
|
||||||
# Check if 'hasUnsafeFile' exists in the response
|
|
||||||
if 'hasUnsafeFile' in scan:
|
|
||||||
if scan['hasUnsafeFile']:
|
|
||||||
return scan
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
download_type, repo_id_or_url = parse_uri(uri)
|
|
||||||
|
|
||||||
new_checksum = None
|
|
||||||
file_path = None
|
|
||||||
|
|
||||||
# Decide download method based on URI type
|
|
||||||
if download_type == 'huggingface':
|
|
||||||
# Check if the repo is flagged as dangerous by HF
|
|
||||||
hazard = manual_safety_check_hf(repo_id_or_url)
|
|
||||||
if hazard != None:
|
|
||||||
print(f'Error: HuggingFace has detected security problems for {repo_id_or_url}: {str(hazard)}', filename=file_name)
|
|
||||||
sys.exit(5)
|
|
||||||
# Use HF API to pull sha
|
|
||||||
for file in get_paths_info(repo_id_or_url, [file_name], repo_type='model'):
|
|
||||||
try:
|
|
||||||
new_checksum = file.lfs.sha256
|
|
||||||
break
|
|
||||||
except Exception as e:
|
|
||||||
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
|
|
||||||
sys.exit(2)
|
|
||||||
if new_checksum is None:
|
|
||||||
try:
|
|
||||||
file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
|
|
||||||
except Exception as e:
|
|
||||||
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
|
|
||||||
sys.exit(2)
|
|
||||||
else:
|
|
||||||
response = requests.get(repo_id_or_url)
|
|
||||||
if response.status_code == 200:
|
|
||||||
with open(file_name, 'wb') as f:
|
|
||||||
f.write(response.content)
|
|
||||||
file_path = file_name
|
|
||||||
elif response.status_code == 404:
|
|
||||||
print(f'File not found: {response.status_code}', file=sys.stderr)
|
|
||||||
sys.exit(2)
|
|
||||||
else:
|
|
||||||
print(f'Error downloading file: {response.status_code}', file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
if new_checksum is None:
|
|
||||||
new_checksum = calculate_sha256(file_path)
|
|
||||||
print(new_checksum)
|
|
||||||
os.remove(file_path)
|
|
||||||
else:
|
|
||||||
print(new_checksum)
|
|
||||||
64
.github/checksum_checker.sh
vendored
64
.github/checksum_checker.sh
vendored
@@ -14,14 +14,62 @@ function check_and_update_checksum() {
|
|||||||
idx="$5"
|
idx="$5"
|
||||||
|
|
||||||
# Download the file and calculate new checksum using Python
|
# Download the file and calculate new checksum using Python
|
||||||
new_checksum=$(python3 ./.github/check_and_update.py $uri)
|
new_checksum=$(python3 -c "
|
||||||
result=$?
|
import hashlib
|
||||||
|
from huggingface_hub import hf_hub_download
|
||||||
|
import requests
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
if [[ $result -eq 5 ]]; then
|
uri = '$uri'
|
||||||
echo "Contaminated entry detected, deleting entry for $model_name..."
|
file_name = uri.split('/')[-1]
|
||||||
yq eval -i "del([$idx])" "$input_yaml"
|
|
||||||
return
|
# Function to parse the URI and determine download method
|
||||||
fi
|
# Function to parse the URI and determine download method
|
||||||
|
def parse_uri(uri):
|
||||||
|
if uri.startswith('huggingface://'):
|
||||||
|
repo_id = uri.split('://')[1]
|
||||||
|
return 'huggingface', repo_id.rsplit('/', 1)[0]
|
||||||
|
elif 'huggingface.co' in uri:
|
||||||
|
parts = uri.split('/resolve/')
|
||||||
|
if len(parts) > 1:
|
||||||
|
repo_path = parts[0].split('https://huggingface.co/')[-1]
|
||||||
|
return 'huggingface', repo_path
|
||||||
|
return 'direct', uri
|
||||||
|
|
||||||
|
def calculate_sha256(file_path):
|
||||||
|
sha256_hash = hashlib.sha256()
|
||||||
|
with open(file_path, 'rb') as f:
|
||||||
|
for byte_block in iter(lambda: f.read(4096), b''):
|
||||||
|
sha256_hash.update(byte_block)
|
||||||
|
return sha256_hash.hexdigest()
|
||||||
|
|
||||||
|
download_type, repo_id_or_url = parse_uri(uri)
|
||||||
|
|
||||||
|
# Decide download method based on URI type
|
||||||
|
if download_type == 'huggingface':
|
||||||
|
try:
|
||||||
|
file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
|
||||||
|
except Exception as e:
|
||||||
|
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
|
||||||
|
sys.exit(2)
|
||||||
|
else:
|
||||||
|
response = requests.get(repo_id_or_url)
|
||||||
|
if response.status_code == 200:
|
||||||
|
with open(file_name, 'wb') as f:
|
||||||
|
f.write(response.content)
|
||||||
|
file_path = file_name
|
||||||
|
elif response.status_code == 404:
|
||||||
|
print(f'File not found: {response.status_code}', file=sys.stderr)
|
||||||
|
sys.exit(2)
|
||||||
|
else:
|
||||||
|
print(f'Error downloading file: {response.status_code}', file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
print(calculate_sha256(file_path))
|
||||||
|
# Clean up the downloaded file
|
||||||
|
os.remove(file_path)
|
||||||
|
")
|
||||||
|
|
||||||
if [[ "$new_checksum" == "" ]]; then
|
if [[ "$new_checksum" == "" ]]; then
|
||||||
echo "Error calculating checksum for $file_name. Skipping..."
|
echo "Error calculating checksum for $file_name. Skipping..."
|
||||||
@@ -31,7 +79,7 @@ function check_and_update_checksum() {
|
|||||||
echo "Checksum for $file_name: $new_checksum"
|
echo "Checksum for $file_name: $new_checksum"
|
||||||
|
|
||||||
# Compare and update the YAML file if checksums do not match
|
# Compare and update the YAML file if checksums do not match
|
||||||
|
result=$?
|
||||||
if [[ $result -eq 2 ]]; then
|
if [[ $result -eq 2 ]]; then
|
||||||
echo "File not found, deleting entry for $file_name..."
|
echo "File not found, deleting entry for $file_name..."
|
||||||
# yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\"))" "$input_yaml"
|
# yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\"))" "$input_yaml"
|
||||||
|
|||||||
304
.github/ci/modelslist.go
vendored
304
.github/ci/modelslist.go
vendored
@@ -1,304 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"html/template"
|
|
||||||
"io/ioutil"
|
|
||||||
"os"
|
|
||||||
|
|
||||||
"github.com/microcosm-cc/bluemonday"
|
|
||||||
"gopkg.in/yaml.v3"
|
|
||||||
)
|
|
||||||
|
|
||||||
var modelPageTemplate string = `
|
|
||||||
<!DOCTYPE html>
|
|
||||||
<html>
|
|
||||||
<head>
|
|
||||||
<meta charset="UTF-8">
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
||||||
<title>LocalAI models</title>
|
|
||||||
<link href="https://cdnjs.cloudflare.com/ajax/libs/flowbite/2.3.0/flowbite.min.css" rel="stylesheet" />
|
|
||||||
<script src="https://cdn.jsdelivr.net/npm/vanilla-lazyload@19.1.3/dist/lazyload.min.js"></script>
|
|
||||||
|
|
||||||
<link
|
|
||||||
rel="stylesheet"
|
|
||||||
href="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/styles/default.min.css"
|
|
||||||
/>
|
|
||||||
<script
|
|
||||||
defer
|
|
||||||
src="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/highlight.min.js"
|
|
||||||
></script>
|
|
||||||
<script
|
|
||||||
defer
|
|
||||||
src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"
|
|
||||||
></script>
|
|
||||||
<script
|
|
||||||
defer
|
|
||||||
src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"
|
|
||||||
></script>
|
|
||||||
<script
|
|
||||||
defer
|
|
||||||
src="https://cdn.jsdelivr.net/npm/dompurify@3.0.6/dist/purify.min.js"
|
|
||||||
></script>
|
|
||||||
|
|
||||||
<link href="/static/general.css" rel="stylesheet" />
|
|
||||||
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
|
|
||||||
<link
|
|
||||||
href="https://fonts.googleapis.com/css?family=Roboto:300,400,500,700,900&display=swap"
|
|
||||||
rel="stylesheet" />
|
|
||||||
<link
|
|
||||||
rel="stylesheet"
|
|
||||||
href="https://cdn.jsdelivr.net/npm/tw-elements/css/tw-elements.min.css" />
|
|
||||||
<script src="https://cdn.tailwindcss.com/3.3.0"></script>
|
|
||||||
<script>
|
|
||||||
tailwind.config = {
|
|
||||||
darkMode: "class",
|
|
||||||
theme: {
|
|
||||||
fontFamily: {
|
|
||||||
sans: ["Roboto", "sans-serif"],
|
|
||||||
body: ["Roboto", "sans-serif"],
|
|
||||||
mono: ["ui-monospace", "monospace"],
|
|
||||||
},
|
|
||||||
},
|
|
||||||
corePlugins: {
|
|
||||||
preflight: false,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
</script>
|
|
||||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.1.1/css/all.min.css">
|
|
||||||
<script src="https://unpkg.com/htmx.org@1.9.12" integrity="sha384-ujb1lZYygJmzgSwoxRggbCHcjc0rB2XoQrxeTUQyRjrOnlCoYta87iKBWq3EsdM2" crossorigin="anonymous"></script>
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="bg-gray-900 text-gray-200">
|
|
||||||
<div class="flex flex-col min-h-screen">
|
|
||||||
|
|
||||||
<nav class="bg-gray-800 shadow-lg">
|
|
||||||
<div class="container mx-auto px-4 py-4">
|
|
||||||
<div class="flex items-center justify-between">
|
|
||||||
<div class="flex items-center">
|
|
||||||
<a href="/" class="text-white text-xl font-bold"><img src="https://github.com/mudler/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
|
|
||||||
<a href="/" class="text-white text-xl font-bold">LocalAI</a>
|
|
||||||
</div>
|
|
||||||
<!-- Menu button for small screens -->
|
|
||||||
<div class="lg:hidden">
|
|
||||||
<button id="menu-toggle" class="text-gray-400 hover:text-white focus:outline-none">
|
|
||||||
<i class="fas fa-bars fa-lg"></i>
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
<!-- Navigation links -->
|
|
||||||
<div class="hidden lg:flex lg:items-center lg:justify-end lg:flex-1 lg:w-0">
|
|
||||||
<a href="https://localai.io" class="text-gray-400 hover:text-white px-3 py-2 rounded" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<!-- Collapsible menu for small screens -->
|
|
||||||
<div class="hidden lg:hidden" id="mobile-menu">
|
|
||||||
<div class="pt-4 pb-3 border-t border-gray-700">
|
|
||||||
|
|
||||||
<a href="https://localai.io" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<style>
|
|
||||||
.is-hidden {
|
|
||||||
display: none;
|
|
||||||
}
|
|
||||||
</style>
|
|
||||||
|
|
||||||
<div class="container mx-auto px-4 flex-grow">
|
|
||||||
|
|
||||||
<div class="models mt-12">
|
|
||||||
<h2 class="text-center text-3xl font-semibold text-gray-100">
|
|
||||||
LocalAI model gallery list </h2><br>
|
|
||||||
|
|
||||||
<h2 class="text-center text-3xl font-semibold text-gray-100">
|
|
||||||
|
|
||||||
🖼️ Available {{.AvailableModels}} models</i> <a href="https://localai.io/models/" target="_blank" >
|
|
||||||
<i class="fas fa-circle-info pr-2"></i>
|
|
||||||
</a></h2>
|
|
||||||
|
|
||||||
<h3>
|
|
||||||
Refer to the Model gallery <a href="https://localai.io/models/" target="_blank" ><i class="fas fa-circle-info pr-2"></i></a> for more information on how to use the models with LocalAI.<br>
|
|
||||||
|
|
||||||
You can install models with the CLI command <code>local-ai models install <model-name></code>. or by using the WebUI.
|
|
||||||
</h3>
|
|
||||||
|
|
||||||
<input class="form-control appearance-none block w-full mt-5 px-3 py-2 text-base font-normal text-gray-300 pb-2 mb-5 bg-gray-800 bg-clip-padding border border-solid border-gray-600 rounded transition ease-in-out m-0 focus:text-gray-300 focus:bg-gray-900 focus:border-blue-500 focus:outline-none" type="search"
|
|
||||||
id="searchbox" placeholder="Live search keyword..">
|
|
||||||
<div class="dark grid grid-cols-1 grid-rows-1 md:grid-cols-3 block rounded-lg shadow-secondary-1 dark:bg-surface-dark">
|
|
||||||
{{ range $_, $model := .Models }}
|
|
||||||
<div class="box me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface pb-2">
|
|
||||||
<div>
|
|
||||||
{{ $icon := "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg" }}
|
|
||||||
{{ if $model.Icon }}
|
|
||||||
{{ $icon = $model.Icon }}
|
|
||||||
{{ end }}
|
|
||||||
<div class="flex justify-center items-center">
|
|
||||||
<img data-src="{{ $icon }}" alt="{{$model.Name}}" class="rounded-t-lg max-h-48 max-w-96 object-cover mt-3 lazy">
|
|
||||||
</div>
|
|
||||||
<div class="p-6 text-surface dark:text-white">
|
|
||||||
<h5 class="mb-2 text-xl font-medium leading-tight">{{$model.Name}}</h5>
|
|
||||||
|
|
||||||
|
|
||||||
<p class="mb-4 text-base truncate">{{ $model.Description }}</p>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
<div class="px-6 pt-4 pb-2">
|
|
||||||
|
|
||||||
<!-- Modal toggle -->
|
|
||||||
<button data-modal-target="{{ $model.Name}}-modal" data-modal-toggle="{{ $model.Name }}-modal" class="block text-white bg-blue-700 hover:bg-blue-800 focus:ring-4 focus:outline-none focus:ring-blue-300 font-medium rounded-lg text-sm px-5 py-2.5 text-center dark:bg-blue-600 dark:hover:bg-blue-700 dark:focus:ring-blue-800" type="button">
|
|
||||||
More info
|
|
||||||
</button>
|
|
||||||
|
|
||||||
<!-- Main modal -->
|
|
||||||
<div id="{{ $model.Name}}-modal" tabindex="-1" aria-hidden="true" class="hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full">
|
|
||||||
<div class="relative p-4 w-full max-w-2xl max-h-full">
|
|
||||||
<!-- Modal content -->
|
|
||||||
<div class="relative bg-white rounded-lg shadow dark:bg-gray-700">
|
|
||||||
<!-- Modal header -->
|
|
||||||
<div class="flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600">
|
|
||||||
<h3 class="text-xl font-semibold text-gray-900 dark:text-white">
|
|
||||||
{{ $model.Name}}
|
|
||||||
</h3>
|
|
||||||
<button type="button" class="text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white" data-modal-hide="{{$model.Name}}-modal">
|
|
||||||
<svg class="w-3 h-3" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 14 14">
|
|
||||||
<path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6"/>
|
|
||||||
</svg>
|
|
||||||
<span class="sr-only">Close modal</span>
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
<!-- Modal body -->
|
|
||||||
<div class="p-4 md:p-5 space-y-4">
|
|
||||||
<div class="flex justify-center items-center">
|
|
||||||
<img data-src="{{ $icon }}" alt="{{$model.Name}}" class="lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3">
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
|
|
||||||
{{ $model.Description }}
|
|
||||||
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
|
|
||||||
To install the model with the CLI, run: <br>
|
|
||||||
<code> local-ai models install {{$model.Name}} </code> <br>
|
|
||||||
|
|
||||||
<hr>
|
|
||||||
See also <a href="https://localai.io/models/" target="_blank" >
|
|
||||||
Installation <i class="fas fa-circle-info pr-2"></i>
|
|
||||||
</a> to see how to install models with the REST API.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
|
|
||||||
<ul>
|
|
||||||
{{ range $_, $u := $model.URLs }}
|
|
||||||
<li><a href="{{ $u }}" target=_blank><i class="fa-solid fa-link"></i> {{ $u }}</a></li>
|
|
||||||
{{ end }}
|
|
||||||
</ul>
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
<!-- Modal footer -->
|
|
||||||
<div class="flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600">
|
|
||||||
<button data-modal-hide="{{ $model.Name}}-modal" type="button" class="py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700">Close</button>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
{{ end }}
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<script>
|
|
||||||
var lazyLoadInstance = new LazyLoad({
|
|
||||||
// Your custom settings go here
|
|
||||||
});
|
|
||||||
|
|
||||||
let cards = document.querySelectorAll('.box')
|
|
||||||
|
|
||||||
function liveSearch() {
|
|
||||||
let search_query = document.getElementById("searchbox").value;
|
|
||||||
|
|
||||||
//Use innerText if all contents are visible
|
|
||||||
//Use textContent for including hidden elements
|
|
||||||
for (var i = 0; i < cards.length; i++) {
|
|
||||||
if(cards[i].textContent.toLowerCase()
|
|
||||||
.includes(search_query.toLowerCase())) {
|
|
||||||
cards[i].classList.remove("is-hidden");
|
|
||||||
} else {
|
|
||||||
cards[i].classList.add("is-hidden");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//A little delay
|
|
||||||
let typingTimer;
|
|
||||||
let typeInterval = 500;
|
|
||||||
let searchInput = document.getElementById('searchbox');
|
|
||||||
|
|
||||||
searchInput.addEventListener('keyup', () => {
|
|
||||||
clearTimeout(typingTimer);
|
|
||||||
typingTimer = setTimeout(liveSearch, typeInterval);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/flowbite/2.3.0/flowbite.min.js"></script>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
`
|
|
||||||
|
|
||||||
type GalleryModel struct {
|
|
||||||
Name string `json:"name" yaml:"name"`
|
|
||||||
URLs []string `json:"urls" yaml:"urls"`
|
|
||||||
Icon string `json:"icon" yaml:"icon"`
|
|
||||||
Description string `json:"description" yaml:"description"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
// read the YAML file which contains the models
|
|
||||||
|
|
||||||
f, err := ioutil.ReadFile(os.Args[1])
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println("Error reading file:", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
models := []*GalleryModel{}
|
|
||||||
err = yaml.Unmarshal(f, &models)
|
|
||||||
if err != nil {
|
|
||||||
// write to stderr
|
|
||||||
os.Stderr.WriteString("Error unmarshaling YAML: " + err.Error() + "\n")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure that all arbitrary text content is sanitized before display
|
|
||||||
for i, m := range models {
|
|
||||||
models[i].Name = bluemonday.StrictPolicy().Sanitize(m.Name)
|
|
||||||
models[i].Description = bluemonday.StrictPolicy().Sanitize(m.Description)
|
|
||||||
}
|
|
||||||
|
|
||||||
// render the template
|
|
||||||
data := struct {
|
|
||||||
Models []*GalleryModel
|
|
||||||
AvailableModels int
|
|
||||||
}{
|
|
||||||
Models: models,
|
|
||||||
AvailableModels: len(models),
|
|
||||||
}
|
|
||||||
tmpl := template.Must(template.New("modelPage").Parse(modelPageTemplate))
|
|
||||||
|
|
||||||
err = tmpl.Execute(os.Stdout, data)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println("Error executing template:", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
94
.github/dependabot.yml
vendored
94
.github/dependabot.yml
vendored
@@ -1,16 +1,10 @@
|
|||||||
# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
|
# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
|
||||||
version: 2
|
version: 2
|
||||||
updates:
|
updates:
|
||||||
- package-ecosystem: "gitsubmodule"
|
|
||||||
directory: "/"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "gomod"
|
- package-ecosystem: "gomod"
|
||||||
directory: "/"
|
directory: "/"
|
||||||
schedule:
|
schedule:
|
||||||
interval: "weekly"
|
interval: "weekly"
|
||||||
ignore:
|
|
||||||
- dependency-name: "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
- package-ecosystem: "github-actions"
|
- package-ecosystem: "github-actions"
|
||||||
# Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
|
# Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
|
||||||
directory: "/"
|
directory: "/"
|
||||||
@@ -29,91 +23,3 @@ updates:
|
|||||||
schedule:
|
schedule:
|
||||||
# Check for updates to GitHub Actions every weekday
|
# Check for updates to GitHub Actions every weekday
|
||||||
interval: "weekly"
|
interval: "weekly"
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/bark"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/common/template"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/coqui"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/diffusers"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/exllama"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/exllama2"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/mamba"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/openvoice"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/rerankers"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/sentencetransformers"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/transformers"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/vllm"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/examples/chainlit"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/examples/functions"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/examples/langchain/langchainpy-localai-example"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/examples/langchain-chroma"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/examples/streamlit-bot"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "docker"
|
|
||||||
directory: "/examples/k8sgpt"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "docker"
|
|
||||||
directory: "/examples/kubernetes"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "docker"
|
|
||||||
directory: "/examples/langchain"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "gomod"
|
|
||||||
directory: "/examples/semantic-todo"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "docker"
|
|
||||||
directory: "/examples/telegram-bot"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
|
|||||||
11
.github/labeler.yml
vendored
11
.github/labeler.yml
vendored
@@ -1,15 +1,6 @@
|
|||||||
enhancement:
|
enhancements:
|
||||||
- head-branch: ['^feature', 'feature']
|
- head-branch: ['^feature', 'feature']
|
||||||
|
|
||||||
dependencies:
|
|
||||||
- any:
|
|
||||||
- changed-files:
|
|
||||||
- any-glob-to-any-file: 'Makefile'
|
|
||||||
- changed-files:
|
|
||||||
- any-glob-to-any-file: '*.mod'
|
|
||||||
- changed-files:
|
|
||||||
- any-glob-to-any-file: '*.sum'
|
|
||||||
|
|
||||||
kind/documentation:
|
kind/documentation:
|
||||||
- any:
|
- any:
|
||||||
- changed-files:
|
- changed-files:
|
||||||
|
|||||||
3
.github/release.yml
vendored
3
.github/release.yml
vendored
@@ -13,9 +13,6 @@ changelog:
|
|||||||
labels:
|
labels:
|
||||||
- bug
|
- bug
|
||||||
- regression
|
- regression
|
||||||
- title: "🖧 P2P area"
|
|
||||||
labels:
|
|
||||||
- area/p2p
|
|
||||||
- title: Exciting New Features 🎉
|
- title: Exciting New Features 🎉
|
||||||
labels:
|
labels:
|
||||||
- Semver-Minor
|
- Semver-Minor
|
||||||
|
|||||||
554
.github/workflows/backend.yml
vendored
554
.github/workflows/backend.yml
vendored
@@ -1,554 +0,0 @@
|
|||||||
---
|
|
||||||
name: 'build backend container images'
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
tags:
|
|
||||||
- '*'
|
|
||||||
#pull_request:
|
|
||||||
|
|
||||||
concurrency:
|
|
||||||
group: ci-backends-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
|
||||||
cancel-in-progress: true
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
backend-jobs:
|
|
||||||
uses: ./.github/workflows/backend_build.yml
|
|
||||||
with:
|
|
||||||
tag-latest: ${{ matrix.tag-latest }}
|
|
||||||
tag-suffix: ${{ matrix.tag-suffix }}
|
|
||||||
build-type: ${{ matrix.build-type }}
|
|
||||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
|
||||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
|
||||||
platforms: ${{ matrix.platforms }}
|
|
||||||
runs-on: ${{ matrix.runs-on }}
|
|
||||||
base-image: ${{ matrix.base-image }}
|
|
||||||
backend: ${{ matrix.backend }}
|
|
||||||
latest-image: ${{ matrix.latest-image }}
|
|
||||||
dockerfile: $${ matrix.dockerfile }}
|
|
||||||
context: $${ matrix.context }}
|
|
||||||
secrets:
|
|
||||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
|
||||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
|
||||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
|
||||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
|
||||||
strategy:
|
|
||||||
fail-fast: false
|
|
||||||
#max-parallel: ${{ github.event_name != 'pull_request' && 6 || 4 }}
|
|
||||||
matrix:
|
|
||||||
include:
|
|
||||||
# CUDA 11 builds
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "11"
|
|
||||||
cuda-minor-version: "7"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-nvidia-cuda-11-rerankers'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
backend: "rerankers"
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-11-rerankers'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "11"
|
|
||||||
cuda-minor-version: "7"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-nvidia-cuda-11-vllm'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
backend: "vllm"
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-11-vllm'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "11"
|
|
||||||
cuda-minor-version: "7"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-nvidia-cuda-11-transformers'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
backend: "transformers"
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-11-transformers'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "11"
|
|
||||||
cuda-minor-version: "7"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-nvidia-cuda-11-diffusers'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
backend: "diffusers"
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-11-diffusers'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
# CUDA 11 additional backends
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "11"
|
|
||||||
cuda-minor-version: "7"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-nvidia-cuda-11-kokoro'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
backend: "kokoro"
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-11-kokoro'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "11"
|
|
||||||
cuda-minor-version: "7"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
backend: "faster-whisper"
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-11-faster-whisper'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "11"
|
|
||||||
cuda-minor-version: "7"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-nvidia-cuda-11-coqui'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
backend: "coqui"
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-11-coqui'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "11"
|
|
||||||
cuda-minor-version: "7"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-nvidia-cuda-11-bark'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
backend: "bark"
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-11-bark'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "11"
|
|
||||||
cuda-minor-version: "7"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-nvidia-cuda-11-chatterbox'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
backend: "chatterbox"
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-11-chatterbox'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
# CUDA 12 builds
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "12"
|
|
||||||
cuda-minor-version: "0"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-nvidia-cuda-12-rerankers'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
backend: "rerankers"
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-12-rerankers'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "12"
|
|
||||||
cuda-minor-version: "0"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-nvidia-cuda-12-vllm'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
backend: "vllm"
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-12-vllm'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "12"
|
|
||||||
cuda-minor-version: "0"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-nvidia-cuda-12-transformers'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
backend: "transformers"
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-12-transformers'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "12"
|
|
||||||
cuda-minor-version: "0"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
backend: "diffusers"
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-12-diffusers'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
# CUDA 12 additional backends
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "12"
|
|
||||||
cuda-minor-version: "0"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-nvidia-cuda-12-kokoro'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
backend: "kokoro"
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-12-kokoro'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "12"
|
|
||||||
cuda-minor-version: "0"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
backend: "faster-whisper"
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-12-faster-whisper'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "12"
|
|
||||||
cuda-minor-version: "0"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-nvidia-cuda-12-coqui'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
backend: "coqui"
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-12-coqui'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "12"
|
|
||||||
cuda-minor-version: "0"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-nvidia-cuda-12-bark'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
backend: "bark"
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-12-bark'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "12"
|
|
||||||
cuda-minor-version: "0"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-nvidia-cuda-12-chatterbox'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
backend: "chatterbox"
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-12-chatterbox'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
# hipblas builds
|
|
||||||
- build-type: 'hipblas'
|
|
||||||
cuda-major-version: ""
|
|
||||||
cuda-minor-version: ""
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-rocm-hipblas-rerankers'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
backend: "rerankers"
|
|
||||||
latest-image: 'latest-gpu-rocm-hipblas-rerankers'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
- build-type: 'hipblas'
|
|
||||||
cuda-major-version: ""
|
|
||||||
cuda-minor-version: ""
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-rocm-hipblas-vllm'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
backend: "vllm"
|
|
||||||
latest-image: 'latest-gpu-rocm-hipblas-vllm'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
- build-type: 'hipblas'
|
|
||||||
cuda-major-version: ""
|
|
||||||
cuda-minor-version: ""
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-rocm-hipblas-transformers'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
backend: "transformers"
|
|
||||||
latest-image: 'latest-gpu-rocm-hipblas-transformers'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
- build-type: 'hipblas'
|
|
||||||
cuda-major-version: ""
|
|
||||||
cuda-minor-version: ""
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-rocm-hipblas-diffusers'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
backend: "diffusers"
|
|
||||||
latest-image: 'latest-gpu-rocm-hipblas-diffusers'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
# ROCm additional backends
|
|
||||||
- build-type: 'hipblas'
|
|
||||||
cuda-major-version: ""
|
|
||||||
cuda-minor-version: ""
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-rocm-hipblas-kokoro'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
backend: "kokoro"
|
|
||||||
latest-image: 'latest-gpu-rocm-hipblas-kokoro'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
- build-type: 'hipblas'
|
|
||||||
cuda-major-version: ""
|
|
||||||
cuda-minor-version: ""
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-rocm-hipblas-faster-whisper'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
backend: "faster-whisper"
|
|
||||||
latest-image: 'latest-gpu-rocm-hipblas-faster-whisper'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
- build-type: 'hipblas'
|
|
||||||
cuda-major-version: ""
|
|
||||||
cuda-minor-version: ""
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'true'
|
|
||||||
tag-suffix: '-gpu-rocm-hipblas-coqui'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
backend: "coqui"
|
|
||||||
latest-image: 'latest-gpu-rocm-hipblas-coqui'
|
|
||||||
dockerfile: "./backend/Dockerfile.python"
|
|
||||||
context: "./backend"
|
|
||||||
- build-type: 'hipblas'
|
|
||||||
  cuda-major-version: ""
  cuda-minor-version: ""
  platforms: 'linux/amd64'
  tag-latest: 'true'
  tag-suffix: '-gpu-rocm-hipblas-bark'
  runs-on: 'ubuntu-latest'
  base-image: "rocm/dev-ubuntu-22.04:6.1"
  backend: "bark"
  latest-image: 'latest-gpu-rocm-hipblas-bark'
  dockerfile: "./backend/Dockerfile.python"
  context: "./backend"
# sycl builds
- build-type: 'sycl_f32'
  cuda-major-version: ""
  cuda-minor-version: ""
  platforms: 'linux/amd64'
  tag-latest: 'true'
  tag-suffix: '-gpu-intel-sycl-f32-rerankers'
  runs-on: 'ubuntu-latest'
  base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
  backend: "rerankers"
  latest-image: 'latest-gpu-intel-sycl-f32-rerankers'
  dockerfile: "./backend/Dockerfile.python"
  context: "./backend"
- build-type: 'sycl_f16'
  cuda-major-version: ""
  cuda-minor-version: ""
  platforms: 'linux/amd64'
  tag-latest: 'true'
  tag-suffix: '-gpu-intel-sycl-f16-rerankers'
  runs-on: 'ubuntu-latest'
  base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
  backend: "rerankers"
  latest-image: 'latest-gpu-intel-sycl-f16-rerankers'
  dockerfile: "./backend/Dockerfile.python"
  context: "./backend"
- build-type: 'sycl_f32'
  cuda-major-version: ""
  cuda-minor-version: ""
  platforms: 'linux/amd64'
  tag-latest: 'true'
  tag-suffix: '-gpu-intel-sycl-f32-vllm'
  runs-on: 'ubuntu-latest'
  base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
  backend: "vllm"
  latest-image: 'latest-gpu-intel-sycl-f32-vllm'
  dockerfile: "./backend/Dockerfile.python"
  context: "./backend"
- build-type: 'sycl_f16'
  cuda-major-version: ""
  cuda-minor-version: ""
  platforms: 'linux/amd64'
  tag-latest: 'true'
  tag-suffix: '-gpu-intel-sycl-f16-vllm'
  runs-on: 'ubuntu-latest'
  base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
  backend: "vllm"
  latest-image: 'latest-gpu-intel-sycl-f16-vllm'
  dockerfile: "./backend/Dockerfile.python"
  context: "./backend"
- build-type: 'sycl_f32'
  cuda-major-version: ""
  cuda-minor-version: ""
  platforms: 'linux/amd64'
  tag-latest: 'true'
  tag-suffix: '-gpu-intel-sycl-f32-transformers'
  runs-on: 'ubuntu-latest'
  base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
  backend: "transformers"
  latest-image: 'latest-gpu-intel-sycl-f32-transformers'
  dockerfile: "./backend/Dockerfile.python"
  context: "./backend"
- build-type: 'sycl_f16'
  cuda-major-version: ""
  cuda-minor-version: ""
  platforms: 'linux/amd64'
  tag-latest: 'true'
  tag-suffix: '-gpu-intel-sycl-f16-transformers'
  runs-on: 'ubuntu-latest'
  base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
  backend: "transformers"
  latest-image: 'latest-gpu-intel-sycl-f16-transformers'
  dockerfile: "./backend/Dockerfile.python"
  context: "./backend"
- build-type: 'sycl_f32'
  cuda-major-version: ""
  cuda-minor-version: ""
  platforms: 'linux/amd64'
  tag-latest: 'true'
  tag-suffix: '-gpu-intel-sycl-f32-diffusers'
  runs-on: 'ubuntu-latest'
  base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
  backend: "diffusers"
  latest-image: 'latest-gpu-intel-sycl-f32-diffusers'
  dockerfile: "./backend/Dockerfile.python"
  context: "./backend"
# SYCL additional backends
- build-type: 'sycl_f32'
  cuda-major-version: ""
  cuda-minor-version: ""
  platforms: 'linux/amd64'
  tag-latest: 'true'
  tag-suffix: '-gpu-intel-sycl-f32-kokoro'
  runs-on: 'ubuntu-latest'
  base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
  backend: "kokoro"
  latest-image: 'latest-gpu-intel-sycl-f32-kokoro'
  dockerfile: "./backend/Dockerfile.python"
  context: "./backend"
- build-type: 'sycl_f16'
  cuda-major-version: ""
  cuda-minor-version: ""
  platforms: 'linux/amd64'
  tag-latest: 'true'
  tag-suffix: '-gpu-intel-sycl-f16-kokoro'
  runs-on: 'ubuntu-latest'
  base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
  backend: "kokoro"
  latest-image: 'latest-gpu-intel-sycl-f16-kokoro'
  dockerfile: "./backend/Dockerfile.python"
  context: "./backend"
- build-type: 'sycl_f32'
  cuda-major-version: ""
  cuda-minor-version: ""
  platforms: 'linux/amd64'
  tag-latest: 'true'
  tag-suffix: '-gpu-intel-sycl-f32-faster-whisper'
  runs-on: 'ubuntu-latest'
  base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
  backend: "faster-whisper"
  latest-image: 'latest-gpu-intel-sycl-f32-faster-whisper'
  dockerfile: "./backend/Dockerfile.python"
  context: "./backend"
- build-type: 'sycl_f16'
  cuda-major-version: ""
  cuda-minor-version: ""
  platforms: 'linux/amd64'
  tag-latest: 'true'
  tag-suffix: '-gpu-intel-sycl-f16-faster-whisper'
  runs-on: 'ubuntu-latest'
  base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
  backend: "faster-whisper"
  latest-image: 'latest-gpu-intel-sycl-f16-faster-whisper'
  dockerfile: "./backend/Dockerfile.python"
  context: "./backend"
- build-type: 'sycl_f32'
  cuda-major-version: ""
  cuda-minor-version: ""
  platforms: 'linux/amd64'
  tag-latest: 'true'
  tag-suffix: '-gpu-intel-sycl-f32-coqui'
  runs-on: 'ubuntu-latest'
  base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
  backend: "coqui"
  latest-image: 'latest-gpu-intel-sycl-f32-coqui'
  dockerfile: "./backend/Dockerfile.python"
  context: "./backend"
- build-type: 'sycl_f16'
  cuda-major-version: ""
  cuda-minor-version: ""
  platforms: 'linux/amd64'
  tag-latest: 'true'
  tag-suffix: '-gpu-intel-sycl-f16-coqui'
  runs-on: 'ubuntu-latest'
  base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
  backend: "coqui"
  latest-image: 'latest-gpu-intel-sycl-f16-coqui'
  dockerfile: "./backend/Dockerfile.python"
  context: "./backend"
- build-type: 'sycl_f32'
  cuda-major-version: ""
  cuda-minor-version: ""
  platforms: 'linux/amd64'
  tag-latest: 'true'
  tag-suffix: '-gpu-intel-sycl-f32-bark'
  runs-on: 'ubuntu-latest'
  base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
  backend: "bark"
  latest-image: 'latest-gpu-intel-sycl-f32-bark'
  dockerfile: "./backend/Dockerfile.python"
  context: "./backend"
- build-type: 'sycl_f16'
  cuda-major-version: ""
  cuda-minor-version: ""
  platforms: 'linux/amd64'
  tag-latest: 'true'
  tag-suffix: '-gpu-intel-sycl-f16-bark'
  runs-on: 'ubuntu-latest'
  base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
  backend: "bark"
  latest-image: 'latest-gpu-intel-sycl-f16-bark'
  dockerfile: "./backend/Dockerfile.python"
  context: "./backend"
# bark-cpp
- build-type: ''
  cuda-major-version: ""
  cuda-minor-version: ""
  platforms: 'linux/amd64'
  tag-latest: 'true'
  tag-suffix: '-bark-cpp'
  runs-on: 'ubuntu-latest'
  base-image: "ubuntu:22.04"
  backend: "bark"
  latest-image: 'latest-bark-cpp'
  dockerfile: "./backend/Dockerfile.go"
  context: "./"
.github/workflows/backend_build.yml (vendored, 252 changes)
@@ -1,252 +0,0 @@
---
name: 'build python backend container images (reusable)'

on:
  workflow_call:
    inputs:
      base-image:
        description: 'Base image'
        required: true
        type: string
      build-type:
        description: 'Build type'
        default: ''
        type: string
      cuda-major-version:
        description: 'CUDA major version'
        default: "12"
        type: string
      cuda-minor-version:
        description: 'CUDA minor version'
        default: "1"
        type: string
      platforms:
        description: 'Platforms'
        default: ''
        type: string
      tag-latest:
        description: 'Tag latest'
        default: ''
        type: string
      latest-image:
        description: 'Tag latest'
        default: ''
        type: string
      tag-suffix:
        description: 'Tag suffix'
        default: ''
        type: string
      runs-on:
        description: 'Runs on'
        required: true
        default: ''
        type: string
      backend:
        description: 'Backend to build'
        required: true
        type: string
      context:
        description: 'Build context'
        required: true
        type: string
      dockerfile:
        description: 'Build Dockerfile'
        required: true
        type: string
    secrets:
      dockerUsername:
        required: true
      dockerPassword:
        required: true
      quayUsername:
        required: true
      quayPassword:
        required: true

jobs:
  reusable_python_backend-build:
    runs-on: ${{ inputs.runs-on }}
    steps:
      - name: Free Disk Space (Ubuntu)
        if: inputs.runs-on == 'ubuntu-latest'
        uses: jlumbroso/free-disk-space@main
        with:
          # this might remove tools that are actually needed,
          # if set to "true" but frees about 6 GB
          tool-cache: true
          # all of these default to true, but feel free to set to
          # "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          docker-images: true
          swap-storage: true

      - name: Force Install GIT latest
        run: |
          sudo apt-get update \
          && sudo apt-get install -y software-properties-common \
          && sudo apt-get update \
          && sudo add-apt-repository -y ppa:git-core/ppa \
          && sudo apt-get update \
          && sudo apt-get install -y git

      - name: Checkout
        uses: actions/checkout@v4

      - name: Release space from worker
        if: inputs.runs-on == 'ubuntu-latest'
        run: |
          echo "Listing top largest packages"
          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
          head -n 30 <<< "${pkgs}"
          echo
          df -h
          echo
          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
          sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
          sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
          sudo rm -rf /usr/local/lib/android
          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
          sudo rm -rf /usr/share/dotnet
          sudo apt-get remove -y '^mono-.*' || true
          sudo apt-get remove -y '^ghc-.*' || true
          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
          sudo apt-get remove -y 'php.*' || true
          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
          sudo apt-get remove -y '^google-.*' || true
          sudo apt-get remove -y azure-cli || true
          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
          sudo apt-get remove -y '^gfortran-.*' || true
          sudo apt-get remove -y microsoft-edge-stable || true
          sudo apt-get remove -y firefox || true
          sudo apt-get remove -y powershell || true
          sudo apt-get remove -y r-base-core || true
          sudo apt-get autoremove -y
          sudo apt-get clean
          echo
          echo "Listing top largest packages"
          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
          head -n 30 <<< "${pkgs}"
          echo
          sudo rm -rfv build || true
          sudo rm -rf /usr/share/dotnet || true
          sudo rm -rf /opt/ghc || true
          sudo rm -rf "/usr/local/share/boost" || true
          sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
          df -h

      - name: Docker meta
        id: meta
        if: github.event_name != 'pull_request'
        uses: docker/metadata-action@v5
        with:
          images: |
            quay.io/go-skynet/local-ai-backends
            localai/localai-backends
          tags: |
            type=ref,event=branch
            type=semver,pattern={{raw}}
            type=sha
          flavor: |
            latest=${{ inputs.tag-latest }}
            suffix=${{ inputs.tag-suffix }}

      - name: Docker meta for PR
        id: meta_pull_request
        if: github.event_name == 'pull_request'
        uses: docker/metadata-action@v5
        with:
          images: |
            quay.io/go-skynet/ci-tests
          tags: |
            type=ref,event=branch,suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
            type=semver,pattern={{raw}},suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
            type=sha,suffix=${{ github.event.number }}-${{ inputs.backend }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
          flavor: |
            latest=${{ inputs.tag-latest }}
            suffix=${{ inputs.tag-suffix }}
      ## End testing image
      - name: Set up QEMU
        uses: docker/setup-qemu-action@master
        with:
          platforms: all

      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@master

      - name: Login to DockerHub
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.dockerUsername }}
          password: ${{ secrets.dockerPassword }}

      - name: Login to Quay.io
        # if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: quay.io
          username: ${{ secrets.quayUsername }}
          password: ${{ secrets.quayPassword }}

      - name: Build and push
        uses: docker/build-push-action@v6
        if: github.event_name != 'pull_request'
        with:
          builder: ${{ steps.buildx.outputs.name }}
          build-args: |
            BUILD_TYPE=${{ inputs.build-type }}
            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
            BASE_IMAGE=${{ inputs.base-image }}
            BACKEND=${{ inputs.backend }}
          context: ./backend
          file: ./backend/Dockerfile.python
          cache-from: type=gha
          platforms: ${{ inputs.platforms }}
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

      - name: Build and push (PR)
        uses: docker/build-push-action@v6
        if: github.event_name == 'pull_request'
        with:
          builder: ${{ steps.buildx.outputs.name }}
          build-args: |
            BUILD_TYPE=${{ inputs.build-type }}
            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
            BASE_IMAGE=${{ inputs.base-image }}
            BACKEND=${{ inputs.backend }}
          context: ./backend
          file: ./backend/Dockerfile.python
          cache-from: type=gha
          platforms: ${{ inputs.platforms }}
          push: true
          tags: ${{ steps.meta_pull_request.outputs.tags }}
          labels: ${{ steps.meta_pull_request.outputs.labels }}

      - name: Cleanup
        run: |
          docker builder prune -f
          docker system prune --force --volumes --all

      - name: Latest tag
        if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
        run: |
          docker pull localai/localai-backends:${{ steps.meta.outputs.version }}
          docker tag localai/localai-backends:${{ steps.meta.outputs.version }} localai/localai-backends:${{ inputs.latest-image }}
          docker push localai/localai-backends:${{ inputs.latest-image }}
          docker pull quay.io/go-skynet/local-ai-backends:${{ steps.meta.outputs.version }}
          docker tag quay.io/go-skynet/local-ai-backends:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai-backends:${{ inputs.latest-image }}
          docker push quay.io/go-skynet/local-ai-backends:${{ inputs.latest-image }}

      - name: job summary
        run: |
          echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
.github/workflows/bump_deps.yaml (vendored, 46 changes)
@@ -9,17 +9,32 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - repository: "ggml-org/llama.cpp"
+          - repository: "go-skynet/go-llama.cpp"
+            variable: "GOLLAMA_VERSION"
+            branch: "master"
+          - repository: "ggerganov/llama.cpp"
             variable: "CPPLLAMA_VERSION"
             branch: "master"
-          - repository: "ggml-org/whisper.cpp"
+          - repository: "go-skynet/go-ggml-transformers.cpp"
+            variable: "GOGGMLTRANSFORMERS_VERSION"
+            branch: "master"
+          - repository: "donomii/go-rwkv.cpp"
+            variable: "RWKV_VERSION"
+            branch: "main"
+          - repository: "ggerganov/whisper.cpp"
             variable: "WHISPER_CPP_VERSION"
             branch: "master"
-          - repository: "PABannier/bark.cpp"
-            variable: "BARKCPP_VERSION"
+          - repository: "go-skynet/go-bert.cpp"
+            variable: "BERT_VERSION"
+            branch: "master"
+          - repository: "go-skynet/bloomz.cpp"
+            variable: "BLOOMZ_VERSION"
             branch: "main"
-          - repository: "leejet/stable-diffusion.cpp"
-            variable: "STABLEDIFFUSION_GGML_VERSION"
+          - repository: "nomic-ai/gpt4all"
+            variable: "GPT4ALL_VERSION"
+            branch: "main"
+          - repository: "mudler/go-ggllm.cpp"
+            variable: "GOGGLLM_VERSION"
             branch: "master"
           - repository: "mudler/go-stable-diffusion"
             variable: "STABLEDIFFUSION_VERSION"
@@ -31,30 +46,17 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Bump dependencies 🔧
-        id: bump
         run: |
           bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
-          {
-            echo 'message<<EOF'
-            cat "${{ matrix.variable }}_message.txt"
-            echo EOF
-          } >> "$GITHUB_OUTPUT"
-          {
-            echo 'commit<<EOF'
-            cat "${{ matrix.variable }}_commit.txt"
-            echo EOF
-          } >> "$GITHUB_OUTPUT"
-          rm -rfv ${{ matrix.variable }}_message.txt
-          rm -rfv ${{ matrix.variable }}_commit.txt
       - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v7
+        uses: peter-evans/create-pull-request@v6
         with:
           token: ${{ secrets.UPDATE_BOT_TOKEN }}
           push-to-fork: ci-forks/LocalAI
           commit-message: ':arrow_up: Update ${{ matrix.repository }}'
-          title: 'chore: :arrow_up: Update ${{ matrix.repository }} to `${{ steps.bump.outputs.commit }}`'
+          title: ':arrow_up: Update ${{ matrix.repository }}'
           branch: "update/${{ matrix.variable }}"
-          body: ${{ steps.bump.outputs.message }}
+          body: Bump of ${{ matrix.repository }} version
           signoff: true
.github/workflows/bump_docs.yaml (vendored, 4 changes)
@@ -17,12 +17,12 @@ jobs:
         run: |
           bash .github/bump_docs.sh ${{ matrix.repository }}
       - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v7
+        uses: peter-evans/create-pull-request@v6
         with:
           token: ${{ secrets.UPDATE_BOT_TOKEN }}
           push-to-fork: ci-forks/LocalAI
           commit-message: ':arrow_up: Update docs version ${{ matrix.repository }}'
-          title: 'docs: :arrow_up: update docs version ${{ matrix.repository }}'
+          title: ':arrow_up: Update docs version ${{ matrix.repository }}'
           branch: "update/docs"
           body: Bump of ${{ matrix.repository }} version inside docs
           signoff: true
.github/workflows/checksum_checker.yaml (vendored, 12 changes)
@@ -5,7 +5,7 @@ on:
   workflow_dispatch:
 jobs:
   checksum_check:
-    runs-on: ubuntu-latest
+    runs-on: arc-runner-set
     steps:
       - name: Force Install GIT latest
         run: |
@@ -20,12 +20,12 @@ jobs:
         run: |
           sudo apt-get update
           sudo apt-get install -y pip wget
           sudo pip install --upgrade pip
           pip install huggingface_hub
       - name: 'Setup yq'
-        uses: dcarbone/install-yq-action@v1.3.1
+        uses: dcarbone/install-yq-action@v1.1.1
         with:
-          version: 'v4.44.2'
+          version: 'v4.43.1'
           download-compressed: true
           force: true

@@ -36,12 +36,12 @@ jobs:
           sudo chmod 777 /hf_cache
           bash .github/checksum_checker.sh gallery/index.yaml
       - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v7
+        uses: peter-evans/create-pull-request@v6
         with:
           token: ${{ secrets.UPDATE_BOT_TOKEN }}
           push-to-fork: ci-forks/LocalAI
           commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
-          title: 'chore(model-gallery): :arrow_up: update checksum'
+          title: 'models(gallery): :arrow_up: update checksum'
           branch: "update/checksum"
           body: Updating checksums in gallery/index.yaml
           signoff: true
.github/workflows/dependabot_auto.yml (vendored, 2 changes)
@@ -14,7 +14,7 @@ jobs:
     steps:
       - name: Dependabot metadata
         id: metadata
-        uses: dependabot/fetch-metadata@v2.4.0
+        uses: dependabot/fetch-metadata@v2.1.0
         with:
           github-token: "${{ secrets.GITHUB_TOKEN }}"
           skip-commit-verification: true
.github/workflows/deploy-explorer.yaml (vendored, 64 changes)
@@ -1,64 +0,0 @@
name: Explorer deployment

on:
  push:
    branches:
      - master
    tags:
      - 'v*'

concurrency:
  group: ci-deploy-${{ github.head_ref || github.ref }}-${{ github.repository }}

jobs:
  build-linux:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          submodules: true
      - uses: actions/setup-go@v5
        with:
          go-version: '1.21.x'
          cache: false
      - name: Dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
          make protogen-go
      - name: Build api
        run: |
          CGO_ENABLED=0 make build-api
      - name: rm
        uses: appleboy/ssh-action@v1.2.2
        with:
          host: ${{ secrets.EXPLORER_SSH_HOST }}
          username: ${{ secrets.EXPLORER_SSH_USERNAME }}
          key: ${{ secrets.EXPLORER_SSH_KEY }}
          port: ${{ secrets.EXPLORER_SSH_PORT }}
          script: |
            sudo rm -rf local-ai/ || true
      - name: copy file via ssh
        uses: appleboy/scp-action@v1.0.0
        with:
          host: ${{ secrets.EXPLORER_SSH_HOST }}
          username: ${{ secrets.EXPLORER_SSH_USERNAME }}
          key: ${{ secrets.EXPLORER_SSH_KEY }}
          port: ${{ secrets.EXPLORER_SSH_PORT }}
          source: "local-ai"
          overwrite: true
          rm: true
          target: ./local-ai
      - name: restarting
        uses: appleboy/ssh-action@v1.2.2
        with:
          host: ${{ secrets.EXPLORER_SSH_HOST }}
          username: ${{ secrets.EXPLORER_SSH_USERNAME }}
          key: ${{ secrets.EXPLORER_SSH_KEY }}
          port: ${{ secrets.EXPLORER_SSH_PORT }}
          script: |
            sudo cp -rfv local-ai/local-ai /usr/bin/local-ai
            sudo systemctl restart local-ai
.github/workflows/disabled/comment-pr.yaml (vendored, 83 changes)
@@ -1,83 +0,0 @@
name: Comment PRs
on:
  pull_request_target:

jobs:
  comment-pr:
    env:
      MODEL_NAME: hermes-2-theta-llama-3-8b
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
        with:
          ref: "${{ github.event.pull_request.merge_commit_sha }}"
          fetch-depth: 0 # needed to checkout all branches for this Action to work
      - uses: mudler/localai-github-action@v1
        with:
          model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
      # Check the PR diff using the current branch and the base branch of the PR
      - uses: GrantBirki/git-diff-action@v2.7.0
        id: git-diff-action
        with:
          json_diff_file_output: diff.json
          raw_diff_file_output: diff.txt
          file_output_only: "true"
          base_branch: ${{ github.event.pull_request.base.sha }}
      - name: Show diff
        env:
          DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
        run: |
          cat $DIFF
      - name: Summarize
        env:
          DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
        id: summarize
        run: |
          input="$(cat $DIFF)"

          # Define the LocalAI API endpoint
          API_URL="http://localhost:8080/chat/completions"

          # Create a JSON payload using jq to handle special characters
          json_payload=$(jq -n --arg input "$input" '{
            model: "'$MODEL_NAME'",
            messages: [
              {
                role: "system",
                content: "You are LocalAI-bot in Github that helps understanding PRs and assess complexity. Explain what has changed in this PR diff and why"
              },
              {
                role: "user",
                content: $input
              }
            ]
          }')

          # Send the request to LocalAI
          response=$(curl -s -X POST $API_URL \
            -H "Content-Type: application/json" \
            -d "$json_payload")

          # Extract the summary from the response
          summary="$(echo $response | jq -r '.choices[0].message.content')"

          # Print the summary
          # -H "Authorization: Bearer $API_KEY" \
          echo "Summary:"
          echo "$summary"
          echo "payload sent"
          echo "$json_payload"
          {
            echo 'message<<EOF'
            echo "$summary"
            echo EOF
          } >> "$GITHUB_OUTPUT"
          docker logs --tail 10 local-ai
      - uses: mshick/add-pr-comment@v2
        if: always()
        with:
          repo-token: ${{ secrets.UPDATE_BOT_TOKEN }}
          message: ${{ steps.summarize.outputs.message }}
          message-failure: |
            Uh oh! Could not analyze this PR, maybe it's too big?
.github/workflows/generate_grpc_cache.yaml (vendored, 15 changes)
@@ -2,10 +2,9 @@ name: 'generate and publish GRPC docker caches'
 on:
   workflow_dispatch:
-  schedule:
-    # daily at midnight
-    - cron: '0 0 * * *'
+  push:
+    branches:
+      - master

 concurrency:
   group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
@@ -18,7 +17,7 @@ jobs:
         include:
           - grpc-base-image: ubuntu:22.04
             runs-on: 'ubuntu-latest'
-            platforms: 'linux/amd64,linux/arm64'
+            platforms: 'linux/amd64'
     runs-on: ${{matrix.runs-on}}
     steps:
       - name: Release space from worker
@@ -76,7 +75,7 @@ jobs:
         uses: actions/checkout@v4

       - name: Cache GRPC
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@v5
         with:
           builder: ${{ steps.buildx.outputs.name }}
           # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
@@ -85,11 +84,11 @@ jobs:
           build-args: |
             GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
             GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.65.0
+            GRPC_VERSION=v1.63.0
           context: .
           file: ./Dockerfile
           cache-to: type=gha,ignore-error=true
           cache-from: type=gha
           target: grpc
           platforms: ${{ matrix.platforms }}
           push: false
.github/workflows/generate_intel_image.yaml (vendored, 59 changes)
@@ -1,59 +0,0 @@
name: 'generate and publish intel docker caches'

on:
  workflow_dispatch:
  push:
    branches:
      - master

concurrency:
  group: intel-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
  cancel-in-progress: true

jobs:
  generate_caches:
    strategy:
      matrix:
        include:
          - base-image: intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04
            runs-on: 'ubuntu-latest'
            platforms: 'linux/amd64'
    runs-on: ${{matrix.runs-on}}
    steps:
      - name: Set up QEMU
        uses: docker/setup-qemu-action@master
        with:
          platforms: all
      - name: Login to DockerHub
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}

      - name: Login to quay
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: quay.io
          username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
          password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@master

      - name: Checkout
        uses: actions/checkout@v4

      - name: Cache Intel images
        uses: docker/build-push-action@v6
        with:
          builder: ${{ steps.buildx.outputs.name }}
          build-args: |
            BASE_IMAGE=${{ matrix.base-image }}
          context: .
          file: ./Dockerfile
          tags: quay.io/go-skynet/intel-oneapi-base:latest
          push: true
          target: intel
          platforms: ${{ matrix.platforms }}
.github/workflows/image-pr.yml (vendored, 77 changes)
@@ -9,12 +9,13 @@ concurrency:
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
image-build:
|
extras-image-build:
|
||||||
uses: ./.github/workflows/image_build.yml
|
uses: ./.github/workflows/image_build.yml
|
||||||
with:
|
with:
|
||||||
tag-latest: ${{ matrix.tag-latest }}
|
tag-latest: ${{ matrix.tag-latest }}
|
||||||
tag-suffix: ${{ matrix.tag-suffix }}
|
tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
ffmpeg: ${{ matrix.ffmpeg }}
|
ffmpeg: ${{ matrix.ffmpeg }}
|
||||||
|
image-type: ${{ matrix.image-type }}
|
||||||
build-type: ${{ matrix.build-type }}
|
build-type: ${{ matrix.build-type }}
|
||||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
@@ -31,18 +32,27 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
# Pushing with all jobs in parallel
|
# Pushing with all jobs in parallel
|
||||||
# eats the bandwidth of all the nodes
|
# eats the bandwidth of all the nodes
|
||||||
max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }}
|
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
|
||||||
fail-fast: false
|
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
|
- build-type: ''
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-ffmpeg'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'extras'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
base-image: "ubuntu:22.04"
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "12"
|
cuda-major-version: "12"
|
||||||
cuda-minor-version: "0"
|
cuda-minor-version: "1"
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-cublas-cuda12-ffmpeg'
|
tag-suffix: '-cublas-cuda12-ffmpeg'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
runs-on: 'ubuntu-latest'
|
image-type: 'extras'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'hipblas'
|
- build-type: 'hipblas'
|
||||||
@@ -50,24 +60,71 @@ jobs:
|
|||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-hipblas'
|
tag-suffix: '-hipblas'
|
||||||
ffmpeg: 'false'
|
ffmpeg: 'false'
|
||||||
|
image-type: 'extras'
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
grpc-base-image: "ubuntu:22.04"
|
grpc-base-image: "ubuntu:22.04"
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'sycl_f16'
|
- build-type: 'sycl_f16'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
|
||||||
grpc-base-image: "ubuntu:22.04"
|
grpc-base-image: "ubuntu:22.04"
|
||||||
tag-suffix: 'sycl-f16-ffmpeg'
|
tag-suffix: 'sycl-f16-ffmpeg'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
runs-on: 'ubuntu-latest'
|
image-type: 'extras'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'vulkan'
|
core-image-build:
|
||||||
|
uses: ./.github/workflows/image_build.yml
|
||||||
|
with:
|
||||||
|
tag-latest: ${{ matrix.tag-latest }}
|
||||||
|
tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
|
ffmpeg: ${{ matrix.ffmpeg }}
|
||||||
|
image-type: ${{ matrix.image-type }}
|
||||||
|
build-type: ${{ matrix.build-type }}
|
||||||
|
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||||
|
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
|
platforms: ${{ matrix.platforms }}
|
||||||
|
runs-on: ${{ matrix.runs-on }}
|
||||||
|
base-image: ${{ matrix.base-image }}
|
||||||
|
grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||||
|
makeflags: ${{ matrix.makeflags }}
|
||||||
|
secrets:
|
||||||
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
|
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||||
|
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||||
|
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- build-type: ''
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-vulkan-ffmpeg-core'
|
tag-suffix: '-ffmpeg-core'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
|
- build-type: 'sycl_f16'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
tag-suffix: 'sycl-f16-ffmpeg-core'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "12"
|
||||||
|
cuda-minor-version: "1"
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-cublas-cuda12-ffmpeg-core'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:22.04"
|
||||||
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
.github/workflows/image.yml (vendored, 314 changes)
@@ -13,12 +13,13 @@ concurrency:
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
hipblas-jobs:
|
self-hosted-jobs:
|
||||||
uses: ./.github/workflows/image_build.yml
|
uses: ./.github/workflows/image_build.yml
|
||||||
with:
|
with:
|
||||||
tag-latest: ${{ matrix.tag-latest }}
|
tag-latest: ${{ matrix.tag-latest }}
|
||||||
tag-suffix: ${{ matrix.tag-suffix }}
|
tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
ffmpeg: ${{ matrix.ffmpeg }}
|
ffmpeg: ${{ matrix.ffmpeg }}
|
||||||
|
image-type: ${{ matrix.image-type }}
|
||||||
build-type: ${{ matrix.build-type }}
|
build-type: ${{ matrix.build-type }}
|
||||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
@@ -36,131 +37,209 @@ jobs:
|
|||||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||||
strategy:
|
strategy:
|
||||||
|
# Pushing with all jobs in parallel
|
||||||
|
# eats the bandwidth of all the nodes
|
||||||
|
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- build-type: 'hipblas'
|
# Extra images
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
tag-suffix: '-hipblas'
|
|
||||||
ffmpeg: 'true'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
latest-image: 'latest-gpu-hipblas'
|
|
||||||
aio: "-aio-gpu-hipblas"
|
|
||||||
latest-image-aio: 'latest-aio-gpu-hipblas'
|
|
||||||
|
|
||||||
core-image-build:
|
|
||||||
uses: ./.github/workflows/image_build.yml
|
|
||||||
with:
|
|
||||||
tag-latest: ${{ matrix.tag-latest }}
|
|
||||||
tag-suffix: ${{ matrix.tag-suffix }}
|
|
||||||
ffmpeg: ${{ matrix.ffmpeg }}
|
|
||||||
build-type: ${{ matrix.build-type }}
|
|
||||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
|
||||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
|
||||||
platforms: ${{ matrix.platforms }}
|
|
||||||
runs-on: ${{ matrix.runs-on }}
|
|
||||||
aio: ${{ matrix.aio }}
|
|
||||||
base-image: ${{ matrix.base-image }}
|
|
||||||
grpc-base-image: ${{ matrix.grpc-base-image }}
|
|
||||||
makeflags: ${{ matrix.makeflags }}
|
|
||||||
latest-image: ${{ matrix.latest-image }}
|
|
||||||
latest-image-aio: ${{ matrix.latest-image-aio }}
|
|
||||||
skip-drivers: ${{ matrix.skip-drivers }}
|
|
||||||
secrets:
|
|
||||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
|
||||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
|
||||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
|
||||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
|
||||||
strategy:
|
|
||||||
#max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
|
|
||||||
matrix:
|
|
||||||
include:
|
|
||||||
- build-type: ''
|
- build-type: ''
|
||||||
platforms: 'linux/amd64,linux/arm64'
|
#platforms: 'linux/amd64,linux/arm64'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
tag-suffix: ''
|
tag-suffix: ''
|
||||||
ffmpeg: 'true'
|
ffmpeg: ''
|
||||||
|
image-type: 'extras'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
runs-on: 'ubuntu-latest'
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
aio: "-aio-cpu"
|
- build-type: ''
|
||||||
latest-image: 'latest-cpu'
|
platforms: 'linux/amd64'
|
||||||
latest-image-aio: 'latest-aio-cpu'
|
tag-latest: 'auto'
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
tag-suffix: '-ffmpeg'
|
||||||
skip-drivers: 'false'
|
ffmpeg: 'true'
|
||||||
|
image-type: 'extras'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
base-image: "ubuntu:22.04"
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "11"
|
cuda-major-version: "11"
|
||||||
cuda-minor-version: "7"
|
cuda-minor-version: "7"
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-cublas-cuda11'
|
tag-suffix: '-cublas-cuda11'
|
||||||
ffmpeg: 'true'
|
ffmpeg: ''
|
||||||
runs-on: 'ubuntu-latest'
|
image-type: 'extras'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
skip-drivers: 'false'
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-11'
|
|
||||||
aio: "-aio-gpu-nvidia-cuda-11"
|
|
||||||
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
|
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "12"
|
cuda-major-version: "12"
|
||||||
cuda-minor-version: "0"
|
cuda-minor-version: "1"
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-cublas-cuda12'
|
tag-suffix: '-cublas-cuda12'
|
||||||
ffmpeg: 'true'
|
ffmpeg: ''
|
||||||
runs-on: 'ubuntu-latest'
|
image-type: 'extras'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
base-image: "ubuntu:22.04"
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "11"
|
||||||
|
cuda-minor-version: "7"
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-cublas-cuda11-ffmpeg'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'extras'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
base-image: "ubuntu:22.04"
|
||||||
|
aio: "-aio-gpu-nvidia-cuda-11"
|
||||||
|
latest-image: 'latest-gpu-nvidia-cuda-11'
|
||||||
|
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "12"
|
||||||
|
cuda-minor-version: "1"
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-cublas-cuda12-ffmpeg'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'extras'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
skip-drivers: 'false'
|
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-12'
|
|
||||||
aio: "-aio-gpu-nvidia-cuda-12"
|
aio: "-aio-gpu-nvidia-cuda-12"
|
||||||
|
latest-image: 'latest-gpu-nvidia-cuda-12'
|
||||||
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
|
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
|
||||||
- build-type: 'vulkan'
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: ''
|
||||||
|
#platforms: 'linux/amd64,linux/arm64'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: ''
|
||||||
|
ffmpeg: ''
|
||||||
|
image-type: 'extras'
|
||||||
|
base-image: "ubuntu:22.04"
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-hipblas-ffmpeg'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'extras'
|
||||||
|
aio: "-aio-gpu-hipblas"
|
||||||
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
latest-image: 'latest-gpu-hipblas'
|
||||||
|
latest-image-aio: 'latest-aio-gpu-hipblas'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'hipblas'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-vulkan'
|
tag-suffix: '-hipblas'
|
||||||
|
ffmpeg: 'false'
|
||||||
|
image-type: 'extras'
|
||||||
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'sycl_f16'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
tag-suffix: '-sycl-f16-ffmpeg'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
runs-on: 'ubuntu-latest'
|
image-type: 'extras'
|
||||||
base-image: "ubuntu:22.04"
|
runs-on: 'arc-runner-set'
|
||||||
skip-drivers: 'false'
|
aio: "-aio-gpu-intel-f16"
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
latest-image: 'latest-gpu-intel-f16'
|
||||||
latest-image: 'latest-gpu-vulkan'
|
latest-image-aio: 'latest-aio-gpu-intel-f16'
|
||||||
aio: "-aio-gpu-vulkan"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
latest-image-aio: 'latest-aio-gpu-vulkan'
|
- build-type: 'sycl_f32'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
tag-suffix: '-sycl-f32-ffmpeg'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'extras'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
aio: "-aio-gpu-intel-f32"
|
||||||
|
latest-image: 'latest-gpu-intel-f32'
|
||||||
|
latest-image-aio: 'latest-aio-gpu-intel-f32'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
# Core images
|
||||||
- build-type: 'sycl_f16'
|
- build-type: 'sycl_f16'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
|
||||||
grpc-base-image: "ubuntu:22.04"
|
grpc-base-image: "ubuntu:22.04"
|
||||||
tag-suffix: '-sycl-f16'
|
tag-suffix: '-sycl-f16-core'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'false'
|
||||||
runs-on: 'ubuntu-latest'
|
image-type: 'core'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
latest-image: 'latest-gpu-intel-f16'
|
|
||||||
aio: "-aio-gpu-intel-f16"
|
|
||||||
latest-image-aio: 'latest-aio-gpu-intel-f16'
|
|
||||||
- build-type: 'sycl_f32'
|
- build-type: 'sycl_f32'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
|
||||||
grpc-base-image: "ubuntu:22.04"
|
grpc-base-image: "ubuntu:22.04"
|
||||||
tag-suffix: '-sycl-f32'
|
tag-suffix: '-sycl-f32-core'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'false'
|
||||||
runs-on: 'ubuntu-latest'
|
image-type: 'core'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
latest-image: 'latest-gpu-intel-f32'
|
- build-type: 'sycl_f16'
|
||||||
aio: "-aio-gpu-intel-f32"
|
platforms: 'linux/amd64'
|
||||||
latest-image-aio: 'latest-aio-gpu-intel-f32'
|
tag-latest: 'false'
|
||||||
|
base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
|
||||||
gh-runner:
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
tag-suffix: '-sycl-f16-ffmpeg-core'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'sycl_f32'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
tag-suffix: '-sycl-f32-ffmpeg-core'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-hipblas-ffmpeg-core'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-hipblas-core'
|
||||||
|
ffmpeg: 'false'
|
||||||
|
image-type: 'core'
|
||||||
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
|
||||||
|
core-image-build:
|
||||||
uses: ./.github/workflows/image_build.yml
|
uses: ./.github/workflows/image_build.yml
|
||||||
with:
|
with:
|
||||||
tag-latest: ${{ matrix.tag-latest }}
|
tag-latest: ${{ matrix.tag-latest }}
|
||||||
tag-suffix: ${{ matrix.tag-suffix }}
|
tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
ffmpeg: ${{ matrix.ffmpeg }}
|
ffmpeg: ${{ matrix.ffmpeg }}
|
||||||
|
image-type: ${{ matrix.image-type }}
|
||||||
build-type: ${{ matrix.build-type }}
|
build-type: ${{ matrix.build-type }}
|
||||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
@@ -172,7 +251,6 @@ jobs:
|
|||||||
makeflags: ${{ matrix.makeflags }}
|
makeflags: ${{ matrix.makeflags }}
|
||||||
latest-image: ${{ matrix.latest-image }}
|
latest-image: ${{ matrix.latest-image }}
|
||||||
latest-image-aio: ${{ matrix.latest-image-aio }}
|
latest-image-aio: ${{ matrix.latest-image-aio }}
|
||||||
skip-drivers: ${{ matrix.skip-drivers }}
|
|
||||||
secrets:
|
secrets:
|
||||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||||
@@ -181,15 +259,59 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
|
- build-type: ''
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-ffmpeg-core'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
|
base-image: "ubuntu:22.04"
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
aio: "-aio-cpu"
|
||||||
|
latest-image: 'latest-cpu'
|
||||||
|
latest-image-aio: 'latest-aio-cpu'
|
||||||
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "11"
|
||||||
|
cuda-minor-version: "7"
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-cublas-cuda11-core'
|
||||||
|
ffmpeg: ''
|
||||||
|
image-type: 'core'
|
||||||
|
base-image: "ubuntu:22.04"
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "12"
|
cuda-major-version: "12"
|
||||||
cuda-minor-version: "0"
|
cuda-minor-version: "1"
|
||||||
platforms: 'linux/arm64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-nvidia-l4t-arm64'
|
tag-suffix: '-cublas-cuda12-core'
|
||||||
latest-image: 'latest-nvidia-l4t-arm64'
|
ffmpeg: ''
|
||||||
ffmpeg: 'true'
|
image-type: 'core'
|
||||||
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
base-image: "ubuntu:22.04"
|
||||||
runs-on: 'ubuntu-24.04-arm'
|
runs-on: 'ubuntu-latest'
|
||||||
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "11"
|
||||||
|
cuda-minor-version: "7"
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-cublas-cuda11-ffmpeg-core'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:22.04"
|
||||||
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "12"
|
||||||
|
cuda-minor-version: "1"
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-cublas-cuda12-ffmpeg-core'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
skip-drivers: 'true'
|
|
||||||
|
|||||||
.github/workflows/image_build.yml (vendored, 95 changes)
@@ -19,11 +19,11 @@ on:
type: string
|
type: string
|
||||||
cuda-major-version:
|
cuda-major-version:
|
||||||
description: 'CUDA major version'
|
description: 'CUDA major version'
|
||||||
default: "12"
|
default: "11"
|
||||||
type: string
|
type: string
|
||||||
cuda-minor-version:
|
cuda-minor-version:
|
||||||
description: 'CUDA minor version'
|
description: 'CUDA minor version'
|
||||||
default: "4"
|
default: "7"
|
||||||
type: string
|
type: string
|
||||||
platforms:
|
platforms:
|
||||||
description: 'Platforms'
|
description: 'Platforms'
|
||||||
@@ -49,9 +49,9 @@ on:
|
|||||||
description: 'FFMPEG'
|
description: 'FFMPEG'
|
||||||
default: ''
|
default: ''
|
||||||
type: string
|
type: string
|
||||||
skip-drivers:
|
image-type:
|
||||||
description: 'Skip drivers by default'
|
description: 'Image type'
|
||||||
default: 'false'
|
default: ''
|
||||||
type: string
|
type: string
|
||||||
runs-on:
|
runs-on:
|
||||||
description: 'Runs on'
|
description: 'Runs on'
|
||||||
@@ -81,22 +81,6 @@ jobs:
|
|||||||
reusable_image-build:
|
reusable_image-build:
|
||||||
runs-on: ${{ inputs.runs-on }}
|
runs-on: ${{ inputs.runs-on }}
|
||||||
steps:
|
steps:
|
||||||
|
|
||||||
- name: Free Disk Space (Ubuntu)
|
|
||||||
if: inputs.runs-on == 'ubuntu-latest'
|
|
||||||
uses: jlumbroso/free-disk-space@main
|
|
||||||
with:
|
|
||||||
# this might remove tools that are actually needed,
|
|
||||||
# if set to "true" but frees about 6 GB
|
|
||||||
tool-cache: true
|
|
||||||
# all of these default to true, but feel free to set to
|
|
||||||
# "false" if necessary for your workflow
|
|
||||||
android: true
|
|
||||||
dotnet: true
|
|
||||||
haskell: true
|
|
||||||
large-packages: true
|
|
||||||
docker-images: true
|
|
||||||
swap-storage: true
|
|
||||||
- name: Force Install GIT latest
|
- name: Force Install GIT latest
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update \
|
sudo apt-get update \
|
||||||
@@ -118,8 +102,8 @@ jobs:
|
|||||||
df -h
|
df -h
|
||||||
echo
|
echo
|
||||||
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
|
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
|
||||||
sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
|
sudo apt-get remove --auto-remove android-sdk-platform-tools || true
|
||||||
sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
|
sudo apt-get purge --auto-remove android-sdk-platform-tools || true
|
||||||
sudo rm -rf /usr/local/lib/android
|
sudo rm -rf /usr/local/lib/android
|
||||||
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
|
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
|
||||||
sudo rm -rf /usr/share/dotnet
|
sudo rm -rf /usr/share/dotnet
|
||||||
@@ -152,7 +136,6 @@ jobs:
|
|||||||
|
|
||||||
- name: Docker meta
|
- name: Docker meta
|
||||||
id: meta
|
id: meta
|
||||||
if: github.event_name != 'pull_request'
|
|
||||||
uses: docker/metadata-action@v5
|
uses: docker/metadata-action@v5
|
||||||
with:
|
with:
|
||||||
images: |
|
images: |
|
||||||
@@ -165,20 +148,7 @@ jobs:
|
|||||||
flavor: |
|
flavor: |
|
||||||
latest=${{ inputs.tag-latest }}
|
latest=${{ inputs.tag-latest }}
|
||||||
suffix=${{ inputs.tag-suffix }}
|
suffix=${{ inputs.tag-suffix }}
|
||||||
- name: Docker meta for PR
|
|
||||||
id: meta_pull_request
|
|
||||||
if: github.event_name == 'pull_request'
|
|
||||||
uses: docker/metadata-action@v5
|
|
||||||
with:
|
|
||||||
images: |
|
|
||||||
quay.io/go-skynet/ci-tests
|
|
||||||
tags: |
|
|
||||||
type=ref,event=branch,suffix=localai${{ github.event.number }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
|
|
||||||
type=semver,pattern={{raw}},suffix=localai${{ github.event.number }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
|
|
||||||
type=sha,suffix=localai${{ github.event.number }}-${{ inputs.build-type }}-${{ inputs.cuda-major-version }}-${{ inputs.cuda-minor-version }}
|
|
||||||
flavor: |
|
|
||||||
latest=${{ inputs.tag-latest }}
|
|
||||||
suffix=${{ inputs.tag-suffix }}
|
|
||||||
- name: Docker meta AIO (quay.io)
|
- name: Docker meta AIO (quay.io)
|
||||||
if: inputs.aio != ''
|
if: inputs.aio != ''
|
||||||
id: meta_aio
|
id: meta_aio
|
||||||
@@ -204,6 +174,7 @@ jobs:
|
|||||||
type=ref,event=branch
|
type=ref,event=branch
|
||||||
type=semver,pattern={{raw}}
|
type=semver,pattern={{raw}}
|
||||||
flavor: |
|
flavor: |
|
||||||
|
latest=${{ inputs.tag-latest }}
|
||||||
suffix=${{ inputs.aio }}
|
suffix=${{ inputs.aio }}
|
||||||
|
|
||||||
- name: Set up QEMU
|
- name: Set up QEMU
|
||||||
@@ -231,8 +202,7 @@ jobs:
|
|||||||
password: ${{ secrets.quayPassword }}
|
password: ${{ secrets.quayPassword }}
|
||||||
|
|
||||||
- name: Build and push
|
- name: Build and push
|
||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@v5
|
||||||
if: github.event_name != 'pull_request'
|
|
||||||
with:
|
with:
|
||||||
builder: ${{ steps.buildx.outputs.name }}
|
builder: ${{ steps.buildx.outputs.name }}
|
||||||
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
|
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
|
||||||
@@ -244,12 +214,12 @@ jobs:
|
|||||||
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
||||||
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
|
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
|
||||||
FFMPEG=${{ inputs.ffmpeg }}
|
FFMPEG=${{ inputs.ffmpeg }}
|
||||||
|
IMAGE_TYPE=${{ inputs.image-type }}
|
||||||
BASE_IMAGE=${{ inputs.base-image }}
|
BASE_IMAGE=${{ inputs.base-image }}
|
||||||
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
|
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
|
||||||
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
||||||
GRPC_VERSION=v1.65.0
|
GRPC_VERSION=v1.63.0
|
||||||
MAKEFLAGS=${{ inputs.makeflags }}
|
MAKEFLAGS=${{ inputs.makeflags }}
|
||||||
SKIP_DRIVERS=${{ inputs.skip-drivers }}
|
|
||||||
context: .
|
context: .
|
||||||
file: ./Dockerfile
|
file: ./Dockerfile
|
||||||
cache-from: type=gha
|
cache-from: type=gha
|
||||||
@@ -257,38 +227,10 @@ jobs:
|
|||||||
push: ${{ github.event_name != 'pull_request' }}
|
push: ${{ github.event_name != 'pull_request' }}
|
||||||
tags: ${{ steps.meta.outputs.tags }}
|
tags: ${{ steps.meta.outputs.tags }}
|
||||||
labels: ${{ steps.meta.outputs.labels }}
|
labels: ${{ steps.meta.outputs.labels }}
|
||||||
### Start testing image
|
|
||||||
- name: Build and push
|
|
||||||
uses: docker/build-push-action@v6
|
|
||||||
if: github.event_name == 'pull_request'
|
|
||||||
with:
|
|
||||||
builder: ${{ steps.buildx.outputs.name }}
|
|
||||||
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
|
|
||||||
# This means that even the MAKEFLAGS have to be an EXACT match.
|
|
||||||
# If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
|
|
||||||
# This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded
|
|
||||||
build-args: |
|
|
||||||
BUILD_TYPE=${{ inputs.build-type }}
|
|
||||||
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
|
||||||
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
|
|
||||||
FFMPEG=${{ inputs.ffmpeg }}
|
|
||||||
BASE_IMAGE=${{ inputs.base-image }}
|
|
||||||
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
|
|
||||||
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
|
||||||
GRPC_VERSION=v1.65.0
|
|
||||||
MAKEFLAGS=${{ inputs.makeflags }}
|
|
||||||
SKIP_DRIVERS=${{ inputs.skip-drivers }}
|
|
||||||
context: .
|
|
||||||
file: ./Dockerfile
|
|
||||||
cache-from: type=gha
|
|
||||||
platforms: ${{ inputs.platforms }}
|
|
||||||
#push: true
|
|
||||||
tags: ${{ steps.meta_pull_request.outputs.tags }}
|
|
||||||
labels: ${{ steps.meta_pull_request.outputs.labels }}
|
|
||||||
## End testing image
|
|
||||||
- name: Build and push AIO image
|
- name: Build and push AIO image
|
||||||
if: inputs.aio != ''
|
if: inputs.aio != ''
|
||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@v5
|
||||||
with:
|
with:
|
||||||
builder: ${{ steps.buildx.outputs.name }}
|
builder: ${{ steps.buildx.outputs.name }}
|
||||||
build-args: |
|
build-args: |
|
||||||
@@ -303,7 +245,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Build and push AIO image (dockerhub)
|
- name: Build and push AIO image (dockerhub)
|
||||||
if: inputs.aio != ''
|
if: inputs.aio != ''
|
||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@v5
|
||||||
with:
|
with:
|
||||||
builder: ${{ steps.buildx.outputs.name }}
|
builder: ${{ steps.buildx.outputs.name }}
|
||||||
build-args: |
|
build-args: |
|
||||||
@@ -316,11 +258,6 @@ jobs:
|
|||||||
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
|
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
|
||||||
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
|
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
|
||||||
|
|
||||||
- name: Cleanup
|
|
||||||
run: |
|
|
||||||
docker builder prune -f
|
|
||||||
docker system prune --force --volumes --all
|
|
||||||
|
|
||||||
- name: Latest tag
|
- name: Latest tag
|
||||||
# run this on branches, when it is a tag and there is a latest-image defined
|
# run this on branches, when it is a tag and there is a latest-image defined
|
||||||
if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
|
if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
|
||||||
@@ -341,7 +278,7 @@ jobs:
|
|||||||
docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
|
docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
|
||||||
docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
|
docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
|
||||||
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
|
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
|
||||||
|
|
||||||
- name: job summary
|
- name: job summary
|
||||||
run: |
|
run: |
|
||||||
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
|
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
|
||||||
|
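image_build.yml is declared as a reusable workflow, so each matrix entry above is ultimately passed to it as workflow_call inputs. A hedged sketch of such a call, assuming the caller lives in the same repository; the job name and the concrete values are illustrative, only the input names come from the hunks above:

    image-build:
      uses: ./.github/workflows/image_build.yml
      with:
        build-type: 'cublas'
        cuda-major-version: "12"
        cuda-minor-version: "4"
        platforms: 'linux/amd64'
        tag-latest: 'false'
        tag-suffix: '-cublas-cuda12'
        ffmpeg: 'true'
        skip-drivers: 'false'   # input present only on the '-' side; the '+' side passes image-type instead
        base-image: "ubuntu:22.04"
        runs-on: 'ubuntu-latest'
        makeflags: "--jobs=4 --output-sync=target"
      secrets: inherit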
.github/workflows/notify-models.yaml (vendored, 168 lines removed)

@@ -1,168 +0,0 @@
name: Notifications for new models
on:
pull_request:
types:
- closed

jobs:
notify-discord:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: gemma-3-12b-it
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
- uses: mudler/localai-github-action@v1
with:
model: 'gemma-3-12b-it' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.8.1
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"

# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"

# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "You are LocalAI-bot. Write a discord message to notify everyone about the new model from the git diff. Make it informal. An example can include: the URL of the model, the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI and that can be browsed over https://models.localai.io. For example: local-ai run model_name_here"
},
{
role: "user",
content: $input
}
]
}')

# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")

# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"

# Print the summary
# -H "Authorization: Bearer $API_KEY" \
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
docker logs --tail 10 local-ai
- name: Discord notification
env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL }}
DISCORD_USERNAME: "LocalAI-Bot"
DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
uses: Ilshidur/action-discord@master
with:
args: ${{ steps.summarize.outputs.message }}
- name: Setup tmate session if fails
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
notify-twitter:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: gemma-3-12b-it
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
- name: Start LocalAI
run: |
echo "Starting LocalAI..."
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.8.1
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"

# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"

# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "You are LocalAI-bot. Write a twitter message to notify everyone about the new model from the git diff. Make it informal and really short. An example can include: the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI. For example: local-ai run model_name_here"
},
{
role: "user",
content: $input
}
]
}')

# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")

# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"

# Print the summary
# -H "Authorization: Bearer $API_KEY" \
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
docker logs --tail 10 local-ai
- uses: Eomm/why-don-t-you-tweet@v2
with:
tweet-message: ${{ steps.summarize.outputs.message }}
env:
# Get your tokens from https://developer.twitter.com/apps
TWITTER_CONSUMER_API_KEY: ${{ secrets.TWITTER_APP_KEY }}
TWITTER_CONSUMER_API_SECRET: ${{ secrets.TWITTER_APP_SECRET }}
TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
- name: Setup tmate session if fails
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.22
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
.github/workflows/notify-releases.yaml (vendored, 63 lines removed)

@@ -1,63 +0,0 @@
name: Release notifications
on:
release:
types:
- published

jobs:
notify-discord:
runs-on: ubuntu-latest
env:
RELEASE_BODY: ${{ github.event.release.body }}
RELEASE_TITLE: ${{ github.event.release.name }}
RELEASE_TAG_NAME: ${{ github.event.release.tag_name }}
steps:
- uses: mudler/localai-github-action@v1
with:
model: 'gemma-3-12b-it' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
- name: Summarize
id: summarize
run: |
input="$RELEASE_TITLE\b$RELEASE_BODY"

# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"

# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "Write a discord message with a bullet point summary of the release notes."
},
{
role: "user",
content: $input
}
]
}')

# Send the request to LocalAI API
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")

# Extract the summary from the response
summary=$(echo $response | jq -r '.choices[0].message.content')

# Print the summary
# -H "Authorization: Bearer $API_KEY" \
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
- name: Discord notification
env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL_RELEASE }}
DISCORD_USERNAME: "LocalAI-Bot"
DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
uses: Ilshidur/action-discord@master
with:
args: ${{ steps.summarize.outputs.message }}
.github/workflows/prlint.yaml (vendored, 28 lines removed)

@@ -1,28 +0,0 @@
name: Check PR style

on:
pull_request_target:
types:
- opened
- reopened
- edited
- synchronize

jobs:
title-lint:
runs-on: ubuntu-latest
permissions:
statuses: write
steps:
- uses: aslafy-z/conventional-pr-title-action@v3
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# check-pr-description:
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v2
# - uses: jadrol/pr-description-checker-action@v1.0.0
# id: description-checker
# with:
# repo-token: ${{ secrets.GITHUB_TOKEN }}
# exempt-labels: no qa
.github/workflows/release.yaml (vendored, 283 lines changed)

@@ -1,15 +1,11 @@
  name: Build and Release

  on:
- push:
+ - push
- branches:
+ - pull_request
- - master
- tags:
- - 'v*'
- pull_request:

  env:
- GRPC_VERSION: v1.65.0
+ GRPC_VERSION: v1.63.0

  permissions:
  contents: write
@@ -19,183 +15,9 @@ concurrency:
  cancel-in-progress: true

  jobs:

- build-linux-arm:
- runs-on: ubuntu-latest
- steps:
- - name: Clone
- uses: actions/checkout@v4
- with:
- submodules: true
- - uses: actions/setup-go@v5
- with:
- go-version: '1.21.x'
- cache: false
- - name: Dependencies
- run: |
- sudo apt-get update
- sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
- sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
- make install-go-tools
- - name: Install CUDA Dependencies
- run: |
- curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
- sudo dpkg -i cuda-keyring_1.1-1_all.deb
- sudo apt-get update
- sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
- env:
- CUDA_VERSION: 12-4
- - name: Cache grpc
- id: cache-grpc
- uses: actions/cache@v4
- with:
- path: grpc
- key: ${{ runner.os }}-arm-grpc-${{ env.GRPC_VERSION }}
- - name: Build grpc
- if: steps.cache-grpc.outputs.cache-hit != 'true'
- run: |
- git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
- cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
- cd cmake/build && cmake -DgRPC_INSTALL=ON \
- -DgRPC_BUILD_TESTS=OFF \
- ../.. && sudo make --jobs 5 --output-sync=target
- - name: Install gRPC
- run: |
- GNU_HOST=aarch64-linux-gnu
- C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
- CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
- CROSS_TOOLCHAIN=/usr/$GNU_HOST
- CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
- CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
- # https://cmake.org/cmake/help/v3.13/manual/cmake-toolchains.7.html#cross-compiling-for-linux
- echo "set(CMAKE_SYSTEM_NAME Linux)" >> $CMAKE_CROSS_TOOLCHAIN && \
- echo "set(CMAKE_SYSTEM_PROCESSOR arm)" >> $CMAKE_CROSS_TOOLCHAIN && \
- echo "set(CMAKE_STAGING_PREFIX $CROSS_STAGING_PREFIX)" >> $CMAKE_CROSS_TOOLCHAIN && \
- echo "set(CMAKE_SYSROOT ${CROSS_TOOLCHAIN}/sysroot)" >> $CMAKE_CROSS_TOOLCHAIN && \
- echo "set(CMAKE_C_COMPILER /usr/bin/$C_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
- echo "set(CMAKE_CXX_COMPILER /usr/bin/$CXX_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
- echo "set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)" >> $CMAKE_CROSS_TOOLCHAIN && \
- echo "set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
- echo "set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
- echo "set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN
- GRPC_DIR=$PWD/grpc
- cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install && \
- GRPC_CROSS_BUILD_DIR=$GRPC_DIR/cmake/cross_build && \
- mkdir -p $GRPC_CROSS_BUILD_DIR && \
- cd $GRPC_CROSS_BUILD_DIR && \
- cmake -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN \
- -DCMAKE_BUILD_TYPE=Release \
- -DCMAKE_INSTALL_PREFIX=$CROSS_TOOLCHAIN/grpc_install \
- ../.. && \
- sudo make -j`nproc` install
- - name: Build
- id: build
- run: |
- GNU_HOST=aarch64-linux-gnu
- C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
- CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
- CROSS_TOOLCHAIN=/usr/$GNU_HOST
- CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
- CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
- go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
- go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- export PATH=$PATH:$GOPATH/bin
- export PATH=/usr/local/cuda/bin:$PATH
- sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
- sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
- sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
- BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
- GOOS=linux \
- GOARCH=arm64 \
- CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
- - uses: actions/upload-artifact@v4
- with:
- name: LocalAI-linux-arm64
- path: release/
- - name: Release
- uses: softprops/action-gh-release@v2
- if: startsWith(github.ref, 'refs/tags/')
- with:
- files: |
- release/*
- - name: Setup tmate session if tests fail
- if: ${{ failure() }}
- uses: mxschmitt/action-tmate@v3.22
- with:
- detached: true
- connect-timeout-seconds: 180
- limit-access-to-actor: true
  build-linux:
  runs-on: ubuntu-latest
  steps:
- - name: Free Disk Space (Ubuntu)
- uses: jlumbroso/free-disk-space@main
- with:
- # this might remove tools that are actually needed,
- # if set to "true" but frees about 6 GB
- tool-cache: true
- # all of these default to true, but feel free to set to
- # "false" if necessary for your workflow
- android: true
- dotnet: true
- haskell: true
- large-packages: true
- docker-images: true
- swap-storage: true
- - name: Release space from worker
- run: |
- echo "Listing top largest packages"
- pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
- head -n 30 <<< "${pkgs}"
- echo
- df -h
- echo
- sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
- sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
- sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
- sudo rm -rf /usr/local/lib/android
- sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
- sudo rm -rf /usr/share/dotnet
- sudo apt-get remove -y '^mono-.*' || true
- sudo apt-get remove -y '^ghc-.*' || true
- sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
- sudo apt-get remove -y 'php.*' || true
- sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
- sudo apt-get remove -y '^google-.*' || true
- sudo apt-get remove -y azure-cli || true
- sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
- sudo apt-get remove -y '^gfortran-.*' || true
- sudo apt-get remove -y microsoft-edge-stable || true
- sudo apt-get remove -y firefox || true
- sudo apt-get remove -y powershell || true
- sudo apt-get remove -y r-base-core || true
- sudo apt-get autoremove -y
- sudo apt-get clean
- echo
- echo "Listing top largest packages"
- pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
- head -n 30 <<< "${pkgs}"
- echo
- sudo rm -rfv build || true
- sudo rm -rf /usr/share/dotnet || true
- sudo rm -rf /opt/ghc || true
- sudo rm -rf "/usr/local/share/boost" || true
- sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
- df -h
- - name: Force Install GIT latest
- run: |
- sudo apt-get update \
- && sudo apt-get install -y software-properties-common \
- && sudo apt-get update \
- && sudo add-apt-repository -y ppa:git-core/ppa \
- && sudo apt-get update \
- && sudo apt-get install -y git
  - name: Clone
  uses: actions/checkout@v4
  with:
@@ -207,14 +29,7 @@ jobs:
  - name: Dependencies
  run: |
  sudo apt-get update
- sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
+ sudo apt-get install build-essential ffmpeg protobuf-compiler
- make install-go-tools
- - name: Intel Dependencies
- run: |
- wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
- echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
- sudo apt update
- sudo apt install -y intel-basekit
  - name: Install CUDA Dependencies
  run: |
  curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
@@ -222,27 +37,7 @@ jobs:
  sudo apt-get update
  sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
  env:
- CUDA_VERSION: 12-5
+ CUDA_VERSION: 12-3
- - name: "Install Hipblas"
- env:
- ROCM_VERSION: "6.1"
- AMDGPU_VERSION: "6.1"
- run: |
- set -ex
- sudo apt-get update
- sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
- sudo apt update
- wget https://repo.radeon.com/amdgpu-install/6.4.1/ubuntu/noble/amdgpu-install_6.4.60401-1_all.deb
- sudo apt install ./amdgpu-install_6.4.60401-1_all.deb
- sudo apt update
- sudo amdgpu-install --usecase=rocm
- sudo apt-get clean
- sudo rm -rf /var/lib/apt/lists/*
- sudo ldconfig
  - name: Cache grpc
  id: cache-grpc
  uses: actions/cache@v4
@@ -253,26 +48,20 @@ jobs:
  if: steps.cache-grpc.outputs.cache-hit != 'true'
  run: |
  git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
- cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
+ cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
- cd cmake/build && cmake -DgRPC_INSTALL=ON \
  -DgRPC_BUILD_TESTS=OFF \
  ../.. && sudo make --jobs 5 --output-sync=target
  - name: Install gRPC
  run: |
  cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
- # BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
  - name: Build
  id: build
  run: |
- go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
- go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
+ go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
  export PATH=$PATH:$GOPATH/bin
  export PATH=/usr/local/cuda/bin:$PATH
- export PATH=/opt/rocm/bin:$PATH
+ make dist
- source /opt/intel/oneapi/setvars.sh
- sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
- BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/x86_64-linux-gnu/libdl.so.2 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/lib/x86_64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
- make -j4 dist
  - uses: actions/upload-artifact@v4
  with:
  name: LocalAI-linux
@@ -283,17 +72,9 @@ jobs:
  with:
  files: |
  release/*
- - name: Setup tmate session if tests fail
- if: ${{ failure() }}
- uses: mxschmitt/action-tmate@v3.22
- with:
- detached: true
- connect-timeout-seconds: 180
- limit-access-to-actor: true
+ build-stablediffusion:
- build-macOS-x86_64:
+ runs-on: ubuntu-latest
- runs-on: macos-13
  steps:
  - name: Clone
  uses: actions/checkout@v4
@@ -305,33 +86,18 @@ jobs:
  cache: false
  - name: Dependencies
  run: |
- brew install protobuf grpc
+ sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler
- make install-go-tools
+ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
- - name: Build
+ go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
- id: build
+ - name: Build stablediffusion
  run: |
- export C_INCLUDE_PATH=/usr/local/include
- export CPLUS_INCLUDE_PATH=/usr/local/include
  export PATH=$PATH:$GOPATH/bin
- export SKIP_GRPC_BACKEND=backend-assets/grpc/whisper
+ make backend-assets/grpc/stablediffusion
- make dist
+ mkdir -p release && cp backend-assets/grpc/stablediffusion release
  - uses: actions/upload-artifact@v4
  with:
- name: LocalAI-MacOS-x86_64
+ name: stablediffusion
  path: release/
- - name: Release
- uses: softprops/action-gh-release@v2
- if: startsWith(github.ref, 'refs/tags/')
- with:
- files: |
- release/*
- - name: Setup tmate session if tests fail
- if: ${{ failure() }}
- uses: mxschmitt/action-tmate@v3.22
- with:
- detached: true
- connect-timeout-seconds: 180
- limit-access-to-actor: true
  build-macOS-arm64:
  runs-on: macos-14
@@ -346,15 +112,15 @@ jobs:
  cache: false
  - name: Dependencies
  run: |
- brew install protobuf grpc libomp llvm
+ brew install protobuf grpc
- make install-go-tools
+ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
+ go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
  - name: Build
  id: build
  run: |
  export C_INCLUDE_PATH=/usr/local/include
  export CPLUS_INCLUDE_PATH=/usr/local/include
  export PATH=$PATH:$GOPATH/bin
- export CC=/opt/homebrew/opt/llvm/bin/clang
  make dist
  - uses: actions/upload-artifact@v4
  with:
@@ -366,10 +132,3 @@ jobs:
  with:
  files: |
  release/*
- - name: Setup tmate session if tests fail
- if: ${{ failure() }}
- uses: mxschmitt/action-tmate@v3.22
- with:
- detached: true
- connect-timeout-seconds: 180
- limit-access-to-actor: true
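The removed build-linux-arm job above assembles /tmp/arm.toolchain.cmake line by line with echo. For readability, this is the toolchain file those echo calls produce once the script's own variables are expanded (GNU_HOST=aarch64-linux-gnu, CROSS_TOOLCHAIN=/usr/aarch64-linux-gnu, CROSS_STAGING_PREFIX=/usr/aarch64-linux-gnu/stage); it is reconstructed from the script, not copied from the repository:

    set(CMAKE_SYSTEM_NAME Linux)
    set(CMAKE_SYSTEM_PROCESSOR arm)
    set(CMAKE_STAGING_PREFIX /usr/aarch64-linux-gnu/stage)
    set(CMAKE_SYSROOT /usr/aarch64-linux-gnu/sysroot)
    set(CMAKE_C_COMPILER /usr/bin/aarch64-linux-gnu-gcc)
    set(CMAKE_CXX_COMPILER /usr/bin/aarch64-linux-gnu-g++)
    set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
    set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
    set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
    set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)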
.github/workflows/secscan.yaml (vendored, 2 lines changed)

@@ -18,7 +18,7 @@ jobs:
  if: ${{ github.actor != 'dependabot[bot]' }}
  - name: Run Gosec Security Scanner
  if: ${{ github.actor != 'dependabot[bot]' }}
- uses: securego/gosec@v2.22.5
+ uses: securego/gosec@master
  with:
  # we let the report trigger content trigger a failure using the GitHub Security features.
  args: '-no-fail -fmt sarif -out results.sarif ./...'
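Both sides run gosec with the same arguments; only the action ref changes. For reference, the equivalent local invocation, assuming the gosec binary is installed, would be:

    gosec -no-fail -fmt sarif -out results.sarif ./...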
.github/workflows/test-extra.yml (vendored, 176 lines changed)

@@ -14,34 +14,12 @@ concurrency:
  cancel-in-progress: true

  jobs:
- # Requires CUDA
- # tests-chatterbox-tts:
- # runs-on: ubuntu-latest
- # steps:
- # - name: Clone
- # uses: actions/checkout@v4
- # with:
- # submodules: true
- # - name: Dependencies
- # run: |
- # sudo apt-get update
- # sudo apt-get install build-essential ffmpeg
- # # Install UV
- # curl -LsSf https://astral.sh/uv/install.sh | sh
- # sudo apt-get install -y ca-certificates cmake curl patch python3-pip
- # sudo apt-get install -y libopencv-dev
- # pip install --user --no-cache-dir grpcio-tools==1.64.1
- # - name: Test chatterbox-tts
- # run: |
- # make --jobs=5 --output-sync=target -C backend/python/chatterbox
- # make --jobs=5 --output-sync=target -C backend/python/chatterbox test
  tests-transformers:
  runs-on: ubuntu-latest
  steps:
  - name: Clone
  uses: actions/checkout@v4
  with:
  submodules: true
  - name: Dependencies
  run: |
@@ -51,18 +29,42 @@ jobs:
  curl -LsSf https://astral.sh/uv/install.sh | sh
  sudo apt-get install -y ca-certificates cmake curl patch python3-pip
  sudo apt-get install -y libopencv-dev
- pip install --user --no-cache-dir grpcio-tools==1.64.1
+ pip install --user grpcio-tools==1.63.0
  - name: Test transformers
  run: |
  make --jobs=5 --output-sync=target -C backend/python/transformers
  make --jobs=5 --output-sync=target -C backend/python/transformers test
+ tests-sentencetransformers:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Clone
+ uses: actions/checkout@v4
+ with:
+ submodules: true
+ - name: Dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install build-essential ffmpeg
+ # Install UV
+ curl -LsSf https://astral.sh/uv/install.sh | sh
+ sudo apt-get install -y ca-certificates cmake curl patch python3-pip
+ sudo apt-get install -y libopencv-dev
+ pip install --user grpcio-tools==1.63.0
+ - name: Test sentencetransformers
+ run: |
+ make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
+ make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
  tests-rerankers:
  runs-on: ubuntu-latest
  steps:
  - name: Clone
  uses: actions/checkout@v4
  with:
  submodules: true
  - name: Dependencies
  run: |
@@ -72,7 +74,7 @@ jobs:
  curl -LsSf https://astral.sh/uv/install.sh | sh
  sudo apt-get install -y ca-certificates cmake curl patch python3-pip
  sudo apt-get install -y libopencv-dev
- pip install --user --no-cache-dir grpcio-tools==1.64.1
+ pip install --user grpcio-tools==1.63.0
  - name: Test rerankers
  run: |
@@ -84,7 +86,7 @@ jobs:
  steps:
  - name: Clone
  uses: actions/checkout@v4
  with:
  submodules: true
  - name: Dependencies
  run: |
@@ -94,38 +96,64 @@ jobs:
  sudo apt-get install -y libopencv-dev
  # Install UV
  curl -LsSf https://astral.sh/uv/install.sh | sh
- pip install --user --no-cache-dir grpcio-tools==1.64.1
+ pip install --user grpcio-tools==1.63.0
  - name: Test diffusers
  run: |
  make --jobs=5 --output-sync=target -C backend/python/diffusers
  make --jobs=5 --output-sync=target -C backend/python/diffusers test
- #tests-vllm:
+ tests-parler-tts:
- # runs-on: ubuntu-latest
+ runs-on: ubuntu-latest
- # steps:
+ steps:
- # - name: Clone
+ - name: Clone
- # uses: actions/checkout@v4
+ uses: actions/checkout@v4
- # with:
+ with:
- # submodules: true
+ submodules: true
- # - name: Dependencies
+ - name: Dependencies
- # run: |
+ run: |
- # sudo apt-get update
+ sudo apt-get update
- # sudo apt-get install -y build-essential ffmpeg
+ sudo apt-get install build-essential ffmpeg
- # sudo apt-get install -y ca-certificates cmake curl patch python3-pip
+ # Install UV
- # sudo apt-get install -y libopencv-dev
+ curl -LsSf https://astral.sh/uv/install.sh | sh
- # # Install UV
+ sudo apt-get install -y ca-certificates cmake curl patch python3-pip
- # curl -LsSf https://astral.sh/uv/install.sh | sh
+ sudo apt-get install -y libopencv-dev
- # pip install --user --no-cache-dir grpcio-tools==1.64.1
+ pip install --user grpcio-tools==1.63.0
- # - name: Test vllm backend
- # run: |
+ - name: Test parler-tts
- # make --jobs=5 --output-sync=target -C backend/python/vllm
+ run: |
- # make --jobs=5 --output-sync=target -C backend/python/vllm test
+ make --jobs=5 --output-sync=target -C backend/python/parler-tts
- # tests-transformers-musicgen:
+ make --jobs=5 --output-sync=target -C backend/python/parler-tts test
+ tests-transformers-musicgen:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Clone
+ uses: actions/checkout@v4
+ with:
+ submodules: true
+ - name: Dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install build-essential ffmpeg
+ # Install UV
+ curl -LsSf https://astral.sh/uv/install.sh | sh
+ sudo apt-get install -y ca-certificates cmake curl patch python3-pip
+ sudo apt-get install -y libopencv-dev
+ pip install --user grpcio-tools==1.63.0
+ - name: Test transformers-musicgen
+ run: |
+ make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
+ make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
+ # tests-petals:
  # runs-on: ubuntu-latest
  # steps:
  # - name: Clone
  # uses: actions/checkout@v4
  # with:
  # submodules: true
  # - name: Dependencies
  # run: |
@@ -135,12 +163,14 @@ jobs:
  # curl -LsSf https://astral.sh/uv/install.sh | sh
  # sudo apt-get install -y ca-certificates cmake curl patch python3-pip
  # sudo apt-get install -y libopencv-dev
- # pip install --user --no-cache-dir grpcio-tools==1.64.1
+ # pip install --user grpcio-tools==1.63.0
- # - name: Test transformers-musicgen
+ # - name: Test petals
  # run: |
- # make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
+ # make --jobs=5 --output-sync=target -C backend/python/petals
- # make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
+ # make --jobs=5 --output-sync=target -C backend/python/petals test
  # tests-bark:
  # runs-on: ubuntu-latest
@@ -187,7 +217,7 @@ jobs:
  # df -h
  # - name: Clone
  # uses: actions/checkout@v4
  # with:
  # submodules: true
  # - name: Dependencies
  # run: |
@@ -197,14 +227,14 @@ jobs:
  # curl -LsSf https://astral.sh/uv/install.sh | sh
  # sudo apt-get install -y ca-certificates cmake curl patch python3-pip
  # sudo apt-get install -y libopencv-dev
- # pip install --user --no-cache-dir grpcio-tools==1.64.1
+ # pip install --user grpcio-tools==1.63.0
  # - name: Test bark
  # run: |
  # make --jobs=5 --output-sync=target -C backend/python/bark
  # make --jobs=5 --output-sync=target -C backend/python/bark test
  # Below tests needs GPU. Commented out for now
  # TODO: Re-enable as soon as we have GPU nodes
  # tests-vllm:
@@ -212,7 +242,7 @@ jobs:
  # steps:
  # - name: Clone
  # uses: actions/checkout@v4
  # with:
  # submodules: true
  # - name: Dependencies
  # run: |
@@ -222,18 +252,38 @@ jobs:
  # curl -LsSf https://astral.sh/uv/install.sh | sh
  # sudo apt-get install -y ca-certificates cmake curl patch python3-pip
  # sudo apt-get install -y libopencv-dev
- # pip install --user --no-cache-dir grpcio-tools==1.64.1
+ # pip install --user grpcio-tools==1.63.0
  # - name: Test vllm
  # run: |
  # make --jobs=5 --output-sync=target -C backend/python/vllm
  # make --jobs=5 --output-sync=target -C backend/python/vllm test
+ tests-vallex:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Clone
+ uses: actions/checkout@v4
+ with:
+ submodules: true
+ - name: Dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install build-essential ffmpeg
+ # Install UV
+ curl -LsSf https://astral.sh/uv/install.sh | sh
+ sudo apt-get install -y ca-certificates cmake curl patch python3-pip
+ sudo apt-get install -y libopencv-dev
+ pip install --user grpcio-tools==1.63.0
+ - name: Test vall-e-x
+ run: |
+ make --jobs=5 --output-sync=target -C backend/python/vall-e-x
+ make --jobs=5 --output-sync=target -C backend/python/vall-e-x test
  tests-coqui:
  runs-on: ubuntu-latest
  steps:
  - name: Clone
  uses: actions/checkout@v4
  with:
  submodules: true
  - name: Dependencies
  run: |
@@ -242,8 +292,8 @@ jobs:
  sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
  # Install UV
  curl -LsSf https://astral.sh/uv/install.sh | sh
- pip install --user --no-cache-dir grpcio-tools==1.64.1
+ pip install --user grpcio-tools==1.63.0
  - name: Test coqui
  run: |
  make --jobs=5 --output-sync=target -C backend/python/coqui
  make --jobs=5 --output-sync=target -C backend/python/coqui test
54
.github/workflows/test.yml
vendored
54
.github/workflows/test.yml
vendored
@@ -10,7 +10,7 @@ on:
|
|||||||
- '*'
|
- '*'
|
||||||
|
|
||||||
env:
|
env:
|
||||||
GRPC_VERSION: v1.65.0
|
GRPC_VERSION: v1.63.0
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||||
@@ -70,8 +70,7 @@ jobs:
|
|||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
|
sudo apt-get install build-essential curl ffmpeg
|
||||||
sudo apt-get install -y libgmock-dev clang
|
|
||||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||||
@@ -94,21 +93,23 @@ jobs:
 sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
 export CUDACXX=/usr/local/cuda/bin/nvcc

-go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-go install github.com/GeertJohan/go.rice/rice@latest
+go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
+go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest

 # The python3-grpc-tools package in 22.04 is too old
-pip install --user grpcio-tools==1.71.0 grpcio==1.71.0
+pip install --user grpcio-tools

-make -C backend/python/transformers
+sudo rm -rfv /usr/bin/conda || true
+PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers

 # Pre-build piper before we start tests in order to have shared libraries in place
 make sources/go-piper && \
 GO_TAGS="tts" make -C sources/go-piper piper.o && \
-sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/
+sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
+# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
+PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
 env:
-CUDA_VERSION: 12-4
+CUDA_VERSION: 12-3
 - name: Cache grpc
 id: cache-grpc
 uses: actions/cache@v4
@@ -119,8 +120,7 @@ jobs:
 if: steps.cache-grpc.outputs.cache-hit != 'true'
 run: |
 git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --jobs 5 --shallow-submodules https://github.com/grpc/grpc && \
-cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && cd cmake/build && \
-cmake -DgRPC_INSTALL=ON \
+cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
 -DgRPC_BUILD_TESTS=OFF \
 ../.. && sudo make --jobs 5
 - name: Install gRPC
@@ -128,10 +128,10 @@ jobs:
 cd grpc && cd cmake/build && sudo make --jobs 5 install
 - name: Test
 run: |
-PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
+PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
 - name: Setup tmate session if tests fail
 if: ${{ failure() }}
-uses: mxschmitt/action-tmate@v3.22
+uses: mxschmitt/action-tmate@v3.18
 with:
 detached: true
 connect-timeout-seconds: 180
@@ -176,27 +176,17 @@ jobs:
 uses: actions/checkout@v4
 with:
 submodules: true
-- name: Dependencies
-run: |
-# Install protoc
-curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
-unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
-rm protoc.zip
-go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-go install github.com/GeertJohan/go.rice/rice@latest
-PATH="$PATH:$HOME/go/bin" make protogen-go
 - name: Build images
 run: |
-docker build --build-arg FFMPEG=true --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
+docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=core --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
 BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
 - name: Test
 run: |
-PATH="$PATH:$HOME/go/bin" LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
+LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
 make run-e2e-aio
 - name: Setup tmate session if tests fail
 if: ${{ failure() }}
-uses: mxschmitt/action-tmate@v3.22
+uses: mxschmitt/action-tmate@v3.18
 with:
 detached: true
 connect-timeout-seconds: 180
@@ -222,20 +212,18 @@ jobs:
 run: go version
 - name: Dependencies
 run: |
-brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
-pip install --user --no-cache-dir grpcio-tools==1.71.0 grpcio==1.71.0
-go install github.com/GeertJohan/go.rice/rice@latest
+brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
+pip install --user grpcio-tools==1.63.0
 - name: Test
 run: |
 export C_INCLUDE_PATH=/usr/local/include
 export CPLUS_INCLUDE_PATH=/usr/local/include
-export CC=/opt/homebrew/opt/llvm/bin/clang
 # Used to run the newer GNUMake version from brew that supports --output-sync
 export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
-BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
+BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
 - name: Setup tmate session if tests fail
 if: ${{ failure() }}
-uses: mxschmitt/action-tmate@v3.22
+uses: mxschmitt/action-tmate@v3.18
 with:
 detached: true
 connect-timeout-seconds: 180
.github/workflows/update_swagger.yaml (vendored, 10 changed lines)
@@ -13,19 +13,13 @@ jobs:
 - uses: actions/setup-go@v5
 with:
 go-version: 'stable'
-- name: Dependencies
-run: |
-sudo apt-get update
-sudo apt-get install protobuf-compiler
 - run: |
 go install github.com/swaggo/swag/cmd/swag@latest
-go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
 - name: Bump swagger 🔧
 run: |
-make protogen-go swagger
+make swagger
 - name: Create Pull Request
-uses: peter-evans/create-pull-request@v7
+uses: peter-evans/create-pull-request@v6
 with:
 token: ${{ secrets.UPDATE_BOT_TOKEN }}
 push-to-fork: ci-forks/LocalAI
.github/workflows/yaml-check.yml (vendored, 10 changed lines)
@@ -8,7 +8,7 @@ jobs:
 steps:
 - name: 'Checkout'
 uses: actions/checkout@master
-- name: 'Yamllint model gallery'
+- name: 'Yamllint'
 uses: karancode/yamllint-github-action@master
 with:
 yamllint_file_or_dir: 'gallery'
@@ -16,11 +16,3 @@ jobs:
 yamllint_comment: true
 env:
 GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-- name: 'Yamllint Backend gallery'
-uses: karancode/yamllint-github-action@master
-with:
-yamllint_file_or_dir: 'backend'
-yamllint_strict: false
-yamllint_comment: true
-env:
-GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
.gitignore (vendored, 11 changed lines)
@@ -2,17 +2,14 @@
 /sources/
 __pycache__/
 *.a
-*.o
 get-sources
 prepare-sources
 /backend/cpp/llama/grpc-server
 /backend/cpp/llama/llama.cpp
-/backend/cpp/llama-*
-
-*.log

 go-ggml-transformers
 go-gpt2
+go-rwkv
 whisper.cpp
 /bloomz
 go-bert
@@ -42,7 +39,6 @@ backend-assets/*
 !backend-assets/.keep
 prepare
 /ggml-metal.metal
-docs/static/gallery.html

 # Protobuf generated files
 *.pb.go
@@ -53,7 +49,4 @@ docs/static/gallery.html
 .scannerwork

 # backend virtual environments
 **/venv
-
-# per-developer customization files for the development container
-.devcontainer/customization/*
.vscode/launch.json (vendored, 21 changed lines)
@@ -3,12 +3,12 @@
 "configurations": [
 {
 "name": "Python: Current File",
-"type": "debugpy",
+"type": "python",
 "request": "launch",
 "program": "${file}",
 "console": "integratedTerminal",
 "justMyCode": false,
-"cwd": "${fileDirname}",
+"cwd": "${workspaceFolder}/examples/langchain-chroma",
 "env": {
 "OPENAI_API_BASE": "http://localhost:8080/v1",
 "OPENAI_API_KEY": "abc"
@@ -19,16 +19,15 @@
 "type": "go",
 "request": "launch",
 "mode": "debug",
-"program": "${workspaceRoot}",
-"args": [],
+"program": "${workspaceFolder}/main.go",
+"args": [
+"api"
+],
 "env": {
-"LOCALAI_LOG_LEVEL": "debug",
-"LOCALAI_P2P": "true",
-"LOCALAI_FEDERATED": "true"
-},
-"buildFlags": ["-tags", "p2p tts", "-v"],
-"envFile": "${workspaceFolder}/.env",
-"cwd": "${workspaceRoot}"
+"C_INCLUDE_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
+"LIBRARY_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
+"DEBUG": "true"
+}
 }
 ]
 }
@@ -15,6 +15,8 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time
 - [Documentation](#documentation)
 - [Community and Communication](#community-and-communication)
+
+

 ## Getting Started

 ### Prerequisites
@@ -52,7 +54,7 @@ If you find a bug, have a feature request, or encounter any issues, please check

 ## Coding Guidelines

-- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like [`golangci-lint`](https://golangci-lint.run) can help you here.
+- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like []`golangci-lint`](https://golangci-lint.run) can help you here.

 ## Testing

@@ -82,3 +84,5 @@ We are welcome the contribution of the documents, please open new PR or create a
 - You can reach out via the Github issue tracker.
 - Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
 - Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
+
+---
Dockerfile (356 changed lines)
@@ -1,138 +1,128 @@
|
|||||||
|
ARG IMAGE_TYPE=extras
|
||||||
ARG BASE_IMAGE=ubuntu:22.04
|
ARG BASE_IMAGE=ubuntu:22.04
|
||||||
ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
|
ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
|
||||||
ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
|
|
||||||
|
|
||||||
# The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it.
|
# The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it.
|
||||||
FROM ${BASE_IMAGE} AS requirements
|
FROM ${BASE_IMAGE} AS requirements-core
|
||||||
|
|
||||||
USER root
|
USER root
|
||||||
|
|
||||||
ARG GO_VERSION=1.22.6
|
ARG GO_VERSION=1.21.7
|
||||||
ARG CMAKE_VERSION=3.26.4
|
|
||||||
ARG CMAKE_FROM_SOURCE=false
|
|
||||||
ARG TARGETARCH
|
ARG TARGETARCH
|
||||||
ARG TARGETVARIANT
|
ARG TARGETVARIANT
|
||||||
|
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
|
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
|
||||||
|
|
||||||
|
ARG GO_TAGS="stablediffusion tinydream tts"
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
build-essential \
|
build-essential \
|
||||||
ccache \
|
ca-certificates \
|
||||||
ca-certificates espeak-ng \
|
cmake \
|
||||||
curl libssl-dev \
|
curl \
|
||||||
git \
|
git \
|
||||||
git-lfs \
|
python3-pip \
|
||||||
unzip upx-ucl python3 python-is-python3 && \
|
python-is-python3 \
|
||||||
|
unzip && \
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/* && \
|
||||||
|
pip install --upgrade pip
|
||||||
# Install CMake (the version in 22.04 is too old)
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
|
|
||||||
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
|
|
||||||
else
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
cmake && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# Install Go
|
# Install Go
|
||||||
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
|
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
|
||||||
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
|
ENV PATH $PATH:/root/go/bin:/usr/local/go/bin
|
||||||
|
|
||||||
# Install grpc compilers and rice
|
# Install grpc compilers
|
||||||
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
|
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@latest && \
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af && \
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
|
||||||
go install github.com/GeertJohan/go.rice/rice@latest
|
|
||||||
|
# Install grpcio-tools (the version in 22.04 is too old)
|
||||||
|
RUN pip install --user grpcio-tools
|
||||||
|
|
||||||
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
|
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
|
||||||
RUN update-ca-certificates
|
RUN update-ca-certificates
|
||||||
|
|
||||||
RUN test -n "$TARGETARCH" \
|
|
||||||
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
|
|
||||||
|
|
||||||
# Use the variables in subsequent instructions
|
# Use the variables in subsequent instructions
|
||||||
RUN echo "Target Architecture: $TARGETARCH"
|
RUN echo "Target Architecture: $TARGETARCH"
|
||||||
RUN echo "Target Variant: $TARGETVARIANT"
|
RUN echo "Target Variant: $TARGETVARIANT"
|
||||||
|
|
||||||
# Cuda
|
# Cuda
|
||||||
ENV PATH=/usr/local/cuda/bin:${PATH}
|
ENV PATH /usr/local/cuda/bin:${PATH}
|
||||||
|
|
||||||
# HipBLAS requirements
|
# HipBLAS requirements
|
||||||
ENV PATH=/opt/rocm/bin:${PATH}
|
ENV PATH /opt/rocm/bin:${PATH}
|
||||||
|
|
||||||
# OpenBLAS requirements and stable diffusion
|
# OpenBLAS requirements and stable diffusion
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
libopenblas-dev && \
|
libopenblas-dev \
|
||||||
|
libopencv-dev && \
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Set up OpenCV
|
||||||
|
RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||||
|
|
||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
|
|
||||||
|
RUN test -n "$TARGETARCH" \
|
||||||
|
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
|
||||||
|
|
||||||
|
###################################
|
||||||
|
###################################
|
||||||
|
|
||||||
|
# The requirements-extras target is for any builds with IMAGE_TYPE=extras. It should not be placed in this target unless every IMAGE_TYPE=extras build will use it
|
||||||
|
FROM requirements-core AS requirements-extras
|
||||||
|
|
||||||
|
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
|
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||||
|
|
||||||
|
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y --no-install-recommends \
|
||||||
|
espeak-ng \
|
||||||
|
espeak \
|
||||||
|
python3-dev \
|
||||||
|
python3-venv && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
|
# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
|
||||||
FROM requirements AS requirements-drivers
|
# This target will be built on top of requirements-core or requirements-extras as retermined by the IMAGE_TYPE build-arg
|
||||||
|
FROM requirements-${IMAGE_TYPE} AS requirements-drivers
|
||||||
|
|
||||||
ARG BUILD_TYPE
|
ARG BUILD_TYPE
|
||||||
ARG CUDA_MAJOR_VERSION=12
|
ARG CUDA_MAJOR_VERSION=11
|
||||||
ARG CUDA_MINOR_VERSION=0
|
ARG CUDA_MINOR_VERSION=7
|
||||||
ARG SKIP_DRIVERS=false
|
|
||||||
|
|
||||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||||
|
|
||||||
# Vulkan requirements
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
software-properties-common pciutils wget gpg-agent && \
|
|
||||||
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
|
||||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
vulkan-sdk && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# CuBLAS requirements
|
# CuBLAS requirements
|
||||||
RUN <<EOT bash
|
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
|
||||||
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
software-properties-common pciutils
|
software-properties-common && \
|
||||||
if [ "amd64" = "$TARGETARCH" ]; then
|
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
|
||||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
|
||||||
fi
|
|
||||||
if [ "arm64" = "$TARGETARCH" ]; then
|
|
||||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
|
|
||||||
fi
|
|
||||||
dpkg -i cuda-keyring_1.1-1_all.deb && \
|
dpkg -i cuda-keyring_1.1-1_all.deb && \
|
||||||
rm -f cuda-keyring_1.1-1_all.deb && \
|
rm -f cuda-keyring_1.1-1_all.deb && \
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||||
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||||
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||||
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||||
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
|
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/* \
|
||||||
fi
|
; fi
|
||||||
EOT
|
|
||||||
|
|
||||||
# If we are building with clblas support, we need the libraries for the builds
|
# If we are building with clblas support, we need the libraries for the builds
|
||||||
RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
|
RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
libclblast-dev && \
|
libclblast-dev && \
|
||||||
@@ -140,7 +130,7 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
 rm -rf /var/lib/apt/lists/* \
 ; fi

-RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
+RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
 apt-get update && \
 apt-get install -y --no-install-recommends \
 hipblas-dev \
@@ -155,26 +145,13 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
|||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
# Temporary workaround for Intel's repository to work correctly
|
|
||||||
# https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/APT-Repository-not-working-signatures-invalid/m-p/1599436/highlight/true#M36143
|
|
||||||
# This is a temporary workaround until Intel fixes their repository
|
|
||||||
FROM ${INTEL_BASE_IMAGE} AS intel
|
|
||||||
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
|
|
||||||
gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
|
|
||||||
RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list
|
|
||||||
|
|
||||||
###################################
|
|
||||||
###################################
|
|
||||||
|
|
||||||
# The grpc target does one thing, it builds and installs GRPC. This is in it's own layer so that it can be effectively cached by CI.
|
# The grpc target does one thing, it builds and installs GRPC. This is in it's own layer so that it can be effectively cached by CI.
|
||||||
# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
|
# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
|
||||||
FROM ${GRPC_BASE_IMAGE} AS grpc
|
FROM ${GRPC_BASE_IMAGE} AS grpc
|
||||||
|
|
||||||
# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
|
# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
|
||||||
ARG GRPC_MAKEFLAGS="-j4 -Otarget"
|
ARG GRPC_MAKEFLAGS="-j4 -Otarget"
|
||||||
ARG GRPC_VERSION=v1.65.0
|
ARG GRPC_VERSION=v1.58.0
|
||||||
ARG CMAKE_FROM_SOURCE=false
|
|
||||||
ARG CMAKE_VERSION=3.26.4
|
|
||||||
|
|
||||||
ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
|
ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
|
||||||
|
|
||||||
@@ -183,31 +160,18 @@ WORKDIR /build
|
|||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
ca-certificates \
|
ca-certificates \
|
||||||
build-essential curl libssl-dev \
|
build-essential \
|
||||||
|
cmake \
|
||||||
git && \
|
git && \
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Install CMake (the version in 22.04 is too old)
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
|
|
||||||
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
|
|
||||||
else
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
cmake && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
|
# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
|
||||||
# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
|
# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
|
||||||
# and running make install in the target container
|
# and running make install in the target container
|
||||||
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||||
mkdir -p /build/grpc/cmake/build && \
|
mkdir -p /build/grpc/cmake/build && \
|
||||||
cd /build/grpc/cmake/build && \
|
cd /build/grpc/cmake/build && \
|
||||||
sed -i "216i\ TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt" && \
|
|
||||||
cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
|
cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
|
||||||
make && \
|
make && \
|
||||||
make install && \
|
make install && \
|
||||||
@@ -216,14 +180,13 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
|
|||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
# The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer
|
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
|
||||||
|
# Adjustments to the build process should likely be made here.
|
||||||
|
FROM requirements-drivers AS builder
|
||||||
|
|
||||||
FROM requirements-drivers AS builder-base
|
ARG GO_TAGS="stablediffusion tts"
|
||||||
|
|
||||||
ARG GO_TAGS="tts p2p"
|
|
||||||
ARG GRPC_BACKENDS
|
ARG GRPC_BACKENDS
|
||||||
ARG MAKEFLAGS
|
ARG MAKEFLAGS
|
||||||
ARG LD_FLAGS="-s -w"
|
|
||||||
|
|
||||||
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
|
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
|
||||||
ENV GO_TAGS=${GO_TAGS}
|
ENV GO_TAGS=${GO_TAGS}
|
||||||
@@ -231,75 +194,31 @@ ENV MAKEFLAGS=${MAKEFLAGS}
|
|||||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||||
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
||||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||||
ENV LD_FLAGS=${LD_FLAGS}
|
|
||||||
|
|
||||||
RUN echo "GO_TAGS: $GO_TAGS" && echo "TARGETARCH: $TARGETARCH"
|
|
||||||
|
|
||||||
WORKDIR /build
|
|
||||||
|
|
||||||
|
|
||||||
# We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below
|
|
||||||
# but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
|
|
||||||
# here so that we can generate the grpc code for the stablediffusion build
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "amd64" = "$TARGETARCH" ]; then
|
|
||||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
|
|
||||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
|
||||||
rm protoc.zip
|
|
||||||
fi
|
|
||||||
if [ "arm64" = "$TARGETARCH" ]; then
|
|
||||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-aarch_64.zip -o protoc.zip && \
|
|
||||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
|
||||||
rm protoc.zip
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
###################################
|
|
||||||
###################################
|
|
||||||
|
|
||||||
# Compile backends first in a separate stage
|
|
||||||
FROM builder-base AS builder-backends
|
|
||||||
|
|
||||||
COPY --from=grpc /opt/grpc /usr/local
|
|
||||||
|
|
||||||
WORKDIR /build
|
|
||||||
|
|
||||||
COPY ./Makefile .
|
|
||||||
COPY ./backend ./backend
|
|
||||||
COPY ./go.mod .
|
|
||||||
COPY ./go.sum .
|
|
||||||
COPY ./.git ./.git
|
|
||||||
|
|
||||||
# Some of the Go backends use libs from the main src, we could further optimize the caching by building the CPP backends before here
|
|
||||||
COPY ./pkg/grpc ./pkg/grpc
|
|
||||||
COPY ./pkg/utils ./pkg/utils
|
|
||||||
COPY ./pkg/langchain ./pkg/langchain
|
|
||||||
|
|
||||||
RUN ls -l ./
|
|
||||||
RUN make backend-assets
|
|
||||||
RUN make prepare
|
|
||||||
RUN if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
|
||||||
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make grpcs; \
|
|
||||||
else \
|
|
||||||
make grpcs; \
|
|
||||||
fi
|
|
||||||
|
|
||||||
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
|
|
||||||
# Adjustments to the build process should likely be made here.
|
|
||||||
FROM builder-backends AS builder
|
|
||||||
|
|
||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
|
COPY .git .
|
||||||
|
RUN echo "GO_TAGS: $GO_TAGS"
|
||||||
|
|
||||||
## Build the binary
|
RUN make prepare
|
||||||
## If we're on arm64 AND using cublas/hipblas, skip some of the llama-compat backends to save space
|
|
||||||
## Otherwise just run the normal build
|
# We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below
|
||||||
RUN if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
# but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
|
||||||
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
|
# here so that we can generate the grpc code for the stablediffusion build
|
||||||
else \
|
RUN curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
|
||||||
make build; \
|
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||||
fi
|
rm protoc.zip
|
||||||
|
|
||||||
|
# stablediffusion does not tolerate a newer version of abseil, build it first
|
||||||
|
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||||
|
|
||||||
|
# Install the pre-built GRPC
|
||||||
|
COPY --from=grpc /opt/grpc /usr/local
|
||||||
|
|
||||||
|
# Rebuild with defaults backends
|
||||||
|
WORKDIR /build
|
||||||
|
RUN make build
|
||||||
|
|
||||||
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
||||||
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
|
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
|
||||||
@@ -309,38 +228,6 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
|||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
# The devcontainer target is not used on CI. It is a target for developers to use locally -
|
|
||||||
# rather than copying files it mounts them locally and leaves building to the developer
|
|
||||||
|
|
||||||
FROM builder-base AS devcontainer
|
|
||||||
|
|
||||||
ARG FFMPEG
|
|
||||||
|
|
||||||
COPY --from=grpc /opt/grpc /usr/local
|
|
||||||
|
|
||||||
COPY .devcontainer-scripts /.devcontainer-scripts
|
|
||||||
|
|
||||||
# Add FFmpeg
|
|
||||||
RUN if [ "${FFMPEG}" = "true" ]; then \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
ffmpeg && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/* \
|
|
||||||
; fi
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
ssh less wget
|
|
||||||
# For the devcontainer, leave apt functional in case additional devtools are needed at runtime.
|
|
||||||
|
|
||||||
RUN go install github.com/go-delve/delve/cmd/dlv@latest
|
|
||||||
|
|
||||||
RUN go install github.com/mikefarah/yq/v4@latest
|
|
||||||
|
|
||||||
###################################
|
|
||||||
###################################
|
|
||||||
|
|
||||||
# This is the final target. The result of this target will be the image uploaded to the registry.
|
# This is the final target. The result of this target will be the image uploaded to the registry.
|
||||||
# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
|
# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
|
||||||
FROM requirements-drivers
|
FROM requirements-drivers
|
||||||
@@ -348,6 +235,8 @@ FROM requirements-drivers
 ARG FFMPEG
 ARG BUILD_TYPE
 ARG TARGETARCH
+ARG IMAGE_TYPE=extras
+ARG EXTRA_BACKENDS
 ARG MAKEFLAGS

 ENV BUILD_TYPE=${BUILD_TYPE}
@@ -355,7 +244,7 @@ ENV REBUILD=false
 ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
 ENV MAKEFLAGS=${MAKEFLAGS}

-ARG CUDA_MAJOR_VERSION=12
+ARG CUDA_MAJOR_VERSION=11
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
 ENV NVIDIA_VISIBLE_DEVICES=all
@@ -380,19 +269,78 @@ COPY . .
|
|||||||
COPY --from=builder /build/sources ./sources/
|
COPY --from=builder /build/sources ./sources/
|
||||||
COPY --from=grpc /opt/grpc /usr/local
|
COPY --from=grpc /opt/grpc /usr/local
|
||||||
|
|
||||||
|
RUN make prepare-sources
|
||||||
|
|
||||||
# Copy the binary
|
# Copy the binary
|
||||||
COPY --from=builder /build/local-ai ./
|
COPY --from=builder /build/local-ai ./
|
||||||
|
|
||||||
# Copy shared libraries for piper
|
# Copy shared libraries for piper
|
||||||
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
|
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
|
||||||
|
|
||||||
|
# do not let stablediffusion rebuild (requires an older version of absl)
|
||||||
|
COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
|
||||||
|
|
||||||
|
# Change the shell to bash so we can use [[ tests below
|
||||||
|
SHELL ["/bin/bash", "-c"]
|
||||||
|
# We try to strike a balance between individual layer size (as that affects total push time) and total image size
|
||||||
|
# Splitting the backends into more groups with fewer items results in a larger image, but a smaller size for the largest layer
|
||||||
|
# Splitting the backends into fewer groups with more items results in a smaller image, but a larger size for the largest layer
|
||||||
|
|
||||||
|
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/coqui \
|
||||||
|
; fi && \
|
||||||
|
if [[ ( "${EXTRA_BACKENDS}" =~ "parler-tts" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/parler-tts \
|
||||||
|
; fi && \
|
||||||
|
if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/diffusers \
|
||||||
|
; fi && \
|
||||||
|
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/transformers-musicgen \
|
||||||
|
; fi && \
|
||||||
|
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama1" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/exllama \
|
||||||
|
; fi
|
||||||
|
|
||||||
|
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/vall-e-x \
|
||||||
|
; fi && \
|
||||||
|
if [[ ( "${EXTRA_BACKENDS}" =~ "petals" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/petals \
|
||||||
|
; fi && \
|
||||||
|
if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/sentencetransformers \
|
||||||
|
; fi && \
|
||||||
|
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/exllama2 \
|
||||||
|
; fi && \
|
||||||
|
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/transformers \
|
||||||
|
; fi
|
||||||
|
|
||||||
|
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/vllm \
|
||||||
|
; fi && \
|
||||||
|
if [[ ( "${EXTRA_BACKENDS}" =~ "autogptq" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/autogptq \
|
||||||
|
; fi && \
|
||||||
|
if [[ ( "${EXTRA_BACKENDS}" =~ "bark" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/bark \
|
||||||
|
; fi && \
|
||||||
|
if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/rerankers \
|
||||||
|
; fi && \
|
||||||
|
if [[ ( "${EXTRA_BACKENDS}" =~ "mamba" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
|
make -C backend/python/mamba \
|
||||||
|
; fi
|
||||||
|
|
||||||
# Make sure the models directory exists
|
# Make sure the models directory exists
|
||||||
RUN mkdir -p /build/models /build/backends
|
RUN mkdir -p /build/models
|
||||||
|
|
||||||
# Define the health check command
|
# Define the health check command
|
||||||
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
|
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
|
||||||
CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1
|
CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1
|
||||||
|
|
||||||
VOLUME /build/models /build/backends
|
VOLUME /build/models
|
||||||
EXPOSE 8080
|
EXPOSE 8080
|
||||||
ENTRYPOINT [ "/build/entrypoint.sh" ]
|
ENTRYPOINT [ "/build/entrypoint.sh" ]
|
||||||
|
|||||||
LICENSE (2 changed lines)
@@ -1,6 +1,6 @@
 MIT License

-Copyright (c) 2023-2025 Ettore Di Giacinto (mudler@localai.io)
+Copyright (c) 2023-2024 Ettore Di Giacinto (mudler@localai.io)

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
README.md (254 changed lines)
@@ -1,6 +1,7 @@
 <h1 align="center">
 <br>
-<img height="300" src="./core/http/static/logo.png"> <br>
+<img height="300" src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd"> <br>
+LocalAI
 <br>
 </h1>

@@ -30,223 +31,73 @@
|
|||||||
|
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<a href="https://twitter.com/LocalAI_API" target="blank">
|
<a href="https://twitter.com/LocalAI_API" target="blank">
|
||||||
<img src="https://img.shields.io/badge/X-%23000000.svg?style=for-the-badge&logo=X&logoColor=white&label=LocalAI_API" alt="Follow LocalAI_API"/>
|
<img src="https://img.shields.io/twitter/follow/LocalAI_API?label=Follow: LocalAI_API&style=social" alt="Follow LocalAI_API"/>
|
||||||
</a>
|
</a>
|
||||||
<a href="https://discord.gg/uJAeKSAGDy" target="blank">
|
<a href="https://discord.gg/uJAeKSAGDy" target="blank">
|
||||||
<img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
|
<img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
|
||||||
</a>
|
</a>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<p align="center">
|
|
||||||
<a href="https://trendshift.io/repositories/5539" target="_blank"><img src="https://trendshift.io/api/badge/repositories/5539" alt="mudler%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
|
||||||
</p>
|
|
||||||
|
|
||||||
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
||||||
>
|
>
|
||||||
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) Try on
|
> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||||
[](https://t.me/localaiofficial_bot)
|
|
||||||
|
|
||||||
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
||||||
|
|
||||||
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that's compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
|
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
|
||||||
|
|
||||||
|
## 🔥🔥 Hot topics / Roadmap
|
||||||
|
|
||||||
## 📚🆕 Local Stack Family
|
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||||
|
|
||||||
🆕 LocalAI is now part of a comprehensive suite of AI tools designed to work together:
|
- Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
|
||||||
|
- Reranker API: https://github.com/mudler/LocalAI/pull/2121
|
||||||
|
- Gallery WebUI: https://github.com/mudler/LocalAI/pull/2104
|
||||||
|
- llama3: https://github.com/mudler/LocalAI/discussions/2076
|
||||||
|
- Parler-TTS: https://github.com/mudler/LocalAI/pull/2027
|
||||||
|
- Openvino support: https://github.com/mudler/LocalAI/pull/1892
|
||||||
|
- Vector store: https://github.com/mudler/LocalAI/pull/1795
|
||||||
|
- All-in-one container image: https://github.com/mudler/LocalAI/issues/1855
|
||||||
|
|
||||||
<table>
|
Hot topics (looking for contributors):
|
||||||
<tr>
|
|
||||||
<td width="50%" valign="top">
|
|
||||||
<a href="https://github.com/mudler/LocalAGI">
|
|
||||||
<img src="https://raw.githubusercontent.com/mudler/LocalAGI/refs/heads/main/webui/react-ui/public/logo_2.png" width="300" alt="LocalAGI Logo">
|
|
||||||
</a>
|
|
||||||
</td>
|
|
||||||
<td width="50%" valign="top">
|
|
||||||
<h3><a href="https://github.com/mudler/LocalAGI">LocalAGI</a></h3>
|
|
||||||
<p>A powerful Local AI agent management platform that serves as a drop-in replacement for OpenAI's Responses API, enhanced with advanced agentic capabilities.</p>
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td width="50%" valign="top">
|
|
||||||
<a href="https://github.com/mudler/LocalRecall">
|
|
||||||
<img src="https://raw.githubusercontent.com/mudler/LocalRecall/refs/heads/main/static/localrecall_horizontal.png" width="300" alt="LocalRecall Logo">
|
|
||||||
</a>
|
|
||||||
</td>
|
|
||||||
<td width="50%" valign="top">
|
|
||||||
<h3><a href="https://github.com/mudler/LocalRecall">LocalRecall</a></h3>
|
|
||||||
<p>A REST-ful API and knowledge base management system that provides persistent memory and storage capabilities for AI agents.</p>
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
## Screenshots
|
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
|
||||||
|
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
||||||
|
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
|
||||||
|
- Assistant API: https://github.com/mudler/LocalAI/issues/1273
|
||||||
|
- Moderation endpoint: https://github.com/mudler/LocalAI/issues/999
|
||||||
|
- Vulkan: https://github.com/mudler/LocalAI/issues/1647
|
||||||
|
|
||||||
|
If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22
|
||||||
|
|
||||||
| Talk Interface | Generate Audio |
|
## 💻 [Getting started](https://localai.io/basics/getting_started/index.html)
|
||||||
| --- | --- |
|
|
||||||
|  |  |
|
|
||||||
|
|
||||||
| Models Overview | Generate Images |
|
For a detailed step-by-step introduction, refer to the [Getting Started](https://localai.io/basics/getting_started/index.html) guide.
|
||||||
| --- | --- |
|
|
||||||
|  |  |
|
|
||||||
|
|
||||||
| Chat Interface | Home |
|
For those in a hurry, here's a straightforward one-liner to launch a LocalAI AIO(All-in-one) Image using `docker`:
|
||||||
| --- | --- |
|
|
||||||
|  |  |
|
|
||||||
|
|
||||||
| Login | Swarm |
|
|
||||||
| --- | --- |
|
|
||||||
| |  |
|
|
||||||
|
|
||||||
## 💻 Quickstart
|
|
||||||
|
|
||||||
Run the installer script:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Basic installation
|
|
||||||
curl https://localai.io/install.sh | sh
|
|
||||||
```
|
|
||||||
|
|
||||||
For more installation options, see [Installer Options](https://localai.io/docs/advanced/installer/).
|
|
||||||
|
|
||||||
Or run with docker:
|
|
||||||
|
|
||||||
### CPU only image:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
|
|
||||||
```
|
|
||||||
|
|
||||||
### NVIDIA GPU Images:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# CUDA 12.0 with core features
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
|
|
||||||
|
|
||||||
# CUDA 12.0 with extra Python dependencies
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12-extras
|
|
||||||
|
|
||||||
# CUDA 11.7 with core features
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11
|
|
||||||
|
|
||||||
# CUDA 11.7 with extra Python dependencies
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11-extras
|
|
||||||
|
|
||||||
# NVIDIA Jetson (L4T) ARM64
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-nvidia-l4t-arm64
|
|
||||||
```
|
|
||||||
|
|
||||||
### AMD GPU Images (ROCm):
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# ROCm with core features
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas
|
|
||||||
|
|
||||||
# ROCm with extra Python dependencies
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas-extras
|
|
||||||
```
|
|
||||||
|
|
||||||
### Intel GPU Images (oneAPI):
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Intel GPU with FP16 support
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f16
|
|
||||||
|
|
||||||
# Intel GPU with FP16 support and extra dependencies
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f16-extras
|
|
||||||
|
|
||||||
# Intel GPU with FP32 support
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f32
|
|
||||||
|
|
||||||
# Intel GPU with FP32 support and extra dependencies
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f32-extras
|
|
||||||
```
|
|
||||||
|
|
||||||
### Vulkan GPU Images:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Vulkan with core features
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-vulkan
|
|
||||||
```
|
|
||||||
|
|
||||||
### AIO Images (pre-downloaded models):
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# CPU version
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
||||||
|
# or, if you have an Nvidia GPU:
|
||||||
# NVIDIA CUDA 12 version
|
# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
|
||||||
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
|
|
||||||
|
|
||||||
# NVIDIA CUDA 11 version
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11
|
|
||||||
|
|
||||||
# Intel GPU version
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel-f16
|
|
||||||
|
|
||||||
# AMD GPU version
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas
|
|
||||||
```
|
```
|
||||||
|
|
||||||
For more information about the AIO images and pre-downloaded models, see [Container Documentation](https://localai.io/basics/container/).
|
|
||||||
|
|
||||||
To load models:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# From the model gallery (see available models with `local-ai models list`, in the WebUI from the model tab, or visiting https://models.localai.io)
|
|
||||||
local-ai run llama-3.2-1b-instruct:q4_k_m
|
|
||||||
# Start LocalAI with the phi-2 model directly from huggingface
|
|
||||||
local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
|
|
||||||
# Install and run a model from the Ollama OCI registry
|
|
||||||
local-ai run ollama://gemma:2b
|
|
||||||
# Run a model from a configuration file
|
|
||||||
local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
|
|
||||||
# Install and run a model from a standard OCI registry (e.g., Docker Hub)
|
|
||||||
local-ai run oci://localai/phi-2:latest
|
|
||||||
```
|
|
||||||
|
|
||||||
For more information, see [💻 Getting started](https://localai.io/basics/getting_started/index.html)
|
|
||||||
|
|
||||||
## 📰 Latest project news
|
|
||||||
|
|
||||||
- June 2025: [Backend management](https://github.com/mudler/LocalAI/pull/5607) has been added. Attention: extras images are going to be deprecated from the next release! Read [the backend management PR](https://github.com/mudler/LocalAI/pull/5607).
|
|
||||||
- May 2025: [Audio input](https://github.com/mudler/LocalAI/pull/5466) and [Reranking](https://github.com/mudler/LocalAI/pull/5396) in llama.cpp backend, [Realtime API](https://github.com/mudler/LocalAI/pull/5392), Support to Gemma, SmollVLM, and more multimodal models (available in the gallery).
|
|
||||||
- May 2025: Important: image name changes [See release](https://github.com/mudler/LocalAI/releases/tag/v2.29.0)
|
|
||||||
- Apr 2025: Rebrand, WebUI enhancements
|
|
||||||
- Apr 2025: [LocalAGI](https://github.com/mudler/LocalAGI) and [LocalRecall](https://github.com/mudler/LocalRecall) join the LocalAI family stack.
|
|
||||||
- Apr 2025: WebUI overhaul, AIO images updates
|
|
||||||
- Feb 2025: Backend cleanup, Breaking changes, new backends (kokoro, OutelTTS, faster-whisper), Nvidia L4T images
|
|
||||||
- Jan 2025: LocalAI model release: https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.3, SANA support in diffusers: https://github.com/mudler/LocalAI/pull/4603
|
|
||||||
- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 )
|
|
||||||
- Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 )
|
|
||||||
- Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204
|
|
||||||
- Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples)
|
|
||||||
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
|
|
||||||
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723. P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
|
|
||||||
- May 2024: 🔥🔥 Decentralized P2P llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
|
|
||||||
- May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
|
|
||||||
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
|
|
||||||
|
|
||||||
Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
|
||||||
|
|
||||||
## 🚀 [Features](https://localai.io/features/)
|
## 🚀 [Features](https://localai.io/features/)
|
||||||
|
|
||||||
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
|
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
|
||||||
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
|
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
|
||||||
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
|
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
|
||||||
- 🎨 [Image generation](https://localai.io/features/image-generation)
|
- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
|
||||||
- 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/)
|
- 🔥 [OpenAI functions](https://localai.io/features/openai-functions/) 🆕
|
||||||
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
|
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
|
||||||
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
|
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
|
||||||
- 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
|
- 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
|
||||||
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
|
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
|
||||||
- 📈 [Reranker API](https://localai.io/features/reranker/)
|
- 🆕 [Reranker API](https://localai.io/features/reranker/)
|
||||||
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
|
|
||||||
- [Agentic capabilities](https://github.com/mudler/LocalAGI)
|
|
||||||
- 🔊 Voice activity detection (Silero-VAD support)
|
|
||||||
- 🌍 Integrated WebUI!
|
|
||||||
|
|
||||||
|
## 💻 Usage
|
||||||
|
|
||||||
|
Check out the [Getting started](https://localai.io/basics/getting_started/index.html) section in our documentation.
|
||||||
|
|
||||||
### 🔗 Community and integrations
|
### 🔗 Community and integrations
|
||||||
|
|
||||||
@@ -256,7 +107,6 @@ Build and deploy custom containers:
|
|||||||
WebUIs:
|
WebUIs:
|
||||||
- https://github.com/Jirubizu/localai-admin
|
- https://github.com/Jirubizu/localai-admin
|
||||||
- https://github.com/go-skynet/LocalAI-frontend
|
- https://github.com/go-skynet/LocalAI-frontend
|
||||||
- QA-Pilot(An interactive chat project that leverages LocalAI LLMs for rapid understanding and navigation of GitHub code repository) https://github.com/reid41/QA-Pilot
|
|
||||||
|
|
||||||
Model galleries
|
Model galleries
|
||||||
- https://github.com/go-skynet/model-gallery
|
- https://github.com/go-skynet/model-gallery
|
||||||
@@ -264,24 +114,18 @@ Model galleries
|
|||||||
Other:
|
Other:
|
||||||
- Helm chart https://github.com/go-skynet/helm-charts
|
- Helm chart https://github.com/go-skynet/helm-charts
|
||||||
- VSCode extension https://github.com/badgooooor/localai-vscode-plugin
|
- VSCode extension https://github.com/badgooooor/localai-vscode-plugin
|
||||||
- Langchain: https://python.langchain.com/docs/integrations/providers/localai/
|
|
||||||
- Terminal utility https://github.com/djcopley/ShellOracle
|
- Terminal utility https://github.com/djcopley/ShellOracle
|
||||||
- Local Smart assistant https://github.com/mudler/LocalAGI
|
- Local Smart assistant https://github.com/mudler/LocalAGI
|
||||||
- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation / https://github.com/valentinfrlch/ha-gpt4vision
|
- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation
|
||||||
- Discord bot https://github.com/mudler/LocalAGI/tree/main/examples/discord
|
- Discord bot https://github.com/mudler/LocalAGI/tree/main/examples/discord
|
||||||
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
|
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
|
||||||
- Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
|
|
||||||
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
|
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
|
||||||
- Another Telegram Bot https://github.com/JackBekket/Hellper
|
|
||||||
- Auto-documentation https://github.com/JackBekket/Reflexia
|
|
||||||
- Github bot which answer on issues, with code and documentation as context https://github.com/JackBekket/GitHelper
|
|
||||||
- Github Actions: https://github.com/marketplace/actions/start-localai
|
|
||||||
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/
|
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/
|
||||||
|
|
||||||
|
|
||||||
### 🔗 Resources
|
### 🔗 Resources
|
||||||
|
|
||||||
- [LLM finetuning guide](https://localai.io/docs/advanced/fine-tuning/)
|
- 🆕 New! [LLM finetuning guide](https://localai.io/docs/advanced/fine-tuning/)
|
||||||
- [How to build locally](https://localai.io/basics/build/index.html)
|
- [How to build locally](https://localai.io/basics/build/index.html)
|
||||||
- [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes)
|
- [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes)
|
||||||
- [Projects integrating LocalAI](https://localai.io/docs/integrations/)
|
- [Projects integrating LocalAI](https://localai.io/docs/integrations/)
|
||||||
@@ -289,8 +133,6 @@ Other:
|
|||||||
|
|
||||||
## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
|
## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
|
||||||
|
|
||||||
- [Run Visual studio code with LocalAI (SUSE)](https://www.suse.com/c/running-ai-locally/)
|
|
||||||
- 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/)
|
|
||||||
- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
|
- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
|
||||||
- [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)
|
- [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)
|
||||||
- [Create a slackbot for teams and OSS projects that answer to documentation](https://mudler.pm/posts/smart-slackbot-for-teams/)
|
- [Create a slackbot for teams and OSS projects that answer to documentation](https://mudler.pm/posts/smart-slackbot-for-teams/)
|
||||||
@@ -318,16 +160,17 @@ If you utilize this repository, data in a downstream project, please consider ci
|
|||||||
|
|
||||||
Support the project by becoming [a backer or sponsor](https://github.com/sponsors/mudler). Your logo will show up here with a link to your website.
|
Support the project by becoming [a backer or sponsor](https://github.com/sponsors/mudler). Your logo will show up here with a link to your website.
|
||||||
|
|
||||||
A huge thank you to our generous sponsors who support this project covering CI expenses, and our [Sponsor list](https://github.com/sponsors/mudler):
|
A huge thank you to our generous sponsors who support this project:
|
||||||
|
|
||||||
<p align="center">
|
|  |
|
||||||
<a href="https://www.spectrocloud.com/" target="blank">
|
|:-----------------------------------------------:|
|
||||||
<img height="200" src="https://github.com/user-attachments/assets/72eab1dd-8b93-4fc0-9ade-84db49f24962">
|
| [Spectro Cloud](https://www.spectrocloud.com/) |
|
||||||
</a>
|
| Spectro Cloud kindly supports LocalAI by providing GPU and computing resources to run tests on lamdalabs! |
|
||||||
<a href="https://www.premai.io/" target="blank">
|
|
||||||
<img height="200" src="https://github.com/mudler/LocalAI/assets/2420543/42e4ca83-661e-4f79-8e46-ae43689683d6"> <br>
|
And a huge shout-out to individuals sponsoring the project by donating hardware or backing the project.
|
||||||
</a>
|
|
||||||
</p>
|
- [Sponsor list](https://github.com/sponsors/mudler)
|
||||||
|
- JDAM00 (donating HW for the CI)
|
||||||
|
|
||||||
## 🌟 Star history
|
## 🌟 Star history
|
||||||
|
|
||||||
@@ -337,7 +180,7 @@ A huge thank you to our generous sponsors who support this project covering CI e
|
|||||||
|
|
||||||
LocalAI is a community-driven project created by [Ettore Di Giacinto](https://github.com/mudler/).
|
LocalAI is a community-driven project created by [Ettore Di Giacinto](https://github.com/mudler/).
|
||||||
|
|
||||||
MIT - Author Ettore Di Giacinto <mudler@localai.io>
|
MIT - Author Ettore Di Giacinto
|
||||||
|
|
||||||
## 🙇 Acknowledgements
|
## 🙇 Acknowledgements
|
||||||
|
|
||||||
@@ -349,6 +192,7 @@ LocalAI couldn't have been built without the help of great software already avai
|
|||||||
- https://github.com/antimatter15/alpaca.cpp
|
- https://github.com/antimatter15/alpaca.cpp
|
||||||
- https://github.com/EdVince/Stable-Diffusion-NCNN
|
- https://github.com/EdVince/Stable-Diffusion-NCNN
|
||||||
- https://github.com/ggerganov/whisper.cpp
|
- https://github.com/ggerganov/whisper.cpp
|
||||||
|
- https://github.com/saharNooby/rwkv.cpp
|
||||||
- https://github.com/rhasspy/piper
|
- https://github.com/rhasspy/piper
|
||||||
|
|
||||||
## 🤗 Contributors
|
## 🤗 Contributors
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
embeddings: true
|
|
||||||
name: text-embedding-ada-002
|
name: text-embedding-ada-002
|
||||||
|
backend: bert-embeddings
|
||||||
parameters:
|
parameters:
|
||||||
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
|
model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
|
||||||
|
|
||||||
usage: |
|
usage: |
|
||||||
You can test this model with curl like this:
|
You can test this model with curl like this:
|
||||||
|
|||||||
@@ -1,17 +1,56 @@
|
|||||||
name: stablediffusion
|
name: stablediffusion
|
||||||
backend: stablediffusion-ggml
|
backend: stablediffusion
|
||||||
cfg_scale: 4.5
|
|
||||||
|
|
||||||
options:
|
|
||||||
- sampler:euler
|
|
||||||
parameters:
|
parameters:
|
||||||
model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf
|
model: stablediffusion_assets
|
||||||
step: 25
|
|
||||||
|
license: "BSD-3"
|
||||||
|
urls:
|
||||||
|
- https://github.com/EdVince/Stable-Diffusion-NCNN
|
||||||
|
- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE
|
||||||
|
|
||||||
|
description: |
|
||||||
|
Stable Diffusion in NCNN with c++, supported txt2img and img2img
|
||||||
|
|
||||||
download_files:
|
download_files:
|
||||||
- filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
|
- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
|
||||||
sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f"
|
sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
|
||||||
uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
|
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
|
||||||
|
- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
|
||||||
|
sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
|
||||||
|
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
|
||||||
|
- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
|
||||||
|
sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
|
||||||
|
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
|
||||||
|
- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
|
||||||
|
sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
|
||||||
|
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin"
|
||||||
|
- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin"
|
||||||
|
sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
|
||||||
|
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin"
|
||||||
|
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
|
||||||
|
sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
|
||||||
|
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin"
|
||||||
|
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
|
||||||
|
sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
|
||||||
|
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
|
||||||
|
- filename: "stablediffusion_assets/log_sigmas.bin"
|
||||||
|
sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
|
||||||
|
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
|
||||||
|
- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
|
||||||
|
sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
|
||||||
|
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
|
||||||
|
- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
|
||||||
|
sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
|
||||||
|
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
|
||||||
|
- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
|
||||||
|
sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
|
||||||
|
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
|
||||||
|
- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
|
||||||
|
sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
|
||||||
|
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
|
||||||
|
- filename: "stablediffusion_assets/vocab.txt"
|
||||||
|
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
|
||||||
|
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
|
||||||
|
|
||||||
usage: |
|
usage: |
|
||||||
curl http://localhost:8080/v1/images/generations \
|
curl http://localhost:8080/v1/images/generations \
|
||||||
|
|||||||
@@ -1,13 +1,7 @@
|
|||||||
name: jina-reranker-v1-base-en
|
name: jina-reranker-v1-base-en
|
||||||
reranking: true
|
backend: rerankers
|
||||||
f16: true
|
|
||||||
parameters:
|
parameters:
|
||||||
model: jina-reranker-v1-tiny-en.f16.gguf
|
model: cross-encoder
|
||||||
|
|
||||||
download_files:
|
|
||||||
- filename: jina-reranker-v1-tiny-en.f16.gguf
|
|
||||||
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
|
|
||||||
uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf
|
|
||||||
|
|
||||||
usage: |
|
usage: |
|
||||||
You can test this model with curl like this:
|
You can test this model with curl like this:
|
||||||
|
|||||||
@@ -1,57 +1,59 @@
|
|||||||
context_size: 8192
|
|
||||||
f16: true
|
|
||||||
function:
|
|
||||||
grammar:
|
|
||||||
no_mixed_free_string: true
|
|
||||||
schema_type: llama3.1 # or JSON is supported too (json)
|
|
||||||
response_regex:
|
|
||||||
- <function=(?P<name>\w+)>(?P<arguments>.*)</function>
|
|
||||||
mmap: true
|
|
||||||
name: gpt-4
|
name: gpt-4
|
||||||
|
mmap: true
|
||||||
parameters:
|
parameters:
|
||||||
model: Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
|
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
|
||||||
|
|
||||||
|
template:
|
||||||
|
chat_message: |
|
||||||
|
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
||||||
|
{{- if .FunctionCall }}
|
||||||
|
<tool_call>
|
||||||
|
{{- else if eq .RoleName "tool" }}
|
||||||
|
<tool_response>
|
||||||
|
{{- end }}
|
||||||
|
{{- if .Content}}
|
||||||
|
{{.Content }}
|
||||||
|
{{- end }}
|
||||||
|
{{- if .FunctionCall}}
|
||||||
|
{{toJson .FunctionCall}}
|
||||||
|
{{- end }}
|
||||||
|
{{- if .FunctionCall }}
|
||||||
|
</tool_call>
|
||||||
|
{{- else if eq .RoleName "tool" }}
|
||||||
|
</tool_response>
|
||||||
|
{{- end }}<|im_end|>
|
||||||
|
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
|
||||||
|
function: |
|
||||||
|
<|im_start|>system
|
||||||
|
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||||
|
<tools>
|
||||||
|
{{range .Functions}}
|
||||||
|
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||||
|
{{end}}
|
||||||
|
</tools>
|
||||||
|
Use the following pydantic model json schema for each tool call you will make:
|
||||||
|
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
|
||||||
|
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
|
||||||
|
<tool_call>
|
||||||
|
{'arguments': <args-dict>, 'name': <function-name>}
|
||||||
|
</tool_call><|im_end|>
|
||||||
|
{{.Input -}}
|
||||||
|
<|im_start|>assistant
|
||||||
|
<tool_call>
|
||||||
|
chat: |
|
||||||
|
{{.Input -}}
|
||||||
|
<|im_start|>assistant
|
||||||
|
completion: |
|
||||||
|
{{.Input}}
|
||||||
|
context_size: 4096
|
||||||
|
f16: true
|
||||||
stopwords:
|
stopwords:
|
||||||
- <|im_end|>
|
- <|im_end|>
|
||||||
- <dummy32000>
|
- <dummy32000>
|
||||||
- <|eot_id|>
|
- "\n</tool_call>"
|
||||||
- <|end_of_text|>
|
- "\n\n\n"
|
||||||
template:
|
usage: |
|
||||||
chat: |
|
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||||
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
|
"model": "gpt-4",
|
||||||
You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
|
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
|
||||||
{{.Input }}
|
}'
|
||||||
<|start_header_id|>assistant<|end_header_id|>
|
|
||||||
chat_message: |
|
|
||||||
<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
|
|
||||||
{{ if .FunctionCall -}}
|
|
||||||
{{ else if eq .RoleName "tool" -}}
|
|
||||||
The Function was executed and the response was:
|
|
||||||
{{ end -}}
|
|
||||||
{{ if .Content -}}
|
|
||||||
{{.Content -}}
|
|
||||||
{{ else if .FunctionCall -}}
|
|
||||||
{{ range .FunctionCall }}
|
|
||||||
[{{.FunctionCall.Name}}({{.FunctionCall.Arguments}})]
|
|
||||||
{{ end }}
|
|
||||||
{{ end -}}
|
|
||||||
<|eot_id|>
|
|
||||||
completion: |
|
|
||||||
{{.Input}}
|
|
||||||
function: |
|
|
||||||
<|start_header_id|>system<|end_header_id|>
|
|
||||||
You are an expert in composing functions. You are given a question and a set of possible functions.
|
|
||||||
Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
|
|
||||||
If none of the functions can be used, point it out. If the given question lacks the parameters required by the function, also point it out. You should only return the function call in tools call sections.
|
|
||||||
If you decide to invoke any of the function(s), you MUST put it in the format as follows:
|
|
||||||
[func_name1(params_name1=params_value1,params_name2=params_value2,...),func_name2(params_name1=params_value1,params_name2=params_value2,...)]
|
|
||||||
You SHOULD NOT include any other text in the response.
|
|
||||||
Here is a list of functions in JSON format that you can invoke.
|
|
||||||
{{toJson .Functions}}
|
|
||||||
<|eot_id|><|start_header_id|>user<|end_header_id|>
|
|
||||||
{{.Input}}
|
|
||||||
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
|
|
||||||
|
|
||||||
download_files:
|
|
||||||
- filename: Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
|
|
||||||
sha256: 2e220a14ba4328fee38cf36c2c068261560f999fadb5725ce5c6d977cb5126b5
|
|
||||||
uri: huggingface://bartowski/Hermes-3-Llama-3.2-3B-GGUF/Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
|
|
||||||
|
|||||||
@@ -1,8 +0,0 @@
|
|||||||
backend: silero-vad
|
|
||||||
name: silero-vad
|
|
||||||
parameters:
|
|
||||||
model: silero-vad.onnx
|
|
||||||
download_files:
|
|
||||||
- filename: silero-vad.onnx
|
|
||||||
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
|
|
||||||
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
|
|
||||||
@@ -1,49 +1,31 @@
|
|||||||
|
backend: llama-cpp
|
||||||
context_size: 4096
|
context_size: 4096
|
||||||
f16: true
|
f16: true
|
||||||
mmap: true
|
mmap: true
|
||||||
mmproj: minicpm-v-2_6-mmproj-f16.gguf
|
name: gpt-4-vision-preview
|
||||||
name: gpt-4o
|
|
||||||
|
roles:
|
||||||
|
user: "USER:"
|
||||||
|
assistant: "ASSISTANT:"
|
||||||
|
system: "SYSTEM:"
|
||||||
|
|
||||||
|
mmproj: bakllava-mmproj.gguf
|
||||||
parameters:
|
parameters:
|
||||||
model: minicpm-v-2_6-Q4_K_M.gguf
|
model: bakllava.gguf
|
||||||
stopwords:
|
|
||||||
- <|im_end|>
|
|
||||||
- <dummy32000>
|
|
||||||
- </s>
|
|
||||||
- <|endoftext|>
|
|
||||||
template:
|
template:
|
||||||
chat: |
|
chat: |
|
||||||
{{.Input -}}
|
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
||||||
<|im_start|>assistant
|
|
||||||
chat_message: |
|
|
||||||
<|im_start|>{{ .RoleName }}
|
|
||||||
{{ if .FunctionCall -}}
|
|
||||||
Function call:
|
|
||||||
{{ else if eq .RoleName "tool" -}}
|
|
||||||
Function response:
|
|
||||||
{{ end -}}
|
|
||||||
{{ if .Content -}}
|
|
||||||
{{.Content }}
|
|
||||||
{{ end -}}
|
|
||||||
{{ if .FunctionCall -}}
|
|
||||||
{{toJson .FunctionCall}}
|
|
||||||
{{ end -}}<|im_end|>
|
|
||||||
completion: |
|
|
||||||
{{.Input}}
|
{{.Input}}
|
||||||
function: |
|
ASSISTANT:
|
||||||
<|im_start|>system
|
|
||||||
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
|
||||||
{{range .Functions}}
|
|
||||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
|
||||||
{{end}}
|
|
||||||
For each function call return a json object with function name and arguments
|
|
||||||
<|im_end|>
|
|
||||||
{{.Input -}}
|
|
||||||
<|im_start|>assistant
|
|
||||||
|
|
||||||
download_files:
|
download_files:
|
||||||
- filename: minicpm-v-2_6-Q4_K_M.gguf
|
- filename: bakllava.gguf
|
||||||
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
|
uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
|
||||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
|
- filename: bakllava-mmproj.gguf
|
||||||
- filename: minicpm-v-2_6-mmproj-f16.gguf
|
uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
|
||||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
|
|
||||||
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd
|
usage: |
|
||||||
|
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||||
|
"model": "gpt-4-vision-preview",
|
||||||
|
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
|
||||||
|
|||||||
@@ -129,7 +129,7 @@ detect_gpu
|
|||||||
detect_gpu_size
|
detect_gpu_size
|
||||||
|
|
||||||
PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
|
PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
|
||||||
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vad.yaml,/aio/${PROFILE}/vision.yaml}"
|
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
|
||||||
|
|
||||||
check_vars
|
check_vars
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
embeddings: true
|
|
||||||
name: text-embedding-ada-002
|
name: text-embedding-ada-002
|
||||||
|
backend: sentencetransformers
|
||||||
parameters:
|
parameters:
|
||||||
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
|
model: all-MiniLM-L6-v2
|
||||||
|
|
||||||
usage: |
|
usage: |
|
||||||
You can test this model with curl like this:
|
You can test this model with curl like this:
|
||||||
|
|||||||
@@ -1,13 +1,7 @@
|
|||||||
name: jina-reranker-v1-base-en
|
name: jina-reranker-v1-base-en
|
||||||
reranking: true
|
backend: rerankers
|
||||||
f16: true
|
|
||||||
parameters:
|
parameters:
|
||||||
model: jina-reranker-v1-tiny-en.f16.gguf
|
model: cross-encoder
|
||||||
|
|
||||||
download_files:
|
|
||||||
- filename: jina-reranker-v1-tiny-en.f16.gguf
|
|
||||||
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
|
|
||||||
uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf
|
|
||||||
|
|
||||||
usage: |
|
usage: |
|
||||||
You can test this model with curl like this:
|
You can test this model with curl like this:
|
||||||
|
|||||||
@@ -1,53 +1,59 @@
|
|||||||
context_size: 4096
|
|
||||||
f16: true
|
|
||||||
function:
|
|
||||||
capture_llm_results:
|
|
||||||
- (?s)<Thought>(.*?)</Thought>
|
|
||||||
grammar:
|
|
||||||
properties_order: name,arguments
|
|
||||||
json_regex_match:
|
|
||||||
- (?s)<Output>(.*?)</Output>
|
|
||||||
replace_llm_results:
|
|
||||||
- key: (?s)<Thought>(.*?)</Thought>
|
|
||||||
value: ""
|
|
||||||
mmap: true
|
|
||||||
name: gpt-4
|
name: gpt-4
|
||||||
|
mmap: true
|
||||||
parameters:
|
parameters:
|
||||||
model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
|
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
|
||||||
stopwords:
|
|
||||||
- <|im_end|>
|
|
||||||
- <dummy32000>
|
|
||||||
- </s>
|
|
||||||
template:
|
template:
|
||||||
chat: |
|
|
||||||
{{.Input -}}
|
|
||||||
<|im_start|>assistant
|
|
||||||
chat_message: |
|
chat_message: |
|
||||||
<|im_start|>{{ .RoleName }}
|
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
||||||
{{ if .FunctionCall -}}
|
{{- if .FunctionCall }}
|
||||||
Function call:
|
<tool_call>
|
||||||
{{ else if eq .RoleName "tool" -}}
|
{{- else if eq .RoleName "tool" }}
|
||||||
Function response:
|
<tool_response>
|
||||||
{{ end -}}
|
{{- end }}
|
||||||
{{ if .Content -}}
|
{{- if .Content}}
|
||||||
{{.Content }}
|
{{.Content }}
|
||||||
{{ end -}}
|
{{- end }}
|
||||||
{{ if .FunctionCall -}}
|
{{- if .FunctionCall}}
|
||||||
{{toJson .FunctionCall}}
|
{{toJson .FunctionCall}}
|
||||||
{{ end -}}<|im_end|>
|
{{- end }}
|
||||||
completion: |
|
{{- if .FunctionCall }}
|
||||||
{{.Input}}
|
</tool_call>
|
||||||
|
{{- else if eq .RoleName "tool" }}
|
||||||
|
</tool_response>
|
||||||
|
{{- end }}<|im_end|>
|
||||||
|
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
|
||||||
function: |
|
function: |
|
||||||
<|im_start|>system
|
<|im_start|>system
|
||||||
You are an AI assistant that executes function calls, and these are the tools at your disposal:
|
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||||
|
<tools>
|
||||||
{{range .Functions}}
|
{{range .Functions}}
|
||||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||||
{{end}}
|
{{end}}
|
||||||
<|im_end|>
|
</tools>
|
||||||
|
Use the following pydantic model json schema for each tool call you will make:
|
||||||
|
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
|
||||||
|
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
|
||||||
|
<tool_call>
|
||||||
|
{'arguments': <args-dict>, 'name': <function-name>}
|
||||||
|
</tool_call><|im_end|>
|
||||||
{{.Input -}}
|
{{.Input -}}
|
||||||
<|im_start|>assistant
|
<|im_start|>assistant
|
||||||
|
<tool_call>
|
||||||
download_files:
|
chat: |
|
||||||
- filename: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
|
{{.Input -}}
|
||||||
sha256: 4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4
|
<|im_start|>assistant
|
||||||
uri: huggingface://mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
|
completion: |
|
||||||
|
{{.Input}}
|
||||||
|
context_size: 4096
|
||||||
|
f16: true
|
||||||
|
stopwords:
|
||||||
|
- <|im_end|>
|
||||||
|
- <dummy32000>
|
||||||
|
- "\n</tool_call>"
|
||||||
|
- "\n\n\n"
|
||||||
|
usage: |
|
||||||
|
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||||
|
"model": "gpt-4",
|
||||||
|
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
|
||||||
|
}'
|
||||||
|
|||||||
@@ -1,8 +0,0 @@
|
|||||||
backend: silero-vad
|
|
||||||
name: silero-vad
|
|
||||||
parameters:
|
|
||||||
model: silero-vad.onnx
|
|
||||||
download_files:
|
|
||||||
- filename: silero-vad.onnx
|
|
||||||
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
|
|
||||||
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
|
|
||||||
@@ -1,49 +1,35 @@
|
|||||||
|
backend: llama-cpp
|
||||||
context_size: 4096
|
context_size: 4096
|
||||||
f16: true
|
f16: true
|
||||||
mmap: true
|
mmap: true
|
||||||
mmproj: minicpm-v-2_6-mmproj-f16.gguf
|
name: gpt-4-vision-preview
|
||||||
name: gpt-4o
|
|
||||||
|
roles:
|
||||||
|
user: "USER:"
|
||||||
|
assistant: "ASSISTANT:"
|
||||||
|
system: "SYSTEM:"
|
||||||
|
|
||||||
|
mmproj: llava-v1.6-7b-mmproj-f16.gguf
|
||||||
parameters:
|
parameters:
|
||||||
model: minicpm-v-2_6-Q4_K_M.gguf
|
model: llava-v1.6-mistral-7b.Q5_K_M.gguf
|
||||||
stopwords:
|
temperature: 0.2
|
||||||
- <|im_end|>
|
top_k: 40
|
||||||
- <dummy32000>
|
top_p: 0.95
|
||||||
- </s>
|
seed: -1
|
||||||
- <|endoftext|>
|
|
||||||
template:
|
template:
|
||||||
chat: |
|
chat: |
|
||||||
{{.Input -}}
|
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
||||||
<|im_start|>assistant
|
|
||||||
chat_message: |
|
|
||||||
<|im_start|>{{ .RoleName }}
|
|
||||||
{{ if .FunctionCall -}}
|
|
||||||
Function call:
|
|
||||||
{{ else if eq .RoleName "tool" -}}
|
|
||||||
Function response:
|
|
||||||
{{ end -}}
|
|
||||||
{{ if .Content -}}
|
|
||||||
{{.Content }}
|
|
||||||
{{ end -}}
|
|
||||||
{{ if .FunctionCall -}}
|
|
||||||
{{toJson .FunctionCall}}
|
|
||||||
{{ end -}}<|im_end|>
|
|
||||||
completion: |
|
|
||||||
{{.Input}}
|
{{.Input}}
|
||||||
function: |
|
ASSISTANT:
|
||||||
<|im_start|>system
|
|
||||||
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
|
||||||
{{range .Functions}}
|
|
||||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
|
||||||
{{end}}
|
|
||||||
For each function call return a json object with function name and arguments
|
|
||||||
<|im_end|>
|
|
||||||
{{.Input -}}
|
|
||||||
<|im_start|>assistant
|
|
||||||
|
|
||||||
download_files:
|
download_files:
|
||||||
- filename: minicpm-v-2_6-Q4_K_M.gguf
|
- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf
|
||||||
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
|
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf
|
||||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
|
- filename: llava-v1.6-7b-mmproj-f16.gguf
|
||||||
- filename: minicpm-v-2_6-mmproj-f16.gguf
|
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
|
||||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
|
|
||||||
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd
|
usage: |
|
||||||
|
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||||
|
"model": "gpt-4-vision-preview",
|
||||||
|
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
embeddings: true
|
|
||||||
name: text-embedding-ada-002
|
name: text-embedding-ada-002
|
||||||
|
backend: sentencetransformers
|
||||||
parameters:
|
parameters:
|
||||||
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
|
model: all-MiniLM-L6-v2
|
||||||
|
|
||||||
usage: |
|
usage: |
|
||||||
You can test this model with curl like this:
|
You can test this model with curl like this:
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
name: stablediffusion
|
name: stablediffusion
|
||||||
parameters:
|
parameters:
|
||||||
model: Lykon/dreamshaper-8
|
model: runwayml/stable-diffusion-v1-5
|
||||||
backend: diffusers
|
backend: diffusers
|
||||||
step: 25
|
step: 25
|
||||||
f16: true
|
f16: true
|
||||||
|
|||||||
@@ -1,13 +1,7 @@
|
|||||||
name: jina-reranker-v1-base-en
|
name: jina-reranker-v1-base-en
|
||||||
reranking: true
|
backend: rerankers
|
||||||
f16: true
|
|
||||||
parameters:
|
parameters:
|
||||||
model: jina-reranker-v1-tiny-en.f16.gguf
|
model: cross-encoder
|
||||||
|
|
||||||
download_files:
|
|
||||||
- filename: jina-reranker-v1-tiny-en.f16.gguf
|
|
||||||
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
|
|
||||||
uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf
|
|
||||||
|
|
||||||
usage: |
|
usage: |
|
||||||
You can test this model with curl like this:
|
You can test this model with curl like this:
|
||||||
|
|||||||
@@ -1,53 +1,59 @@
|
|||||||
context_size: 4096
|
|
||||||
f16: true
|
|
||||||
function:
|
|
||||||
capture_llm_results:
|
|
||||||
- (?s)<Thought>(.*?)</Thought>
|
|
||||||
grammar:
|
|
||||||
properties_order: name,arguments
|
|
||||||
json_regex_match:
|
|
||||||
- (?s)<Output>(.*?)</Output>
|
|
||||||
replace_llm_results:
|
|
||||||
- key: (?s)<Thought>(.*?)</Thought>
|
|
||||||
value: ""
|
|
||||||
mmap: true
|
|
||||||
name: gpt-4
|
name: gpt-4
|
||||||
|
mmap: false
|
||||||
|
f16: false
|
||||||
parameters:
|
parameters:
|
||||||
model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
|
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
|
||||||
stopwords:
|
|
||||||
- <|im_end|>
|
|
||||||
- <dummy32000>
|
|
||||||
- </s>
|
|
||||||
template:
|
template:
|
||||||
chat: |
|
|
||||||
{{.Input -}}
|
|
||||||
<|im_start|>assistant
|
|
||||||
chat_message: |
|
chat_message: |
|
||||||
<|im_start|>{{ .RoleName }}
|
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
||||||
{{ if .FunctionCall -}}
|
{{- if .FunctionCall }}
|
||||||
Function call:
|
<tool_call>
|
||||||
{{ else if eq .RoleName "tool" -}}
|
{{- else if eq .RoleName "tool" }}
|
||||||
Function response:
|
<tool_response>
|
||||||
{{ end -}}
|
{{- end }}
|
||||||
{{ if .Content -}}
|
{{- if .Content}}
|
||||||
{{.Content }}
|
{{.Content }}
|
||||||
{{ end -}}
|
{{- end }}
|
||||||
{{ if .FunctionCall -}}
|
{{- if .FunctionCall}}
|
||||||
{{toJson .FunctionCall}}
|
{{toJson .FunctionCall}}
|
||||||
{{ end -}}<|im_end|>
|
{{- end }}
|
||||||
completion: |
|
{{- if .FunctionCall }}
|
||||||
{{.Input}}
|
</tool_call>
|
||||||
|
{{- else if eq .RoleName "tool" }}
|
||||||
|
</tool_response>
|
||||||
|
{{- end }}<|im_end|>
|
||||||
|
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
|
||||||
function: |
|
function: |
|
||||||
<|im_start|>system
|
<|im_start|>system
|
||||||
You are an AI assistant that executes function calls, and these are the tools at your disposal:
|
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||||
|
<tools>
|
||||||
{{range .Functions}}
|
{{range .Functions}}
|
||||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||||
{{end}}
|
{{end}}
|
||||||
<|im_end|>
|
</tools>
|
||||||
|
Use the following pydantic model json schema for each tool call you will make:
|
||||||
|
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
|
||||||
|
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
|
||||||
|
<tool_call>
|
||||||
|
{'arguments': <args-dict>, 'name': <function-name>}
|
||||||
|
</tool_call><|im_end|>
|
||||||
{{.Input -}}
|
{{.Input -}}
|
||||||
<|im_start|>assistant
|
<|im_start|>assistant
|
||||||
|
<tool_call>
|
||||||
download_files:
|
chat: |
|
||||||
- filename: localai-functioncall-phi-4-v0.3-q4_k_m.gguf
|
{{.Input -}}
|
||||||
sha256: 23fee048ded2a6e2e1a7b6bbefa6cbf83068f194caa9552aecbaa00fec8a16d5
|
<|im_start|>assistant
|
||||||
uri: huggingface://mudler/LocalAI-functioncall-phi-4-v0.3-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.3-q4_k_m.gguf
|
completion: |
|
||||||
|
{{.Input}}
|
||||||
|
context_size: 4096
|
||||||
|
stopwords:
|
||||||
|
- <|im_end|>
|
||||||
|
- "\n</tool_call>"
|
||||||
|
- <dummy32000>
|
||||||
|
- "\n\n\n"
|
||||||
|
usage: |
|
||||||
|
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||||
|
"model": "gpt-4",
|
||||||
|
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
|
||||||
|
}'
|
||||||
|
|||||||
@@ -1,8 +0,0 @@
|
|||||||
backend: silero-vad
|
|
||||||
name: silero-vad
|
|
||||||
parameters:
|
|
||||||
model: silero-vad.onnx
|
|
||||||
download_files:
|
|
||||||
- filename: silero-vad.onnx
|
|
||||||
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
|
|
||||||
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
|
|
||||||
@@ -1,50 +1,35 @@
|
|||||||
|
backend: llama-cpp
|
||||||
context_size: 4096
|
context_size: 4096
|
||||||
f16: true
|
mmap: false
|
||||||
mmap: true
|
f16: false
|
||||||
mmproj: minicpm-v-2_6-mmproj-f16.gguf
|
name: gpt-4-vision-preview
|
||||||
name: gpt-4o
|
|
||||||
|
roles:
|
||||||
|
user: "USER:"
|
||||||
|
assistant: "ASSISTANT:"
|
||||||
|
system: "SYSTEM:"
|
||||||
|
|
||||||
|
mmproj: llava-v1.6-7b-mmproj-f16.gguf
|
||||||
parameters:
|
parameters:
|
||||||
model: minicpm-v-2_6-Q4_K_M.gguf
|
model: llava-v1.6-mistral-7b.Q5_K_M.gguf
|
||||||
stopwords:
|
temperature: 0.2
|
||||||
- <|im_end|>
|
top_k: 40
|
||||||
- <dummy32000>
|
top_p: 0.95
|
||||||
- </s>
|
seed: -1
|
||||||
- <|endoftext|>
|
|
||||||
template:
|
template:
|
||||||
chat: |
|
chat: |
|
||||||
{{.Input -}}
|
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
||||||
<|im_start|>assistant
|
|
||||||
chat_message: |
|
|
||||||
<|im_start|>{{ .RoleName }}
|
|
||||||
{{ if .FunctionCall -}}
|
|
||||||
Function call:
|
|
||||||
{{ else if eq .RoleName "tool" -}}
|
|
||||||
Function response:
|
|
||||||
{{ end -}}
|
|
||||||
{{ if .Content -}}
|
|
||||||
{{.Content }}
|
|
||||||
{{ end -}}
|
|
||||||
{{ if .FunctionCall -}}
|
|
||||||
{{toJson .FunctionCall}}
|
|
||||||
{{ end -}}<|im_end|>
|
|
||||||
completion: |
|
|
||||||
{{.Input}}
|
{{.Input}}
|
||||||
function: |
|
ASSISTANT:
|
||||||
<|im_start|>system
|
|
||||||
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
|
||||||
{{range .Functions}}
|
|
||||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
|
||||||
{{end}}
|
|
||||||
For each function call return a json object with function name and arguments
|
|
||||||
<|im_end|>
|
|
||||||
{{.Input -}}
|
|
||||||
<|im_start|>assistant
|
|
||||||
|
|
||||||
|
|
||||||
download_files:
|
download_files:
|
||||||
- filename: minicpm-v-2_6-Q4_K_M.gguf
|
- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf
|
||||||
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
|
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf
|
||||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
|
- filename: llava-v1.6-7b-mmproj-f16.gguf
|
||||||
- filename: minicpm-v-2_6-mmproj-f16.gguf
|
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
|
||||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
|
|
||||||
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd
|
usage: |
|
||||||
|
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||||
|
"model": "gpt-4-vision-preview",
|
||||||
|
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
|
||||||
|
|||||||
15
assets.go
15
assets.go
@@ -1,15 +1,6 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import "embed"
|
||||||
rice "github.com/GeertJohan/go.rice"
|
|
||||||
)
|
|
||||||
|
|
||||||
var backendAssets *rice.Box
|
//go:embed backend-assets/*
|
||||||
|
var backendAssets embed.FS
|
||||||
func init() {
|
|
||||||
var err error
|
|
||||||
backendAssets, err = rice.FindBox("backend-assets")
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,131 +0,0 @@
|
|||||||
ARG BASE_IMAGE=ubuntu:22.04
|
|
||||||
|
|
||||||
FROM ${BASE_IMAGE} AS builder
|
|
||||||
ARG BACKEND=rerankers
|
|
||||||
ARG BUILD_TYPE
|
|
||||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
|
||||||
ARG CUDA_MAJOR_VERSION
|
|
||||||
ARG CUDA_MINOR_VERSION
|
|
||||||
ARG SKIP_DRIVERS=false
|
|
||||||
ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}
|
|
||||||
ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
|
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
|
||||||
ARG TARGETARCH
|
|
||||||
ARG TARGETVARIANT
|
|
||||||
ARG GO_VERSION=1.22.6
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
build-essential \
|
|
||||||
ccache \
|
|
||||||
ca-certificates \
|
|
||||||
make \
|
|
||||||
curl unzip \
|
|
||||||
libssl-dev && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
|
|
||||||
# Cuda
|
|
||||||
ENV PATH=/usr/local/cuda/bin:${PATH}
|
|
||||||
|
|
||||||
# HipBLAS requirements
|
|
||||||
ENV PATH=/opt/rocm/bin:${PATH}
|
|
||||||
|
|
||||||
# Vulkan requirements
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
software-properties-common pciutils wget gpg-agent && \
|
|
||||||
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
|
||||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
vulkan-sdk && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# CuBLAS requirements
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
software-properties-common pciutils
|
|
||||||
if [ "amd64" = "$TARGETARCH" ]; then
|
|
||||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
|
||||||
fi
|
|
||||||
if [ "arm64" = "$TARGETARCH" ]; then
|
|
||||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
|
|
||||||
fi
|
|
||||||
dpkg -i cuda-keyring_1.1-1_all.deb && \
|
|
||||||
rm -f cuda-keyring_1.1-1_all.deb && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# If we are building with clblas support, we need the libraries for the builds
|
|
||||||
RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
libclblast-dev && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/* \
|
|
||||||
; fi
|
|
||||||
|
|
||||||
RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
hipblas-dev \
|
|
||||||
rocblas-dev && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/* && \
|
|
||||||
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
|
|
||||||
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
|
|
||||||
ldconfig \
|
|
||||||
; fi
|
|
||||||
|
|
||||||
# Install Go
|
|
||||||
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
|
|
||||||
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin:/usr/local/bin
|
|
||||||
|
|
||||||
# Install grpc compilers
|
|
||||||
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
|
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
|
||||||
RUN echo "TARGETARCH: $TARGETARCH"
|
|
||||||
|
|
||||||
# We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below
|
|
||||||
# but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
|
|
||||||
# here so that we can generate the grpc code for the stablediffusion build
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "amd64" = "$TARGETARCH" ]; then
|
|
||||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
|
|
||||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
|
||||||
rm protoc.zip
|
|
||||||
fi
|
|
||||||
if [ "arm64" = "$TARGETARCH" ]; then
|
|
||||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-aarch_64.zip -o protoc.zip && \
|
|
||||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
|
||||||
rm protoc.zip
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
COPY . /LocalAI
|
|
||||||
|
|
||||||
RUN cd /LocalAI && make backend-assets/grpc/bark-cpp
|
|
||||||
|
|
||||||
FROM scratch
|
|
||||||
|
|
||||||
COPY --from=builder /LocalAI/backend-assets/grpc/bark-cpp ./
|
|
||||||
COPY --from=builder /LocalAI/backend/go/bark/run.sh ./
|
|
||||||
@@ -1,123 +0,0 @@
|
|||||||
ARG BASE_IMAGE=ubuntu:22.04
|
|
||||||
|
|
||||||
FROM ${BASE_IMAGE} AS builder
|
|
||||||
ARG BACKEND=rerankers
|
|
||||||
ARG BUILD_TYPE
|
|
||||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
|
||||||
ARG CUDA_MAJOR_VERSION
|
|
||||||
ARG CUDA_MINOR_VERSION
|
|
||||||
ARG SKIP_DRIVERS=false
|
|
||||||
ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}
|
|
||||||
ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
|
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
|
||||||
ARG TARGETARCH
|
|
||||||
ARG TARGETVARIANT
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
build-essential \
|
|
||||||
ccache \
|
|
||||||
ca-certificates \
|
|
||||||
espeak-ng \
|
|
||||||
curl \
|
|
||||||
libssl-dev \
|
|
||||||
git \
|
|
||||||
git-lfs \
|
|
||||||
unzip \
|
|
||||||
upx-ucl \
|
|
||||||
curl python3-pip \
|
|
||||||
python-is-python3 \
|
|
||||||
python3-dev llvm \
|
|
||||||
python3-venv make && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/* && \
|
|
||||||
pip install --upgrade pip
|
|
||||||
|
|
||||||
|
|
||||||
# Cuda
|
|
||||||
ENV PATH=/usr/local/cuda/bin:${PATH}
|
|
||||||
|
|
||||||
# HipBLAS requirements
|
|
||||||
ENV PATH=/opt/rocm/bin:${PATH}
|
|
||||||
|
|
||||||
# Vulkan requirements
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
software-properties-common pciutils wget gpg-agent && \
|
|
||||||
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
|
||||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
vulkan-sdk && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# CuBLAS requirements
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
software-properties-common pciutils
|
|
||||||
if [ "amd64" = "$TARGETARCH" ]; then
|
|
||||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
|
||||||
fi
|
|
||||||
if [ "arm64" = "$TARGETARCH" ]; then
|
|
||||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
|
|
||||||
fi
|
|
||||||
dpkg -i cuda-keyring_1.1-1_all.deb && \
|
|
||||||
rm -f cuda-keyring_1.1-1_all.deb && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# If we are building with clblas support, we need the libraries for the builds
|
|
||||||
RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
libclblast-dev && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/* \
|
|
||||||
; fi
|
|
||||||
|
|
||||||
RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
hipblas-dev \
|
|
||||||
rocblas-dev && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/* && \
|
|
||||||
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
|
|
||||||
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
|
|
||||||
ldconfig \
|
|
||||||
; fi
|
|
||||||
# Install uv as a system package
|
|
||||||
RUN curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR=/usr/bin sh
|
|
||||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
|
||||||
|
|
||||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
|
||||||
|
|
||||||
# Install grpcio-tools (the version in 22.04 is too old)
|
|
||||||
RUN pip install --user grpcio-tools==1.71.0 grpcio==1.71.0
|
|
||||||
|
|
||||||
COPY python/${BACKEND} /${BACKEND}
|
|
||||||
COPY backend.proto /${BACKEND}/backend.proto
|
|
||||||
COPY python/common/ /${BACKEND}/common
|
|
||||||
|
|
||||||
RUN cd /${BACKEND} && make
|
|
||||||
|
|
||||||
FROM scratch
|
|
||||||
ARG BACKEND=rerankers
|
|
||||||
COPY --from=builder /${BACKEND}/ /
|
|
||||||
@@ -14,10 +14,8 @@ service Backend {
   rpc PredictStream(PredictOptions) returns (stream Reply) {}
   rpc Embedding(PredictOptions) returns (EmbeddingResult) {}
   rpc GenerateImage(GenerateImageRequest) returns (Result) {}
-  rpc GenerateVideo(GenerateVideoRequest) returns (Result) {}
   rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
   rpc TTS(TTSRequest) returns (Result) {}
-  rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
   rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
   rpc Status(HealthMessage) returns (StatusResponse) {}
 
@@ -27,21 +25,6 @@ service Backend {
   rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
 
   rpc Rerank(RerankRequest) returns (RerankResult) {}
-
-  rpc GetMetrics(MetricsRequest) returns (MetricsResponse);
-
-  rpc VAD(VADRequest) returns (VADResponse) {}
 }
 
-// Define the empty request
-message MetricsRequest {}
-
-message MetricsResponse {
-  int32 slot_id = 1;
-  string prompt_json_for_slot = 2; // Stores the prompt as a JSON string.
-  float tokens_per_second = 3;
-  int32 tokens_generated = 4;
-  int32 prompt_tokens_processed = 5;
-}
-
 message RerankRequest {
@@ -150,9 +133,6 @@ message PredictOptions {
   repeated string Images = 42;
   bool UseTokenizerTemplate = 43;
   repeated Message Messages = 44;
-  repeated string Videos = 45;
-  repeated string Audios = 46;
-  string CorrelationId = 47;
 }
 
 // The response message containing the result
@@ -160,13 +140,6 @@ message Reply {
   bytes message = 1;
   int32 tokens = 2;
   int32 prompt_tokens = 3;
-  double timing_prompt_processing = 4;
-  double timing_token_generation = 5;
-  bytes audio = 6;
-}
-
-message GrammarTrigger {
-  string word = 1;
 }
 
 message ModelOptions {
@@ -192,7 +165,11 @@ message ModelOptions {
   int32 NGQA = 20;
   string ModelFile = 21;
 
+  // AutoGPTQ
+  string Device = 22;
+  bool UseTriton = 23;
+  string ModelBaseName = 24;
+  bool UseFastTokenizer = 25;
+
   // Diffusers
   string PipelineType = 26;
@@ -225,12 +202,6 @@ message ModelOptions {
   int32 SwapSpace = 53;
   int32 MaxModelLen = 54;
   int32 TensorParallelSize = 55;
-  string LoadFormat = 58;
-  bool DisableLogStatus = 66;
-  string DType = 67;
-  int32 LimitImagePerPrompt = 68;
-  int32 LimitVideoPerPrompt = 69;
-  int32 LimitAudioPerPrompt = 70;
 
   string MMProj = 41;
 
@@ -244,20 +215,6 @@ message ModelOptions {
 
   bool FlashAttention = 56;
   bool NoKVOffload = 57;
-
-  string ModelPath = 59;
-
-  repeated string LoraAdapters = 60;
-  repeated float LoraScales = 61;
-
-  repeated string Options = 62;
-
-  string CacheTypeKey = 63;
-  string CacheTypeValue = 64;
-
-  repeated GrammarTrigger GrammarTriggers = 65;
-
-  bool Reranking = 71;
 }
 
 message Result {
@@ -273,7 +230,6 @@ message TranscriptRequest {
   string dst = 2;
   string language = 3;
   uint32 threads = 4;
-  bool translate = 5;
 }
 
 message TranscriptResult {
@@ -305,49 +261,11 @@ message GenerateImageRequest {
   int32 CLIPSkip = 11;
 }
 
-message GenerateVideoRequest {
-  string prompt = 1;
-  string start_image = 2; // Path or base64 encoded image for the start frame
-  string end_image = 3; // Path or base64 encoded image for the end frame
-  int32 width = 4;
-  int32 height = 5;
-  int32 num_frames = 6; // Number of frames to generate
-  int32 fps = 7; // Frames per second
-  int32 seed = 8;
-  float cfg_scale = 9; // Classifier-free guidance scale
-  string dst = 10; // Output path for the generated video
-}
-
 message TTSRequest {
   string text = 1;
   string model = 2;
   string dst = 3;
   string voice = 4;
-  optional string language = 5;
-}
-
-message VADRequest {
-  repeated float audio = 1;
-}
-
-message VADSegment {
-  float start = 1;
-  float end = 2;
-}
-
-message VADResponse {
-  repeated VADSegment segments = 1;
-}
-
-message SoundGenerationRequest {
-  string text = 1;
-  string model = 2;
-  string dst = 3;
-  optional float duration = 4;
-  optional float temperature = 5;
-  optional bool sample = 6;
-  optional string src = 7;
-  optional int32 src_divisor = 8;
 }
 
 message TokenizationResponse {
@@ -374,4 +292,4 @@ message StatusResponse {
 message Message {
   string role = 1;
   string content = 2;
 }
@@ -46,14 +46,9 @@ endif
 $(INSTALLED_PACKAGES): grpc_build
 
 $(GRPC_REPO):
-	mkdir -p $(GRPC_REPO)/grpc
-	cd $(GRPC_REPO)/grpc && \
-	git init && \
-	git remote add origin $(GIT_REPO_LIB_GRPC) && \
-	git fetch origin && \
-	git checkout $(TAG_LIB_GRPC) && \
-	git submodule update --init --recursive --depth 1 --single-branch
+	git clone --depth $(GIT_CLONE_DEPTH) -b $(TAG_LIB_GRPC) $(GIT_REPO_LIB_GRPC) $(GRPC_REPO)/grpc
+	cd $(GRPC_REPO)/grpc && git submodule update --jobs 2 --init --recursive --depth $(GIT_CLONE_DEPTH)
 
 $(GRPC_BUILD): $(GRPC_REPO)
 	mkdir -p $(GRPC_BUILD)
 	cd $(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . && cmake --build . --target install
@@ -1,3 +1,20 @@
+
+## XXX: In some versions of CMake clip wasn't being built before llama.
+## This is an hack for now, but it should be fixed in the future.
+set(TARGET myclip)
+add_library(${TARGET} clip.cpp clip.h llava.cpp llava.h)
+install(TARGETS ${TARGET} LIBRARY)
+target_include_directories(myclip PUBLIC .)
+target_include_directories(myclip PUBLIC ../..)
+target_include_directories(myclip PUBLIC ../../common)
+target_link_libraries(${TARGET} PRIVATE common ggml llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_11)
+if (NOT MSVC)
+    target_compile_options(${TARGET} PRIVATE -Wno-cast-qual) # stb_image.h
+endif()
+# END CLIP hack
+
+
 set(TARGET grpc-server)
 set(CMAKE_CXX_STANDARD 17)
 cmake_minimum_required(VERSION 3.15)
@@ -57,12 +74,8 @@ add_library(hw_grpc_proto
   ${hw_proto_srcs}
   ${hw_proto_hdrs} )
 
-add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp httplib.h)
-target_include_directories(${TARGET} PRIVATE ../llava)
-target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})
-
-target_link_libraries(${TARGET} PRIVATE common llama mtmd ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
+add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp)
+target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
   absl::flags_parse
   gRPC::${_REFLECTION}
   gRPC::${_GRPC_GRPCPP}
@@ -1,66 +1,48 @@
 
 LLAMA_VERSION?=
-LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
 
 CMAKE_ARGS?=
 BUILD_TYPE?=
 ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
-TARGET?=--target grpc-server
 
-# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
-CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
-
-# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
+# If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
 ifeq ($(BUILD_TYPE),cublas)
-	CMAKE_ARGS+=-DGGML_CUDA=ON
-# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
+	CMAKE_ARGS+=-DLLAMA_CUBLAS=ON
+# If build type is openblas then we set -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
 # to CMAKE_ARGS automatically
 else ifeq ($(BUILD_TYPE),openblas)
-	CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
-# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
+	CMAKE_ARGS+=-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
+# If build type is clblas (openCL) we set -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
 else ifeq ($(BUILD_TYPE),clblas)
-	CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
+	CMAKE_ARGS+=-DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
 # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
 else ifeq ($(BUILD_TYPE),hipblas)
-	CMAKE_ARGS+=-DGGML_HIP=ON
-# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
+	CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
+# If it's OSX, DO NOT embed the metal library - -DLLAMA_METAL_EMBED_LIBRARY=ON requires further investigation
 # But if it's OSX without metal, disable it here
-else ifeq ($(OS),Darwin)
+else ifeq ($(OS),darwin)
 	ifneq ($(BUILD_TYPE),metal)
-		CMAKE_ARGS+=-DGGML_METAL=OFF
-	else
-		CMAKE_ARGS+=-DGGML_METAL=ON
-		CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
-		TARGET+=--target ggml-metal
+		CMAKE_ARGS+=-DLLAMA_METAL=OFF
 	endif
 endif
 
 ifeq ($(BUILD_TYPE),sycl_f16)
-	CMAKE_ARGS+=-DGGML_SYCL=ON \
-		-DCMAKE_C_COMPILER=icx \
-		-DCMAKE_CXX_COMPILER=icpx \
-		-DCMAKE_CXX_FLAGS="-fsycl" \
-		-DGGML_SYCL_F16=ON
+	CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
 endif
 
 ifeq ($(BUILD_TYPE),sycl_f32)
-	CMAKE_ARGS+=-DGGML_SYCL=ON \
-		-DCMAKE_C_COMPILER=icx \
-		-DCMAKE_CXX_COMPILER=icpx \
-		-DCMAKE_CXX_FLAGS="-fsycl"
+	CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
 endif
 
 llama.cpp:
-	mkdir -p llama.cpp
-	cd llama.cpp && \
-	git init && \
-	git remote add origin $(LLAMA_REPO) && \
-	git fetch origin && \
-	git checkout -b build $(LLAMA_VERSION) && \
-	git submodule update --init --recursive --depth 1 --single-branch
+	git clone --recurse-submodules https://github.com/ggerganov/llama.cpp llama.cpp
+	if [ -z "$(LLAMA_VERSION)" ]; then \
+		exit 1; \
+	fi
+	cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1
 
-llama.cpp/tools/grpc-server: llama.cpp
-	mkdir -p llama.cpp/tools/grpc-server
+llama.cpp/examples/grpc-server: llama.cpp
+	mkdir -p llama.cpp/examples/grpc-server
 	bash prepare.sh
 
 rebuild:
@@ -70,18 +52,18 @@ rebuild:
 
 purge:
 	rm -rf llama.cpp/build
-	rm -rf llama.cpp/tools/grpc-server
+	rm -rf llama.cpp/examples/grpc-server
 	rm -rf grpc-server
 
 clean: purge
 	rm -rf llama.cpp
 
-grpc-server: llama.cpp llama.cpp/tools/grpc-server
+grpc-server: llama.cpp llama.cpp/examples/grpc-server
 	@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
 ifneq (,$(findstring sycl,$(BUILD_TYPE)))
-	+bash -c "source $(ONEAPI_VARS); \
-	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
+	bash -c "source $(ONEAPI_VARS); \
+	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)"
 else
-	+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
+	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)
 endif
 	cp llama.cpp/build/bin/grpc-server .
File diff suppressed because it is too large (Load Diff)
24596  backend/cpp/llama/json.hpp  (new file; diff suppressed because it is too large)
@@ -1,13 +0,0 @@
diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp
index 3cd0d2fa..6c5e811a 100644
--- a/tools/mtmd/clip.cpp
+++ b/tools/mtmd/clip.cpp
@@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
     struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
     int* patches_data = (int*)malloc(ggml_nbytes(patches));
     for (int i = 0; i < num_patches; i++) {
-        patches_data[i] = i + 1;
+        patches_data[i] = i;
     }
     ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
     free(patches_data);
@@ -1,52 +1,20 @@
 #!/bin/bash
 
-## Patches
-## Apply patches from the `patches` directory
-for patch in $(ls patches); do
-    echo "Applying patch $patch"
-    patch -d llama.cpp/ -p1 < patches/$patch
-done
-
-set -e
-
-cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
-cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
-cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/
-cp -rfv llama.cpp/tools/server/utils.hpp llama.cpp/tools/grpc-server/
-cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/
-
-set +e
-if grep -q "grpc-server" llama.cpp/tools/CMakeLists.txt; then
+cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
+cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
+cp -rfv json.hpp llama.cpp/examples/grpc-server/
+cp -rfv utils.hpp llama.cpp/examples/grpc-server/
+
+if grep -q "grpc-server" llama.cpp/examples/CMakeLists.txt; then
     echo "grpc-server already added"
 else
-    echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
+    echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
 fi
-set -e
-
-# Now to keep maximum compatibility with the original server.cpp, we need to remove the index.html.gz.hpp and loading.html.hpp includes
-# and remove the main function
-# TODO: upstream this to the original server.cpp by extracting the upstream main function to a separate file
-awk '
-/int[ \t]+main[ \t]*\(/ { # If the line starts the main function
-    in_main=1; # Set a flag
-    open_braces=0; # Track number of open braces
-}
-in_main {
-    open_braces += gsub(/\{/, "{"); # Count opening braces
-    open_braces -= gsub(/\}/, "}"); # Count closing braces
-    if (open_braces == 0) { # If all braces are closed
-        in_main=0; # End skipping
-    }
-    next; # Skip lines inside main
-}
-!in_main # Print lines not inside main
-' "llama.cpp/tools/server/server.cpp" > llama.cpp/tools/grpc-server/server.cpp
-
-# remove index.html.gz.hpp and loading.html.hpp includes
-if [[ "$OSTYPE" == "darwin"* ]]; then
-    # macOS
-    sed -i '' '/#include "index\.html\.gz\.hpp"/d; /#include "loading\.html\.hpp"/d' llama.cpp/tools/grpc-server/server.cpp
-else
-    # Linux and others
-    sed -i '/#include "index\.html\.gz\.hpp"/d; /#include "loading\.html\.hpp"/d' llama.cpp/tools/grpc-server/server.cpp
-fi
+
+## XXX: In some versions of CMake clip wasn't being built before llama.
+## This is an hack for now, but it should be fixed in the future.
+cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
+cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
+echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
+cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
+cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
510  backend/cpp/llama/utils.hpp  (new file)
@@ -0,0 +1,510 @@
|
|||||||
|
// https://github.com/ggerganov/llama.cpp/blob/master/examples/server/utils.hpp
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <set>
|
||||||
|
#include <mutex>
|
||||||
|
#include <condition_variable>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#include "json.hpp"
|
||||||
|
|
||||||
|
#include "../llava/clip.h"
|
||||||
|
|
||||||
|
using json = nlohmann::json;
|
||||||
|
|
||||||
|
extern bool server_verbose;
|
||||||
|
|
||||||
|
#ifndef SERVER_VERBOSE
|
||||||
|
#define SERVER_VERBOSE 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if SERVER_VERBOSE != 1
|
||||||
|
#define LOG_VERBOSE(MSG, ...)
|
||||||
|
#else
|
||||||
|
#define LOG_VERBOSE(MSG, ...) \
|
||||||
|
do \
|
||||||
|
{ \
|
||||||
|
if (server_verbose) \
|
||||||
|
{ \
|
||||||
|
server_log("VERBOSE", __func__, __LINE__, MSG, __VA_ARGS__); \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define LOG_ERROR( MSG, ...) server_log("ERROR", __func__, __LINE__, MSG, __VA_ARGS__)
|
||||||
|
#define LOG_WARNING(MSG, ...) server_log("WARNING", __func__, __LINE__, MSG, __VA_ARGS__)
|
||||||
|
#define LOG_INFO( MSG, ...) server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__)
|
||||||
|
|
||||||
|
//
|
||||||
|
// parallel
|
||||||
|
//
|
||||||
|
|
||||||
|
enum server_state {
|
||||||
|
SERVER_STATE_LOADING_MODEL, // Server is starting up, model not fully loaded yet
|
||||||
|
SERVER_STATE_READY, // Server is ready and model is loaded
|
||||||
|
SERVER_STATE_ERROR // An error occurred, load_model failed
|
||||||
|
};
|
||||||
|
|
||||||
|
enum task_type {
|
||||||
|
TASK_TYPE_COMPLETION,
|
||||||
|
TASK_TYPE_CANCEL,
|
||||||
|
TASK_TYPE_NEXT_RESPONSE
|
||||||
|
};
|
||||||
|
|
||||||
|
struct task_server {
|
||||||
|
int id = -1; // to be filled by llama_server_queue
|
||||||
|
int target_id;
|
||||||
|
task_type type;
|
||||||
|
json data;
|
||||||
|
bool infill_mode = false;
|
||||||
|
bool embedding_mode = false;
|
||||||
|
int multitask_id = -1;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct task_result {
|
||||||
|
int id;
|
||||||
|
int multitask_id = -1;
|
||||||
|
bool stop;
|
||||||
|
bool error;
|
||||||
|
json result_json;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct task_multi {
|
||||||
|
int id;
|
||||||
|
std::set<int> subtasks_remaining{};
|
||||||
|
std::vector<task_result> results{};
|
||||||
|
};
|
||||||
|
|
||||||
|
// TODO: can become bool if we can't find use of more states
|
||||||
|
enum slot_state
|
||||||
|
{
|
||||||
|
IDLE,
|
||||||
|
PROCESSING,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum slot_command
|
||||||
|
{
|
||||||
|
NONE,
|
||||||
|
LOAD_PROMPT,
|
||||||
|
RELEASE,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct slot_params
|
||||||
|
{
|
||||||
|
bool stream = true;
|
||||||
|
bool cache_prompt = false; // remember the prompt to avoid reprocessing all prompt
|
||||||
|
|
||||||
|
uint32_t seed = -1; // RNG seed
|
||||||
|
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
||||||
|
int32_t n_predict = -1; // new tokens to predict
|
||||||
|
|
||||||
|
std::vector<std::string> antiprompt;
|
||||||
|
|
||||||
|
json input_prefix;
|
||||||
|
json input_suffix;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct slot_image
|
||||||
|
{
|
||||||
|
int32_t id;
|
||||||
|
|
||||||
|
bool request_encode_image = false;
|
||||||
|
float * image_embedding = nullptr;
|
||||||
|
int32_t image_tokens = 0;
|
||||||
|
|
||||||
|
clip_image_u8 * img_data;
|
||||||
|
|
||||||
|
std::string prefix_prompt; // before of this image
|
||||||
|
};
|
||||||
|
|
||||||
|
// completion token output with probabilities
|
||||||
|
struct completion_token_output
|
||||||
|
{
|
||||||
|
struct token_prob
|
||||||
|
{
|
||||||
|
llama_token tok;
|
||||||
|
float prob;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<token_prob> probs;
|
||||||
|
llama_token tok;
|
||||||
|
std::string text_to_send;
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline void server_log(const char *level, const char *function, int line,
|
||||||
|
const char *message, const nlohmann::ordered_json &extra)
|
||||||
|
{
|
||||||
|
nlohmann::ordered_json log
|
||||||
|
{
|
||||||
|
{"timestamp", time(nullptr)},
|
||||||
|
{"level", level},
|
||||||
|
{"function", function},
|
||||||
|
{"line", line},
|
||||||
|
{"message", message},
|
||||||
|
};
|
||||||
|
|
||||||
|
if (!extra.empty())
|
||||||
|
{
|
||||||
|
log.merge_patch(extra);
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::string str = log.dump(-1, ' ', false, json::error_handler_t::replace);
|
||||||
|
printf("%.*s\n", (int)str.size(), str.data());
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// server utils
|
||||||
|
//
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static T json_value(const json &body, const std::string &key, const T &default_value)
|
||||||
|
{
|
||||||
|
// Fallback null to default value
|
||||||
|
return body.contains(key) && !body.at(key).is_null()
|
||||||
|
? body.value(key, default_value)
|
||||||
|
: default_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline std::string format_chatml(std::vector<json> messages)
|
||||||
|
{
|
||||||
|
std::ostringstream chatml_msgs;
|
||||||
|
|
||||||
|
for (auto it = messages.begin(); it != messages.end(); ++it) {
|
||||||
|
chatml_msgs << "<|im_start|>"
|
||||||
|
<< json_value(*it, "role", std::string("user")) << '\n';
|
||||||
|
chatml_msgs << json_value(*it, "content", std::string(""))
|
||||||
|
<< "<|im_end|>\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
chatml_msgs << "<|im_start|>assistant" << '\n';
|
||||||
|
|
||||||
|
return chatml_msgs.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// work queue utils
|
||||||
|
//
|
||||||
|
|
||||||
|
struct llama_server_queue {
|
||||||
|
int id = 0;
|
||||||
|
std::mutex mutex_tasks;
|
||||||
|
// queues
|
||||||
|
std::vector<task_server> queue_tasks;
|
||||||
|
std::vector<task_server> queue_tasks_deferred;
|
||||||
|
std::vector<task_multi> queue_multitasks;
|
||||||
|
std::condition_variable condition_tasks;
|
||||||
|
// callback functions
|
||||||
|
std::function<void(task_server&)> callback_new_task;
|
||||||
|
std::function<void(task_multi&)> callback_finish_multitask;
|
||||||
|
std::function<void(void)> callback_all_task_finished;
|
||||||
|
|
||||||
|
// Add a new task to the end of the queue
|
||||||
|
int post(task_server task) {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
|
if (task.id == -1) {
|
||||||
|
task.id = id++;
|
||||||
|
}
|
||||||
|
queue_tasks.push_back(std::move(task));
|
||||||
|
condition_tasks.notify_one();
|
||||||
|
return task.id;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add a new task, but defer until one slot is available
|
||||||
|
void defer(task_server task) {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
|
queue_tasks_deferred.push_back(std::move(task));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the next id for creating a new task
|
||||||
|
int get_new_id() {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
|
return id++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Register function to process a new task
|
||||||
|
void on_new_task(std::function<void(task_server&)> callback) {
|
||||||
|
callback_new_task = callback;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Register function to process a multitask
|
||||||
|
void on_finish_multitask(std::function<void(task_multi&)> callback) {
|
||||||
|
callback_finish_multitask = callback;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Register the function to be called when the batch of tasks is finished
|
||||||
|
void on_all_tasks_finished(std::function<void(void)> callback) {
|
||||||
|
callback_all_task_finished = callback;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Call when the state of one slot is changed
|
||||||
|
void notify_slot_changed() {
|
||||||
|
// move deferred tasks back to main loop
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
|
for (auto & task : queue_tasks_deferred) {
|
||||||
|
queue_tasks.push_back(std::move(task));
|
||||||
|
}
|
||||||
|
queue_tasks_deferred.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start the main loop. This call is blocking
|
||||||
|
[[noreturn]]
|
||||||
|
void start_loop() {
|
||||||
|
while (true) {
|
||||||
|
// new task arrived
|
||||||
|
LOG_VERBOSE("have new task", {});
|
||||||
|
{
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
|
if (queue_tasks.empty()) {
|
||||||
|
lock.unlock();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
task_server task = queue_tasks.front();
|
||||||
|
queue_tasks.erase(queue_tasks.begin());
|
||||||
|
lock.unlock();
|
||||||
|
LOG_VERBOSE("callback_new_task", {});
|
||||||
|
callback_new_task(task);
|
||||||
|
}
|
||||||
|
LOG_VERBOSE("callback_all_task_finished", {});
|
||||||
|
// process and update all the multitasks
|
||||||
|
auto queue_iterator = queue_multitasks.begin();
|
||||||
|
while (queue_iterator != queue_multitasks.end())
|
||||||
|
{
|
||||||
|
if (queue_iterator->subtasks_remaining.empty())
|
||||||
|
{
|
||||||
|
// all subtasks done == multitask is done
|
||||||
|
task_multi current_multitask = *queue_iterator;
|
||||||
|
callback_finish_multitask(current_multitask);
|
||||||
|
// remove this multitask
|
||||||
|
queue_iterator = queue_multitasks.erase(queue_iterator);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
++queue_iterator;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// all tasks in the current loop is finished
|
||||||
|
callback_all_task_finished();
|
||||||
|
}
|
||||||
|
LOG_VERBOSE("wait for new task", {});
|
||||||
|
// wait for new task
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
|
if (queue_tasks.empty()) {
|
||||||
|
condition_tasks.wait(lock, [&]{
|
||||||
|
return !queue_tasks.empty();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// functions to manage multitasks
|
||||||
|
//
|
||||||
|
|
||||||
|
// add a multitask by specifying the id of all subtask (subtask is a task_server)
|
||||||
|
void add_multitask(int multitask_id, std::vector<int>& sub_ids)
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(mutex_tasks);
|
||||||
|
task_multi multi;
|
||||||
|
multi.id = multitask_id;
|
||||||
|
std::copy(sub_ids.begin(), sub_ids.end(), std::inserter(multi.subtasks_remaining, multi.subtasks_remaining.end()));
|
||||||
|
queue_multitasks.push_back(multi);
|
||||||
|
}
|
||||||
|
|
||||||
|
// update the remaining subtasks, while appending results to multitask
|
||||||
|
void update_multitask(int multitask_id, int subtask_id, task_result& result)
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(mutex_tasks);
|
||||||
|
for (auto& multitask : queue_multitasks)
|
||||||
|
{
|
||||||
|
if (multitask.id == multitask_id)
|
||||||
|
{
|
||||||
|
multitask.subtasks_remaining.erase(subtask_id);
|
||||||
|
multitask.results.push_back(result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct llama_server_response {
|
||||||
|
typedef std::function<void(int, int, task_result&)> callback_multitask_t;
|
||||||
|
callback_multitask_t callback_update_multitask;
|
||||||
|
// for keeping track of all tasks waiting for the result
|
||||||
|
std::set<int> waiting_task_ids;
|
||||||
|
// the main result queue
|
||||||
|
std::vector<task_result> queue_results;
|
||||||
|
std::mutex mutex_results;
|
||||||
|
std::condition_variable condition_results;
|
||||||
|
|
||||||
|
void add_waiting_task_id(int task_id) {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_results);
|
||||||
|
waiting_task_ids.insert(task_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
void remove_waiting_task_id(int task_id) {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_results);
|
||||||
|
waiting_task_ids.erase(task_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
// This function blocks the thread until there is a response for this task_id
|
||||||
|
task_result recv(int task_id) {
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_results);
|
||||||
|
condition_results.wait(lock, [&]{
|
||||||
|
return !queue_results.empty();
|
||||||
|
});
|
||||||
|
LOG_VERBOSE("condition_results unblock", {});
|
||||||
|
|
||||||
|
for (int i = 0; i < (int) queue_results.size(); i++)
|
||||||
|
{
|
||||||
|
if (queue_results[i].id == task_id)
|
||||||
|
{
|
||||||
|
assert(queue_results[i].multitask_id == -1);
|
||||||
|
task_result res = queue_results[i];
|
||||||
|
queue_results.erase(queue_results.begin() + i);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// should never reach here
|
||||||
|
}
|
||||||
|
|
||||||
|
// Register the function to update multitask
|
||||||
|
void on_multitask_update(callback_multitask_t callback) {
|
||||||
|
callback_update_multitask = callback;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Send a new result to a waiting task_id
|
||||||
|
void send(task_result result) {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex_results);
|
||||||
|
LOG_VERBOSE("send new result", {});
|
||||||
|
for (auto& task_id : waiting_task_ids) {
|
||||||
|
// LOG_TEE("waiting task id %i \n", task_id);
|
||||||
|
// for now, tasks that have associated parent multitasks just get erased once multitask picks up the result
|
||||||
|
if (result.multitask_id == task_id)
|
||||||
|
{
|
||||||
|
LOG_VERBOSE("callback_update_multitask", {});
|
||||||
|
callback_update_multitask(task_id, result.id, result);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result.id == task_id)
|
||||||
|
{
|
||||||
|
LOG_VERBOSE("queue_results.push_back", {});
|
||||||
|
queue_results.push_back(result);
|
||||||
|
condition_results.notify_one();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
//
|
||||||
|
// base64 utils (TODO: move to common in the future)
|
||||||
|
//
|
||||||
|
|
||||||
|
static const std::string base64_chars =
|
||||||
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||||
|
"abcdefghijklmnopqrstuvwxyz"
|
||||||
|
"0123456789+/";
|
||||||
|
|
||||||
|
static inline bool is_base64(uint8_t c)
|
||||||
|
{
|
||||||
|
return (isalnum(c) || (c == '+') || (c == '/'));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline std::vector<uint8_t> base64_decode(const std::string & encoded_string)
|
||||||
|
{
|
||||||
|
int i = 0;
|
||||||
|
int j = 0;
|
||||||
|
int in_ = 0;
|
||||||
|
|
||||||
|
int in_len = encoded_string.size();
|
||||||
|
|
||||||
|
uint8_t char_array_4[4];
|
||||||
|
uint8_t char_array_3[3];
|
||||||
|
|
||||||
|
std::vector<uint8_t> ret;
|
||||||
|
|
||||||
|
while (in_len-- && (encoded_string[in_] != '=') && is_base64(encoded_string[in_]))
|
||||||
|
{
|
||||||
|
char_array_4[i++] = encoded_string[in_]; in_++;
|
||||||
|
if (i == 4)
|
||||||
|
{
|
||||||
|
for (i = 0; i <4; i++)
|
||||||
|
{
|
||||||
|
char_array_4[i] = base64_chars.find(char_array_4[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
char_array_3[0] = ((char_array_4[0] ) << 2) + ((char_array_4[1] & 0x30) >> 4);
|
||||||
|
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
|
||||||
|
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
|
||||||
|
|
||||||
|
for (i = 0; (i < 3); i++)
|
||||||
|
{
|
||||||
|
ret.push_back(char_array_3[i]);
|
||||||
|
}
|
||||||
|
i = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i)
|
||||||
|
{
|
||||||
|
for (j = i; j <4; j++)
|
||||||
|
{
|
||||||
|
char_array_4[j] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = 0; j <4; j++)
|
||||||
|
{
|
||||||
|
char_array_4[j] = base64_chars.find(char_array_4[j]);
|
||||||
|
}
|
||||||
|
|
||||||
|
char_array_3[0] = ((char_array_4[0] ) << 2) + ((char_array_4[1] & 0x30) >> 4);
|
||||||
|
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
|
||||||
|
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
|
||||||
|
|
||||||
|
for (j = 0; (j < i - 1); j++)
|
||||||
|
{
|
||||||
|
ret.push_back(char_array_3[j]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// random string / id
|
||||||
|
//
|
||||||
|
|
||||||
|
static std::string random_string()
|
||||||
|
{
|
||||||
|
static const std::string str("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
|
||||||
|
|
||||||
|
std::random_device rd;
|
||||||
|
std::mt19937 generator(rd());
|
||||||
|
|
||||||
|
std::string result(32, ' ');
|
||||||
|
|
||||||
|
for (int i = 0; i < 32; ++i) {
|
||||||
|
result[i] = str[generator() % str.size()];
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::string gen_chatcmplid()
|
||||||
|
{
|
||||||
|
std::stringstream chatcmplid;
|
||||||
|
chatcmplid << "chatcmpl-" << random_string();
|
||||||
|
return chatcmplid.str();
|
||||||
|
}
|
||||||
@@ -1,25 +0,0 @@
INCLUDE_PATH := $(abspath ./)
LIBRARY_PATH := $(abspath ./)

AR?=ar

BUILD_TYPE?=
# keep standard at C11 and C++11
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../sources/bark.cpp/examples -I$(INCLUDE_PATH)/../../../sources/bark.cpp/spm-headers -I$(INCLUDE_PATH)/../../../sources/bark.cpp -O3 -DNDEBUG -std=c++17 -fPIC
LDFLAGS = -L$(LIBRARY_PATH) -L$(LIBRARY_PATH)/../../../sources/bark.cpp/build/examples -lbark -lstdc++ -lm

# warnings
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function

gobark.o:
	$(CXX) $(CXXFLAGS) gobark.cpp -o gobark.o -c $(LDFLAGS)

libbark.a: gobark.o
	cp $(INCLUDE_PATH)/../../../sources/bark.cpp/build/libbark.a ./
	$(AR) rcs libbark.a gobark.o
	$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml.c.o
	$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-alloc.c.o
	$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-backend.c.o

clean:
	rm -f gobark.o libbark.a
@@ -1,85 +0,0 @@
|
|||||||
#include <iostream>
|
|
||||||
#include <tuple>
|
|
||||||
|
|
||||||
#include "bark.h"
|
|
||||||
#include "gobark.h"
|
|
||||||
#include "common.h"
|
|
||||||
#include "ggml.h"
|
|
||||||
|
|
||||||
struct bark_context *c;
|
|
||||||
|
|
||||||
void bark_print_progress_callback(struct bark_context *bctx, enum bark_encoding_step step, int progress, void *user_data) {
|
|
||||||
if (step == bark_encoding_step::SEMANTIC) {
|
|
||||||
printf("\rGenerating semantic tokens... %d%%", progress);
|
|
||||||
} else if (step == bark_encoding_step::COARSE) {
|
|
||||||
printf("\rGenerating coarse tokens... %d%%", progress);
|
|
||||||
} else if (step == bark_encoding_step::FINE) {
|
|
||||||
printf("\rGenerating fine tokens... %d%%", progress);
|
|
||||||
}
|
|
||||||
fflush(stdout);
|
|
||||||
}
|
|
||||||
|
|
||||||
int load_model(char *model) {
|
|
||||||
// initialize bark context
|
|
||||||
struct bark_context_params ctx_params = bark_context_default_params();
|
|
||||||
bark_params params;
|
|
||||||
|
|
||||||
params.model_path = model;
|
|
||||||
|
|
||||||
// ctx_params.verbosity = verbosity;
|
|
||||||
ctx_params.progress_callback = bark_print_progress_callback;
|
|
||||||
ctx_params.progress_callback_user_data = nullptr;
|
|
||||||
|
|
||||||
struct bark_context *bctx = bark_load_model(params.model_path.c_str(), ctx_params, params.seed);
|
|
||||||
if (!bctx) {
|
|
||||||
fprintf(stderr, "%s: Could not load model\n", __func__);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
c = bctx;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int tts(char *text,int threads, char *dst ) {
|
|
||||||
|
|
||||||
ggml_time_init();
|
|
||||||
const int64_t t_main_start_us = ggml_time_us();
|
|
||||||
|
|
||||||
// generate audio
|
|
||||||
if (!bark_generate_audio(c, text, threads)) {
|
|
||||||
fprintf(stderr, "%s: An error occurred. If the problem persists, feel free to open an issue to report it.\n", __func__);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
const float *audio_data = bark_get_audio_data(c);
|
|
||||||
if (audio_data == NULL) {
|
|
||||||
fprintf(stderr, "%s: Could not get audio data\n", __func__);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
const int audio_arr_size = bark_get_audio_data_size(c);
|
|
||||||
|
|
||||||
std::vector<float> audio_arr(audio_data, audio_data + audio_arr_size);
|
|
||||||
|
|
||||||
write_wav_on_disk(audio_arr, dst);
|
|
||||||
|
|
||||||
// report timing
|
|
||||||
{
|
|
||||||
const int64_t t_main_end_us = ggml_time_us();
|
|
||||||
const int64_t t_load_us = bark_get_load_time(c);
|
|
||||||
const int64_t t_eval_us = bark_get_eval_time(c);
|
|
||||||
|
|
||||||
printf("\n\n");
|
|
||||||
printf("%s: load time = %8.2f ms\n", __func__, t_load_us / 1000.0f);
|
|
||||||
printf("%s: eval time = %8.2f ms\n", __func__, t_eval_us / 1000.0f);
|
|
||||||
printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us) / 1000.0f);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int unload() {
|
|
||||||
bark_free(c);
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -1,52 +0,0 @@
package main

// #cgo CXXFLAGS: -I${SRCDIR}/../../../sources/bark.cpp/ -I${SRCDIR}/../../../sources/bark.cpp/encodec.cpp -I${SRCDIR}/../../../sources/bark.cpp/examples -I${SRCDIR}/../../../sources/bark.cpp/spm-headers
// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../sources/bark.cpp/build/examples -L${SRCDIR}/../../../sources/bark.cpp/build/encodec.cpp/ -lbark -lencodec -lcommon
// #include <gobark.h>
// #include <stdlib.h>
import "C"

import (
	"fmt"
	"unsafe"

	"github.com/mudler/LocalAI/pkg/grpc/base"
	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)

type Bark struct {
	base.SingleThread
	threads int
}

func (sd *Bark) Load(opts *pb.ModelOptions) error {

	sd.threads = int(opts.Threads)

	modelFile := C.CString(opts.ModelFile)
	defer C.free(unsafe.Pointer(modelFile))

	ret := C.load_model(modelFile)
	if ret != 0 {
		return fmt.Errorf("inference failed")
	}

	return nil
}

func (sd *Bark) TTS(opts *pb.TTSRequest) error {
	t := C.CString(opts.Text)
	defer C.free(unsafe.Pointer(t))

	dst := C.CString(opts.Dst)
	defer C.free(unsafe.Pointer(dst))

	threads := C.int(sd.threads)

	ret := C.tts(t, threads, dst)
	if ret != 0 {
		return fmt.Errorf("inference failed")
	}

	return nil
}
@@ -1,8 +0,0 @@
#ifdef __cplusplus
extern "C" {
#endif
int load_model(char *model);
int tts(char *text,int threads, char *dst );
#ifdef __cplusplus
}
#endif
@@ -1,3 +0,0 @@
#!/bin/bash
set -ex
exec ./bark-cpp
@@ -1,135 +0,0 @@
|
|||||||
INCLUDE_PATH := $(abspath ./)
|
|
||||||
LIBRARY_PATH := $(abspath ./)
|
|
||||||
|
|
||||||
AR?=ar
|
|
||||||
CMAKE_ARGS?=
|
|
||||||
BUILD_TYPE?=
|
|
||||||
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
|
||||||
# keep standard at C11 and C++11
|
|
||||||
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC
|
|
||||||
|
|
||||||
GOCMD?=go
|
|
||||||
CGO_LDFLAGS?=
|
|
||||||
# Avoid parent make file overwriting CGO_LDFLAGS which is needed for hipblas
|
|
||||||
CGO_LDFLAGS_SYCL=
|
|
||||||
GO_TAGS?=
|
|
||||||
LD_FLAGS?=
|
|
||||||
|
|
||||||
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
|
||||||
|
|
||||||
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
|
|
||||||
ifeq ($(BUILD_TYPE),cublas)
|
|
||||||
CMAKE_ARGS+=-DSD_CUDA=ON
|
|
||||||
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
|
||||||
# to CMAKE_ARGS automatically
|
|
||||||
else ifeq ($(BUILD_TYPE),openblas)
|
|
||||||
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
|
||||||
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
|
||||||
else ifeq ($(BUILD_TYPE),clblas)
|
|
||||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
|
||||||
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
|
||||||
else ifeq ($(BUILD_TYPE),hipblas)
|
|
||||||
CMAKE_ARGS+=-DSD_HIPBLAS=ON
|
|
||||||
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
|
|
||||||
# But if it's OSX without metal, disable it here
|
|
||||||
else ifeq ($(OS),Darwin)
|
|
||||||
ifneq ($(BUILD_TYPE),metal)
|
|
||||||
CMAKE_ARGS+=-DSD_METAL=OFF
|
|
||||||
else
|
|
||||||
CMAKE_ARGS+=-DSD_METAL=ON
|
|
||||||
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
|
||||||
TARGET+=--target ggml-metal
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),sycl_f16)
|
|
||||||
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
|
||||||
-DCMAKE_C_COMPILER=icx \
|
|
||||||
-DCMAKE_CXX_COMPILER=icpx \
|
|
||||||
-DSD_SYCL=ON \
|
|
||||||
-DGGML_SYCL_F16=ON
|
|
||||||
CC=icx
|
|
||||||
CXX=icpx
|
|
||||||
CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
|
|
||||||
CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
|
|
||||||
CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
|
|
||||||
CGO_CXXFLAGS += $(shell pkg-config --cflags mkl-static-lp64-gomp )
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),sycl_f32)
|
|
||||||
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
|
||||||
-DCMAKE_C_COMPILER=icx \
|
|
||||||
-DCMAKE_CXX_COMPILER=icpx \
|
|
||||||
-DSD_SYCL=ON
|
|
||||||
CC=icx
|
|
||||||
CXX=icpx
|
|
||||||
CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
|
|
||||||
CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
|
|
||||||
CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
|
|
||||||
CGO_CXXFLAGS += $(shell pkg-config --cflags mkl-static-lp64-gomp )
|
|
||||||
endif
|
|
||||||
|
|
||||||
# warnings
|
|
||||||
# CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
|
|
||||||
|
|
||||||
# Find all .a archives in ARCHIVE_DIR
|
|
||||||
# (ggml can have different backends cpu, cuda, etc., each backend generates a .a archive)
|
|
||||||
GGML_ARCHIVE_DIR := build/ggml/src/
|
|
||||||
ALL_ARCHIVES := $(shell find $(GGML_ARCHIVE_DIR) -type f -name '*.a')
|
|
||||||
|
|
||||||
# Name of the single merged library
|
|
||||||
COMBINED_LIB := libggmlall.a
|
|
||||||
|
|
||||||
# Rule to merge all the .a files into one
|
|
||||||
$(COMBINED_LIB): $(ALL_ARCHIVES)
|
|
||||||
@echo "Merging all .a into $(COMBINED_LIB)"
|
|
||||||
rm -f $@
|
|
||||||
mkdir -p merge-tmp
|
|
||||||
for a in $(ALL_ARCHIVES); do \
|
|
||||||
( cd merge-tmp && ar x ../$$a ); \
|
|
||||||
done
|
|
||||||
( cd merge-tmp && ar rcs ../$@ *.o )
|
|
||||||
# Ensure we have a proper index
|
|
||||||
ranlib $@
|
|
||||||
# Clean up
|
|
||||||
rm -rf merge-tmp
|
|
||||||
|
|
||||||
build/libstable-diffusion.a:
|
|
||||||
@echo "Building SD with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
|
|
||||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
|
||||||
+bash -c "source $(ONEAPI_VARS); \
|
|
||||||
mkdir -p build && \
|
|
||||||
cd build && \
|
|
||||||
cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \
|
|
||||||
cmake --build . --config Release"
|
|
||||||
else
|
|
||||||
mkdir -p build && \
|
|
||||||
cd build && \
|
|
||||||
cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \
|
|
||||||
cmake --build . --config Release
|
|
||||||
endif
|
|
||||||
$(MAKE) $(COMBINED_LIB)
|
|
||||||
|
|
||||||
gosd.o:
|
|
||||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
|
||||||
+bash -c "source $(ONEAPI_VARS); \
|
|
||||||
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c"
|
|
||||||
else
|
|
||||||
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
|
|
||||||
endif
|
|
||||||
|
|
||||||
libsd.a: gosd.o
|
|
||||||
cp $(INCLUDE_PATH)/build/libstable-diffusion.a ./libsd.a
|
|
||||||
$(AR) rcs libsd.a gosd.o
|
|
||||||
|
|
||||||
stablediffusion-ggml:
|
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_SYCL)" C_INCLUDE_PATH="$(INCLUDE_PATH)" LIBRARY_PATH="$(LIBRARY_PATH)" \
|
|
||||||
CC="$(CC)" CXX="$(CXX)" CGO_CXXFLAGS="$(CGO_CXXFLAGS)" \
|
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o ../../../../backend-assets/grpc/stablediffusion-ggml ./
|
|
||||||
ifneq ($(UPX),)
|
|
||||||
$(UPX) ../../../../backend-assets/grpc/stablediffusion-ggml
|
|
||||||
endif
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -rf gosd.o libsd.a build $(COMBINED_LIB)
|
|
||||||
@@ -1,231 +0,0 @@
|
|||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include <iostream>
|
|
||||||
#include <random>
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
#include "gosd.h"
|
|
||||||
|
|
||||||
// #include "preprocessing.hpp"
|
|
||||||
#include "flux.hpp"
|
|
||||||
#include "stable-diffusion.h"
|
|
||||||
|
|
||||||
#define STB_IMAGE_IMPLEMENTATION
|
|
||||||
#define STB_IMAGE_STATIC
|
|
||||||
#include "stb_image.h"
|
|
||||||
|
|
||||||
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
|
||||||
#define STB_IMAGE_WRITE_STATIC
|
|
||||||
#include "stb_image_write.h"
|
|
||||||
|
|
#define STB_IMAGE_RESIZE_IMPLEMENTATION
#define STB_IMAGE_RESIZE_STATIC
#include "stb_image_resize.h"

// Names of the sampler methods, same order as enum sample_method in stable-diffusion.h
const char* sample_method_str[] = {
    "euler_a",
    "euler",
    "heun",
    "dpm2",
    "dpm++2s_a",
    "dpm++2m",
    "dpm++2mv2",
    "ipndm",
    "ipndm_v",
    "lcm",
    "ddim_trailing",
    "tcd",
};

// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
const char* schedule_str[] = {
    "default",
    "discrete",
    "karras",
    "exponential",
    "ays",
    "gits",
};

sd_ctx_t* sd_c;

sample_method_t sample_method;

int load_model(char *model, char* options[], int threads, int diff) {
    fprintf(stderr, "Loading model!\n");

    char *stableDiffusionModel = "";
    if (diff == 1) {
        stableDiffusionModel = model;
        model = "";
    }

    // Decode options. Options are in the form optname:optvalue, or just optname for booleans.
    char *clip_l_path = "";
    char *clip_g_path = "";
    char *t5xxl_path  = "";
    char *vae_path    = "";
    char *scheduler   = "";
    char *sampler     = "";

    // If options is not NULL, parse options
    for (int i = 0; options[i] != NULL; i++) {
        char *optname = strtok(options[i], ":");
        char *optval = strtok(NULL, ":");
        if (optval == NULL) {
            optval = "true";
        }

        if (!strcmp(optname, "clip_l_path")) {
            clip_l_path = optval;
        }
        if (!strcmp(optname, "clip_g_path")) {
            clip_g_path = optval;
        }
        if (!strcmp(optname, "t5xxl_path")) {
            t5xxl_path = optval;
        }
        if (!strcmp(optname, "vae_path")) {
            vae_path = optval;
        }
        if (!strcmp(optname, "scheduler")) {
            scheduler = optval;
        }
        if (!strcmp(optname, "sampler")) {
            sampler = optval;
        }
    }

    int sample_method_found = -1;
    for (int m = 0; m < N_SAMPLE_METHODS; m++) {
        if (!strcmp(sampler, sample_method_str[m])) {
            sample_method_found = m;
        }
    }
    if (sample_method_found == -1) {
        fprintf(stderr, "Invalid sample method, defaulting to EULER_A!\n");
        sample_method_found = EULER_A;
    }
    sample_method = (sample_method_t)sample_method_found;

    int schedule_found = -1;
    for (int d = 0; d < N_SCHEDULES; d++) {
        if (!strcmp(scheduler, schedule_str[d])) {
            schedule_found = d;
            fprintf(stderr, "Found scheduler: %s\n", scheduler);
        }
    }

    if (schedule_found == -1) {
        fprintf(stderr, "Invalid scheduler, using DEFAULT!\n");
        schedule_found = DEFAULT;
    }

    schedule_t schedule = (schedule_t)schedule_found;

    fprintf(stderr, "Creating context\n");
    sd_ctx_t* sd_ctx = new_sd_ctx(model,
                                  clip_l_path,
                                  clip_g_path,
                                  t5xxl_path,
                                  stableDiffusionModel,
                                  vae_path,
                                  "",
                                  "",
                                  "",
                                  "",
                                  "",
                                  false,
                                  false,
                                  false,
                                  threads,
                                  SD_TYPE_COUNT,
                                  STD_DEFAULT_RNG,
                                  schedule,
                                  false,
                                  false,
                                  false,
                                  false);

    if (sd_ctx == NULL) {
        fprintf(stderr, "failed loading model (generic error)\n");
        return 1;
    }
    fprintf(stderr, "Created context: OK\n");

    sd_c = sd_ctx;

    return 0;
}

int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed, char *dst, float cfg_scale) {

    sd_image_t* results;

    std::vector<int> skip_layers = {7, 8, 9};

    fprintf(stderr, "Generating image\n");

    results = txt2img(sd_c,
                      text,
                      negativeText,
                      -1, // clip_skip
                      cfg_scale,
                      3.5f,
                      0, // eta
                      width,
                      height,
                      sample_method,
                      steps,
                      seed,
                      1,
                      NULL,
                      0.9f,
                      20.f,
                      false,
                      "",
                      skip_layers.data(),
                      skip_layers.size(),
                      0,
                      0.01,
                      0.2);

    if (results == NULL) {
        fprintf(stderr, "NO results\n");
        return 1;
    }

    if (results[0].data == NULL) {
        fprintf(stderr, "Results with no data\n");
        return 1;
    }

    fprintf(stderr, "Writing PNG\n");

    fprintf(stderr, "DST: %s\n", dst);
    fprintf(stderr, "Width: %d\n", results[0].width);
    fprintf(stderr, "Height: %d\n", results[0].height);
    fprintf(stderr, "Channel: %d\n", results[0].channel);
    fprintf(stderr, "Data: %p\n", results[0].data);

    stbi_write_png(dst, results[0].width, results[0].height, results[0].channel,
                   results[0].data, 0, NULL);
    fprintf(stderr, "Saved resulting image to '%s'\n", dst);

    // TODO: free results. Why does it crash?

    free(results[0].data);
    results[0].data = NULL;
    free(results);
    fprintf(stderr, "gen_image is done: '%s'\n", dst);

    return 0;
}

int unload() {
    free_sd_ctx(sd_c);
    return 0;
}
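Illustrative note (not part of the diff): the option strings handed to load_model follow the optname:optvalue convention parsed above, with bare names treated as booleans. A minimal Go sketch of such a list, using placeholder file names, could look like this:

package main

import "fmt"

func main() {
	// Placeholder option strings in the format load_model expects; the sampler and
	// scheduler values must match entries of sample_method_str / schedule_str.
	options := []string{
		"diffusion_model",         // bare boolean-style option (intercepted by the Go wrapper below)
		"vae_path:ae.safetensors", // hypothetical VAE file, resolved relative to the model directory
		"sampler:euler_a",
		"scheduler:karras",
	}
	fmt.Println(options)
}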
@@ -1,96 +0,0 @@
package main

// #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include
// #cgo LDFLAGS: -L${SRCDIR}/ -lsd -lstdc++ -lm -lggmlall -lgomp
// #include <gosd.h>
// #include <stdlib.h>
import "C"

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"unsafe"

	"github.com/mudler/LocalAI/pkg/grpc/base"
	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
	"github.com/mudler/LocalAI/pkg/utils"
)

type SDGGML struct {
	base.SingleThread
	threads      int
	sampleMethod string
	cfgScale     float32
}

func (sd *SDGGML) Load(opts *pb.ModelOptions) error {

	sd.threads = int(opts.Threads)

	modelFile := C.CString(opts.ModelFile)
	defer C.free(unsafe.Pointer(modelFile))

	// Prepare the options array to pass to C: reserve one extra slot so the
	// array can be NULL-terminated, as expected by load_model's parsing loop.
	var options **C.char
	size := C.size_t(unsafe.Sizeof((*C.char)(nil)))
	length := C.size_t(len(opts.Options) + 1)
	options = (**C.char)(C.malloc(length * size))
	view := (*[1 << 30]*C.char)(unsafe.Pointer(options))[0 : len(opts.Options)+1 : len(opts.Options)+1]

	var diffusionModel int

	var oo []string
	for _, op := range opts.Options {
		if op == "diffusion_model" {
			diffusionModel = 1
			continue
		}

		// If it's a path option, resolve the absolute path from the model path
		if strings.Contains(op, ":") && strings.Contains(op, "path") {
			data := strings.Split(op, ":")
			data[1] = filepath.Join(opts.ModelPath, data[1])
			if err := utils.VerifyPath(data[1], opts.ModelPath); err == nil {
				oo = append(oo, strings.Join(data, ":"))
			}
		} else {
			oo = append(oo, op)
		}
	}

	fmt.Fprintf(os.Stderr, "Options: %+v\n", oo)

	for i, x := range oo {
		view[i] = C.CString(x)
	}
	view[len(oo)] = nil // terminate the array for the C side

	sd.cfgScale = opts.CFGScale

	ret := C.load_model(modelFile, options, C.int(opts.Threads), C.int(diffusionModel))
	if ret != 0 {
		return fmt.Errorf("could not load model")
	}

	return nil
}

func (sd *SDGGML) GenerateImage(opts *pb.GenerateImageRequest) error {
	t := C.CString(opts.PositivePrompt)
	defer C.free(unsafe.Pointer(t))

	dst := C.CString(opts.Dst)
	defer C.free(unsafe.Pointer(dst))

	negative := C.CString(opts.NegativePrompt)
	defer C.free(unsafe.Pointer(negative))

	ret := C.gen_image(t, negative, C.int(opts.Width), C.int(opts.Height), C.int(opts.Step), C.int(opts.Seed), dst, C.float(sd.cfgScale))
	if ret != 0 {
		return fmt.Errorf("inference failed")
	}

	return nil
}
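Illustrative sketch (not part of the diff): how the SDGGML wrapper above could be driven directly, bypassing the gRPC server. It is written as if it sat in the same package as the wrapper; the model path, prompts and output file are placeholders, and only fields the wrapper actually reads are set.

package main

import (
	"fmt"

	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)

// loadAndGenerate is a hypothetical helper: load a model with a couple of
// option strings, then render a single image to a placeholder path.
func loadAndGenerate() error {
	sd := &SDGGML{}

	if err := sd.Load(&pb.ModelOptions{
		ModelFile: "/models/stablediffusion/sd-model.safetensors", // placeholder
		ModelPath: "/models/stablediffusion",
		Threads:   4,
		CFGScale:  4.5,
		Options:   []string{"sampler:euler_a", "scheduler:karras"},
	}); err != nil {
		return fmt.Errorf("load failed: %w", err)
	}

	return sd.GenerateImage(&pb.GenerateImageRequest{
		PositivePrompt: "a watercolor lighthouse at dawn",
		NegativePrompt: "blurry, low quality",
		Width:          512,
		Height:         512,
		Step:           20,
		Seed:           42,
		Dst:            "/tmp/out.png",
	})
}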
@@ -1,8 +0,0 @@
#ifdef __cplusplus
extern "C" {
#endif
int load_model(char *model, char* options[], int threads, int diffusionModel);
int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed, char *dst, float cfg_scale);
#ifdef __cplusplus
}
#endif
@@ -1,10 +1,11 @@
 package main

 // Note: this is started internally by LocalAI and a server is allocated for each model

 import (
 	"flag"

-	grpc "github.com/mudler/LocalAI/pkg/grpc"
+	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )

 var (
@@ -14,7 +15,7 @@ var (
 func main() {
 	flag.Parse()

-	if err := grpc.StartServer(*addr, &Bark{}); err != nil {
+	if err := grpc.StartServer(*addr, &Image{}); err != nil {
 		panic(err)
 	}
 }
backend/go/image/stablediffusion/stablediffusion.go (new file, 33 lines)
@@ -0,0 +1,33 @@
package main

// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
	"github.com/go-skynet/LocalAI/pkg/grpc/base"
	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
	"github.com/go-skynet/LocalAI/pkg/stablediffusion"
)

type Image struct {
	base.SingleThread
	stablediffusion *stablediffusion.StableDiffusion
}

func (image *Image) Load(opts *pb.ModelOptions) error {
	var err error
	// Note: the Model here is a path to a directory containing the model files
	image.stablediffusion, err = stablediffusion.New(opts.ModelFile)
	return err
}

func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
	return image.stablediffusion.GenerateImage(
		int(opts.Height),
		int(opts.Width),
		int(opts.Mode),
		int(opts.Step),
		int(opts.Seed),
		opts.PositivePrompt,
		opts.NegativePrompt,
		opts.Dst)
}
backend/go/image/tinydream/main.go (new file, 21 lines)
@@ -0,0 +1,21 @@
package main

// Note: this is started internally by LocalAI and a server is allocated for each model

import (
	"flag"

	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)

var (
	addr = flag.String("addr", "localhost:50051", "the address to connect to")
)

func main() {
	flag.Parse()

	if err := grpc.StartServer(*addr, &Image{}); err != nil {
		panic(err)
	}
}
backend/go/image/tinydream/tinydream.go (new file, 32 lines)
@@ -0,0 +1,32 @@
package main

// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
	"github.com/go-skynet/LocalAI/pkg/grpc/base"
	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
	"github.com/go-skynet/LocalAI/pkg/tinydream"
)

type Image struct {
	base.SingleThread
	tinydream *tinydream.TinyDream
}

func (image *Image) Load(opts *pb.ModelOptions) error {
	var err error
	// Note: the Model here is a path to a directory containing the model files
	image.tinydream, err = tinydream.New(opts.ModelFile)
	return err
}

func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
	return image.tinydream.GenerateImage(
		int(opts.Height),
		int(opts.Width),
		int(opts.Step),
		int(opts.Seed),
		opts.PositivePrompt,
		opts.NegativePrompt,
		opts.Dst)
}
backend/go/llm/bert/bert.go (new file, 34 lines)
@@ -0,0 +1,34 @@
package main

// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
	bert "github.com/go-skynet/go-bert.cpp"

	"github.com/go-skynet/LocalAI/pkg/grpc/base"
	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
)

type Embeddings struct {
	base.SingleThread
	bert *bert.Bert
}

func (llm *Embeddings) Load(opts *pb.ModelOptions) error {
	model, err := bert.New(opts.ModelFile)
	llm.bert = model
	return err
}

func (llm *Embeddings) Embeddings(opts *pb.PredictOptions) ([]float32, error) {

	if len(opts.EmbeddingTokens) > 0 {
		tokens := []int{}
		for _, t := range opts.EmbeddingTokens {
			tokens = append(tokens, int(t))
		}
		return llm.bert.TokenEmbeddings(tokens, bert.SetThreads(int(opts.Threads)))
	}

	return llm.bert.Embeddings(opts.Embeddings, bert.SetThreads(int(opts.Threads)))
}
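Illustrative sketch (not part of the diff): the bert wrapper above accepts either plain text or pre-tokenized input. The helper below is hypothetical and written as if it lived next to the wrapper; the field types (EmbeddingTokens as an int32 slice, Embeddings as a string) are assumptions inferred from how the wrapper uses them, and the token ids are placeholders.

package main

import (
	"fmt"

	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
)

// embedSentence shows the two input modes the Embeddings wrapper supports.
func embedSentence(e *Embeddings) error {
	// Plain-text input: the string is embedded directly.
	vec, err := e.Embeddings(&pb.PredictOptions{
		Embeddings: "the quick brown fox",
		Threads:    4,
	})
	if err != nil {
		return err
	}
	fmt.Println("text embedding size:", len(vec))

	// Pre-tokenized input: EmbeddingTokens takes precedence when non-empty.
	vec, err = e.Embeddings(&pb.PredictOptions{
		EmbeddingTokens: []int32{101, 2023, 2003, 102}, // placeholder token ids
		Threads:         4,
	})
	if err != nil {
		return err
	}
	fmt.Println("token embedding size:", len(vec))
	return nil
}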
backend/go/llm/bert/main.go (new file, 21 lines)
@@ -0,0 +1,21 @@
package main

// Note: this is started internally by LocalAI and a server is allocated for each model

import (
	"flag"

	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)

var (
	addr = flag.String("addr", "localhost:50051", "the address to connect to")
)

func main() {
	flag.Parse()

	if err := grpc.StartServer(*addr, &Embeddings{}); err != nil {
		panic(err)
	}
}
backend/go/llm/gpt4all/gpt4all.go (new file, 62 lines)
@@ -0,0 +1,62 @@
package main

// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
	"fmt"

	"github.com/go-skynet/LocalAI/pkg/grpc/base"
	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
	gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
)

type LLM struct {
	base.SingleThread

	gpt4all *gpt4all.Model
}

func (llm *LLM) Load(opts *pb.ModelOptions) error {
	model, err := gpt4all.New(opts.ModelFile,
		gpt4all.SetThreads(int(opts.Threads)),
		gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath))
	llm.gpt4all = model
	return err
}

func buildPredictOptions(opts *pb.PredictOptions) []gpt4all.PredictOption {
	predictOptions := []gpt4all.PredictOption{
		gpt4all.SetTemperature(float64(opts.Temperature)),
		gpt4all.SetTopP(float64(opts.TopP)),
		gpt4all.SetTopK(int(opts.TopK)),
		gpt4all.SetTokens(int(opts.Tokens)),
	}

	if opts.Batch != 0 {
		predictOptions = append(predictOptions, gpt4all.SetBatch(int(opts.Batch)))
	}
	return predictOptions
}

func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
	return llm.gpt4all.Predict(opts.Prompt, buildPredictOptions(opts)...)
}

func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
	predictOptions := buildPredictOptions(opts)

	go func() {
		llm.gpt4all.SetTokenCallback(func(token string) bool {
			results <- token
			return true
		})
		_, err := llm.gpt4all.Predict(opts.Prompt, predictOptions...)
		if err != nil {
			fmt.Println("err: ", err)
		}
		llm.gpt4all.SetTokenCallback(nil)
		close(results)
	}()

	return nil
}
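Illustrative sketch (not part of the diff): PredictStream above runs prediction in a goroutine, pushes each generated token into the channel, and closes it when generation finishes. The hypothetical helper below, written as if it lived next to the wrapper, shows how a caller is expected to consume that channel; the prompt and sampling values are placeholders.

package main

import (
	"fmt"

	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
)

// streamCompletion drains the token channel filled by PredictStream.
func streamCompletion(llm *LLM, prompt string) error {
	results := make(chan string)

	if err := llm.PredictStream(&pb.PredictOptions{
		Prompt:      prompt,
		Tokens:      128,
		Temperature: 0.7,
		TopP:        0.9,
		TopK:        40,
	}, results); err != nil {
		return err
	}

	// The range loop ends when the goroutine inside PredictStream closes the channel.
	for token := range results {
		fmt.Print(token)
	}
	fmt.Println()
	return nil
}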
@@ -5,7 +5,7 @@ package main
 import (
 	"flag"

-	grpc "github.com/mudler/LocalAI/pkg/grpc"
+	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )

 var (
@@ -15,7 +15,7 @@ var (
 func main() {
 	flag.Parse()

-	if err := grpc.StartServer(*addr, &VAD{}); err != nil {
+	if err := grpc.StartServer(*addr, &LLM{}); err != nil {
 		panic(err)
 	}
 }
@@ -6,9 +6,9 @@ import (
 	"fmt"
 	"os"

-	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
-	"github.com/mudler/LocalAI/pkg/langchain"
+	"github.com/go-skynet/LocalAI/pkg/grpc/base"
+	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+	"github.com/go-skynet/LocalAI/pkg/langchain"
 )

 type LLM struct {
@@ -5,7 +5,7 @@ package main
 import (
 	"flag"

-	grpc "github.com/mudler/LocalAI/pkg/grpc"
+	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )

 var (
backend/go/llm/llama-ggml/llama.go (new file, 204 lines)
@@ -0,0 +1,204 @@
package main

// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
	"fmt"

	"github.com/go-skynet/LocalAI/pkg/grpc/base"
	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
	"github.com/go-skynet/go-llama.cpp"
)

type LLM struct {
	base.SingleThread

	llama *llama.LLama
}

func (llm *LLM) Load(opts *pb.ModelOptions) error {
	ropeFreqBase := float32(10000)
	ropeFreqScale := float32(1)

	if opts.RopeFreqBase != 0 {
		ropeFreqBase = opts.RopeFreqBase
	}
	if opts.RopeFreqScale != 0 {
		ropeFreqScale = opts.RopeFreqScale
	}

	llamaOpts := []llama.ModelOption{
		llama.WithRopeFreqBase(ropeFreqBase),
		llama.WithRopeFreqScale(ropeFreqScale),
	}

	if opts.NGQA != 0 {
		llamaOpts = append(llamaOpts, llama.WithGQA(int(opts.NGQA)))
	}

	if opts.RMSNormEps != 0 {
		llamaOpts = append(llamaOpts, llama.WithRMSNormEPS(opts.RMSNormEps))
	}

	if opts.ContextSize != 0 {
		llamaOpts = append(llamaOpts, llama.SetContext(int(opts.ContextSize)))
	}
	if opts.F16Memory {
		llamaOpts = append(llamaOpts, llama.EnableF16Memory)
	}
	if opts.Embeddings {
		llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
	}
	if opts.NGPULayers != 0 {
		llamaOpts = append(llamaOpts, llama.SetGPULayers(int(opts.NGPULayers)))
	}

	llamaOpts = append(llamaOpts, llama.SetMMap(opts.MMap))
	llamaOpts = append(llamaOpts, llama.SetMainGPU(opts.MainGPU))
	llamaOpts = append(llamaOpts, llama.SetTensorSplit(opts.TensorSplit))
	if opts.NBatch != 0 {
		llamaOpts = append(llamaOpts, llama.SetNBatch(int(opts.NBatch)))
	} else {
		llamaOpts = append(llamaOpts, llama.SetNBatch(512))
	}

	if opts.NUMA {
		llamaOpts = append(llamaOpts, llama.EnableNUMA)
	}

	if opts.LowVRAM {
		llamaOpts = append(llamaOpts, llama.EnabelLowVRAM)
	}

	model, err := llama.New(opts.ModelFile, llamaOpts...)
	llm.llama = model

	return err
}

func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
	ropeFreqBase := float32(10000)
	ropeFreqScale := float32(1)

	if opts.RopeFreqBase != 0 {
		ropeFreqBase = opts.RopeFreqBase
	}
	if opts.RopeFreqScale != 0 {
		ropeFreqScale = opts.RopeFreqScale
	}
	predictOptions := []llama.PredictOption{
		llama.SetTemperature(opts.Temperature),
		llama.SetTopP(opts.TopP),
		llama.SetTopK(int(opts.TopK)),
		llama.SetTokens(int(opts.Tokens)),
		llama.SetThreads(int(opts.Threads)),
		llama.WithGrammar(opts.Grammar),
		llama.SetRopeFreqBase(ropeFreqBase),
		llama.SetRopeFreqScale(ropeFreqScale),
		llama.SetNegativePromptScale(opts.NegativePromptScale),
		llama.SetNegativePrompt(opts.NegativePrompt),
	}

	if opts.PromptCacheAll {
		predictOptions = append(predictOptions, llama.EnablePromptCacheAll)
	}

	if opts.PromptCacheRO {
		predictOptions = append(predictOptions, llama.EnablePromptCacheRO)
	}

	// Expected absolute path
	if opts.PromptCachePath != "" {
		predictOptions = append(predictOptions, llama.SetPathPromptCache(opts.PromptCachePath))
	}

	if opts.Mirostat != 0 {
		predictOptions = append(predictOptions, llama.SetMirostat(int(opts.Mirostat)))
	}

	if opts.MirostatETA != 0 {
		predictOptions = append(predictOptions, llama.SetMirostatETA(opts.MirostatETA))
	}

	if opts.MirostatTAU != 0 {
		predictOptions = append(predictOptions, llama.SetMirostatTAU(opts.MirostatTAU))
	}

	if opts.Debug {
		predictOptions = append(predictOptions, llama.Debug)
	}

	predictOptions = append(predictOptions, llama.SetStopWords(opts.StopPrompts...))

	if opts.PresencePenalty != 0 {
		predictOptions = append(predictOptions, llama.SetPenalty(opts.PresencePenalty))
	}

	if opts.NKeep != 0 {
		predictOptions = append(predictOptions, llama.SetNKeep(int(opts.NKeep)))
	}

	if opts.Batch != 0 {
		predictOptions = append(predictOptions, llama.SetBatch(int(opts.Batch)))
	}

	if opts.F16KV {
		predictOptions = append(predictOptions, llama.EnableF16KV)
	}

	if opts.IgnoreEOS {
		predictOptions = append(predictOptions, llama.IgnoreEOS)
	}

	if opts.Seed != 0 {
		predictOptions = append(predictOptions, llama.SetSeed(int(opts.Seed)))
	}

	//predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed))

	predictOptions = append(predictOptions, llama.SetFrequencyPenalty(opts.FrequencyPenalty))
	predictOptions = append(predictOptions, llama.SetMlock(opts.MLock))
	predictOptions = append(predictOptions, llama.SetMemoryMap(opts.MMap))
	predictOptions = append(predictOptions, llama.SetPredictionMainGPU(opts.MainGPU))
	predictOptions = append(predictOptions, llama.SetPredictionTensorSplit(opts.TensorSplit))
	predictOptions = append(predictOptions, llama.SetTailFreeSamplingZ(opts.TailFreeSamplingZ))
	predictOptions = append(predictOptions, llama.SetTypicalP(opts.TypicalP))
	return predictOptions
}

func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
	return llm.llama.Predict(opts.Prompt, buildPredictOptions(opts)...)
}

func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
	predictOptions := buildPredictOptions(opts)

	predictOptions = append(predictOptions, llama.SetTokenCallback(func(token string) bool {
		results <- token
		return true
	}))

	go func() {
		_, err := llm.llama.Predict(opts.Prompt, predictOptions...)
		if err != nil {
			fmt.Println("err: ", err)
		}
		close(results)
	}()

	return nil
}

func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
	predictOptions := buildPredictOptions(opts)

	if len(opts.EmbeddingTokens) > 0 {
		tokens := []int{}
		for _, t := range opts.EmbeddingTokens {
			tokens = append(tokens, int(t))
		}
		return llm.llama.TokenEmbeddings(tokens, predictOptions...)
	}

	return llm.llama.Embeddings(opts.Embeddings, predictOptions...)
}
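Illustrative sketch (not part of the diff): a hypothetical helper, written as if it lived next to the llama-ggml wrapper above, showing which pb.ModelOptions fields the Load function actually inspects and a blocking Predict call. The model path and all option values are placeholders.

package main

import (
	"fmt"

	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
)

// runLlama loads a ggml model with a few common options and runs one prompt.
func runLlama() error {
	llm := &LLM{}

	if err := llm.Load(&pb.ModelOptions{
		ModelFile:   "/models/ggml-model-q4_0.bin", // placeholder path
		ContextSize: 2048,
		NGPULayers:  35,   // forwarded via llama.SetGPULayers
		F16Memory:   true, // forwarded via llama.EnableF16Memory
		MMap:        true, // forwarded via llama.SetMMap
	}); err != nil {
		return err
	}

	out, err := llm.Predict(&pb.PredictOptions{
		Prompt:      "Q: What is the capital of France?\nA:",
		Tokens:      64,
		Temperature: 0.2,
		TopK:        40,
		TopP:        0.95,
		Threads:     4,
	})
	if err != nil {
		return err
	}
	fmt.Println(out)
	return nil
}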
backend/go/llm/llama-ggml/main.go (new file, 19 lines)
@@ -0,0 +1,19 @@
package main

import (
	"flag"

	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
)

var (
	addr = flag.String("addr", "localhost:50051", "the address to connect to")
)

func main() {
	flag.Parse()

	if err := grpc.StartServer(*addr, &LLM{}); err != nil {
		panic(err)
	}
}
@@ -6,9 +6,9 @@ import (
 	"fmt"
 	"path/filepath"

+	"github.com/go-skynet/LocalAI/pkg/grpc/base"
+	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	"github.com/go-skynet/go-llama.cpp"
-	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 )

 type LLM struct {
@@ -58,9 +58,6 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
 	if opts.Embeddings {
 		llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
 	}
-	if opts.Reranking {
-		llamaOpts = append(llamaOpts, llama.EnableReranking)
-	}
 	if opts.NGPULayers != 0 {
 		llamaOpts = append(llamaOpts, llama.SetGPULayers(int(opts.NGPULayers)))
 	}
@@ -7,7 +7,7 @@ package main
 import (
 	"flag"

-	grpc "github.com/mudler/LocalAI/pkg/grpc"
+	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )

 var (
Some files were not shown because too many files have changed in this diff.