mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-07 21:22:58 -05:00
Compare commits
4 Commits
fixes/ci
...
docs_updat
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5b8d6a31e2 | ||
|
|
f0752be4aa | ||
|
|
bafc9effad | ||
|
|
d2934dd69f |
@@ -1,17 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
cd /workspace
|
|
||||||
|
|
||||||
# Get the files into the volume without a bind mount
|
|
||||||
if [ ! -d ".git" ]; then
|
|
||||||
git clone https://github.com/mudler/LocalAI.git .
|
|
||||||
else
|
|
||||||
git fetch
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Standard Post-Create script completed."
|
|
||||||
|
|
||||||
if [ -f "/devcontainer-customization/postcreate.sh" ]; then
|
|
||||||
echo "Launching customization postcreate.sh"
|
|
||||||
bash "/devcontainer-customization/postcreate.sh"
|
|
||||||
fi
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
cd /workspace
|
|
||||||
|
|
||||||
# Grab the pre-stashed backend assets to avoid build issues
|
|
||||||
cp -r /build/backend-assets /workspace/backend-assets
|
|
||||||
|
|
||||||
# Ensures generated source files are present upon load
|
|
||||||
make prepare
|
|
||||||
|
|
||||||
echo "Standard Post-Start script completed."
|
|
||||||
|
|
||||||
if [ -f "/devcontainer-customization/poststart.sh" ]; then
|
|
||||||
echo "Launching customization poststart.sh"
|
|
||||||
bash "/devcontainer-customization/poststart.sh"
|
|
||||||
fi
|
|
||||||
@@ -1,55 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# This file contains some really simple functions that are useful when building up customization scripts.
|
|
||||||
|
|
||||||
|
|
||||||
# Checks if the git config has a user registered - and sets it up if not.
|
|
||||||
#
|
|
||||||
# Param 1: name
|
|
||||||
# Param 2: email
|
|
||||||
#
|
|
||||||
config_user() {
|
|
||||||
echo "Configuring git for $1 <$2>"
|
|
||||||
local gcn=$(git config --global user.name)
|
|
||||||
if [ -z "${gcn}" ]; then
|
|
||||||
echo "Setting up git user / remote"
|
|
||||||
git config --global user.name "$1"
|
|
||||||
git config --global user.email "$2"
|
|
||||||
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# Checks if the git remote is configured - and sets it up if not. Fetches either way.
|
|
||||||
#
|
|
||||||
# Param 1: remote name
|
|
||||||
# Param 2: remote url
|
|
||||||
#
|
|
||||||
config_remote() {
|
|
||||||
echo "Adding git remote and fetching $2 as $1"
|
|
||||||
local gr=$(git remote -v | grep $1)
|
|
||||||
if [ -z "${gr}" ]; then
|
|
||||||
git remote add $1 $2
|
|
||||||
fi
|
|
||||||
git fetch $1
|
|
||||||
}
|
|
||||||
|
|
||||||
# Setup special .ssh files
|
|
||||||
# Prints out lines of text to make things pretty
|
|
||||||
# Param 1: bash array, filenames relative to the customization directory that should be copied to ~/.ssh
|
|
||||||
setup_ssh() {
|
|
||||||
echo "starting ~/.ssh directory setup..."
|
|
||||||
mkdir -p "${HOME}.ssh"
|
|
||||||
chmod 0700 "${HOME}/.ssh"
|
|
||||||
echo "-----"
|
|
||||||
local files=("$@")
|
|
||||||
for file in "${files[@]}" ; do
|
|
||||||
local cfile="/devcontainer-customization/${file}"
|
|
||||||
local hfile="${HOME}/.ssh/${file}"
|
|
||||||
if [ ! -f "${hfile}" ]; then
|
|
||||||
echo "copying \"${file}\""
|
|
||||||
cp "${cfile}" "${hfile}"
|
|
||||||
chmod 600 "${hfile}"
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
echo "~/.ssh directory setup complete!"
|
|
||||||
}
|
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
Place any additional resources your environment requires in this directory
|
|
||||||
|
|
||||||
Script hooks are currently called for:
|
|
||||||
`postcreate.sh` and `poststart.sh`
|
|
||||||
|
|
||||||
If files with those names exist here, they will be called at the end of the normal script.
|
|
||||||
|
|
||||||
This is a good place to set things like `git config --global user.name` are set - and to handle any other files that are mounted via this directory.
|
|
||||||
|
|
||||||
To assist in doing so, `source /.devcontainer-scripts/utils.sh` will provide utility functions that may be useful - for example:
|
|
||||||
|
|
||||||
```
|
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
source "/.devcontainer-scripts/utils.sh"
|
|
||||||
|
|
||||||
sshfiles=("config", "key.pub")
|
|
||||||
|
|
||||||
setup_ssh "${sshfiles[@]}"
|
|
||||||
|
|
||||||
config_user "YOUR NAME" "YOUR EMAIL"
|
|
||||||
|
|
||||||
config_remote "REMOTE NAME" "REMOTE URL"
|
|
||||||
|
|
||||||
```
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
{
|
|
||||||
"$schema": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json",
|
|
||||||
"name": "LocalAI",
|
|
||||||
"workspaceFolder": "/workspace",
|
|
||||||
"dockerComposeFile": [ "./docker-compose-devcontainer.yml" ],
|
|
||||||
"service": "api",
|
|
||||||
"shutdownAction": "stopCompose",
|
|
||||||
"customizations": {
|
|
||||||
"vscode": {
|
|
||||||
"extensions": [
|
|
||||||
"golang.go",
|
|
||||||
"ms-vscode.makefile-tools",
|
|
||||||
"ms-azuretools.vscode-docker",
|
|
||||||
"ms-python.python",
|
|
||||||
"ms-python.debugpy",
|
|
||||||
"wayou.vscode-todo-highlight",
|
|
||||||
"waderyan.gitblame"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"forwardPorts": [8080, 3000],
|
|
||||||
"postCreateCommand": "bash /.devcontainer-scripts/postcreate.sh",
|
|
||||||
"postStartCommand": "bash /.devcontainer-scripts/poststart.sh"
|
|
||||||
}
|
|
||||||
@@ -1,48 +0,0 @@
|
|||||||
services:
|
|
||||||
api:
|
|
||||||
build:
|
|
||||||
context: ..
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
target: devcontainer
|
|
||||||
args:
|
|
||||||
- FFMPEG=true
|
|
||||||
- IMAGE_TYPE=extras
|
|
||||||
- GO_TAGS=stablediffusion p2p tts
|
|
||||||
env_file:
|
|
||||||
- ../.env
|
|
||||||
ports:
|
|
||||||
- 8080:8080
|
|
||||||
volumes:
|
|
||||||
- localai_workspace:/workspace
|
|
||||||
- ../models:/host-models
|
|
||||||
- ./customization:/devcontainer-customization
|
|
||||||
command: /bin/sh -c "while sleep 1000; do :; done"
|
|
||||||
cap_add:
|
|
||||||
- SYS_PTRACE
|
|
||||||
security_opt:
|
|
||||||
- seccomp:unconfined
|
|
||||||
prometheus:
|
|
||||||
image: prom/prometheus
|
|
||||||
container_name: prometheus
|
|
||||||
command:
|
|
||||||
- '--config.file=/etc/prometheus/prometheus.yml'
|
|
||||||
ports:
|
|
||||||
- 9090:9090
|
|
||||||
restart: unless-stopped
|
|
||||||
volumes:
|
|
||||||
- ./prometheus:/etc/prometheus
|
|
||||||
- prom_data:/prometheus
|
|
||||||
grafana:
|
|
||||||
image: grafana/grafana
|
|
||||||
container_name: grafana
|
|
||||||
ports:
|
|
||||||
- 3000:3000
|
|
||||||
restart: unless-stopped
|
|
||||||
environment:
|
|
||||||
- GF_SECURITY_ADMIN_USER=admin
|
|
||||||
- GF_SECURITY_ADMIN_PASSWORD=grafana
|
|
||||||
volumes:
|
|
||||||
- ./grafana:/etc/grafana/provisioning/datasources
|
|
||||||
volumes:
|
|
||||||
prom_data:
|
|
||||||
localai_workspace:
|
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
|
|
||||||
apiVersion: 1
|
|
||||||
|
|
||||||
datasources:
|
|
||||||
- name: Prometheus
|
|
||||||
type: prometheus
|
|
||||||
url: http://prometheus:9090
|
|
||||||
isDefault: true
|
|
||||||
access: proxy
|
|
||||||
editable: true
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
global:
|
|
||||||
scrape_interval: 15s
|
|
||||||
scrape_timeout: 10s
|
|
||||||
evaluation_interval: 15s
|
|
||||||
alerting:
|
|
||||||
alertmanagers:
|
|
||||||
- static_configs:
|
|
||||||
- targets: []
|
|
||||||
scheme: http
|
|
||||||
timeout: 10s
|
|
||||||
api_version: v1
|
|
||||||
scrape_configs:
|
|
||||||
- job_name: prometheus
|
|
||||||
honor_timestamps: true
|
|
||||||
scrape_interval: 15s
|
|
||||||
scrape_timeout: 10s
|
|
||||||
metrics_path: /metrics
|
|
||||||
scheme: http
|
|
||||||
static_configs:
|
|
||||||
- targets:
|
|
||||||
- localhost:9090
|
|
||||||
@@ -1,17 +1,6 @@
|
|||||||
.idea
|
.idea
|
||||||
.github
|
|
||||||
.vscode
|
|
||||||
.devcontainer
|
|
||||||
models
|
models
|
||||||
examples/chatbot-ui/models
|
examples/chatbot-ui/models
|
||||||
examples/rwkv/models
|
examples/rwkv/models
|
||||||
examples/**/models
|
examples/**/models
|
||||||
Dockerfile*
|
Dockerfile
|
||||||
__pycache__
|
|
||||||
|
|
||||||
# SonarQube
|
|
||||||
.scannerwork
|
|
||||||
|
|
||||||
# backend virtual environments
|
|
||||||
**/venv
|
|
||||||
backend/python/**/source
|
|
||||||
@@ -1,31 +0,0 @@
|
|||||||
|
|
||||||
root = true
|
|
||||||
|
|
||||||
[*]
|
|
||||||
indent_style = space
|
|
||||||
indent_size = 2
|
|
||||||
end_of_line = lf
|
|
||||||
charset = utf-8
|
|
||||||
trim_trailing_whitespace = true
|
|
||||||
insert_final_newline = true
|
|
||||||
|
|
||||||
[*.go]
|
|
||||||
indent_style = tab
|
|
||||||
|
|
||||||
[Makefile]
|
|
||||||
indent_style = tab
|
|
||||||
|
|
||||||
[*.proto]
|
|
||||||
indent_size = 2
|
|
||||||
|
|
||||||
[*.py]
|
|
||||||
indent_size = 4
|
|
||||||
|
|
||||||
[*.js]
|
|
||||||
indent_size = 2
|
|
||||||
|
|
||||||
[*.yaml]
|
|
||||||
indent_size = 2
|
|
||||||
|
|
||||||
[*.md]
|
|
||||||
trim_trailing_whitespace = false
|
|
||||||
46
.env
46
.env
@@ -1,33 +1,33 @@
|
|||||||
## Set number of threads.
|
## Set number of threads.
|
||||||
## Note: prefer the number of physical cores. Overbooking the CPU degrades performance notably.
|
## Note: prefer the number of physical cores. Overbooking the CPU degrades performance notably.
|
||||||
# LOCALAI_THREADS=14
|
# THREADS=14
|
||||||
|
|
||||||
## Specify a different bind address (defaults to ":8080")
|
## Specify a different bind address (defaults to ":8080")
|
||||||
# LOCALAI_ADDRESS=127.0.0.1:8080
|
# ADDRESS=127.0.0.1:8080
|
||||||
|
|
||||||
## Default models context size
|
## Default models context size
|
||||||
# LOCALAI_CONTEXT_SIZE=512
|
# CONTEXT_SIZE=512
|
||||||
#
|
#
|
||||||
## Define galleries.
|
## Define galleries.
|
||||||
## models will to install will be visible in `/models/available`
|
## models will to install will be visible in `/models/available`
|
||||||
# LOCALAI_GALLERIES=[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}]
|
# GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}]
|
||||||
|
|
||||||
## CORS settings
|
## CORS settings
|
||||||
# LOCALAI_CORS=true
|
# CORS=true
|
||||||
# LOCALAI_CORS_ALLOW_ORIGINS=*
|
# CORS_ALLOW_ORIGINS=*
|
||||||
|
|
||||||
## Default path for models
|
## Default path for models
|
||||||
#
|
#
|
||||||
# LOCALAI_MODELS_PATH=/models
|
# MODELS_PATH=/models
|
||||||
|
|
||||||
## Enable debug mode
|
## Enable debug mode
|
||||||
# LOCALAI_LOG_LEVEL=debug
|
# DEBUG=true
|
||||||
|
|
||||||
## Disables COMPEL (Diffusers)
|
## Disables COMPEL (Diffusers)
|
||||||
# COMPEL=0
|
# COMPEL=0
|
||||||
|
|
||||||
## Enable/Disable single backend (useful if only one GPU is available)
|
## Enable/Disable single backend (useful if only one GPU is available)
|
||||||
# LOCALAI_SINGLE_ACTIVE_BACKEND=true
|
# SINGLE_ACTIVE_BACKEND=true
|
||||||
|
|
||||||
## Specify a build type. Available: cublas, openblas, clblas.
|
## Specify a build type. Available: cublas, openblas, clblas.
|
||||||
## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
|
## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
|
||||||
@@ -46,13 +46,13 @@
|
|||||||
# GO_TAGS=stablediffusion
|
# GO_TAGS=stablediffusion
|
||||||
|
|
||||||
## Path where to store generated images
|
## Path where to store generated images
|
||||||
# LOCALAI_IMAGE_PATH=/tmp/generated/images
|
# IMAGE_PATH=/tmp
|
||||||
|
|
||||||
## Specify a default upload limit in MB (whisper)
|
## Specify a default upload limit in MB (whisper)
|
||||||
# LOCALAI_UPLOAD_LIMIT=15
|
# UPLOAD_LIMIT
|
||||||
|
|
||||||
## List of external GRPC backends (note on the container image this variable is already set to use extra backends available in extra/)
|
## List of external GRPC backends (note on the container image this variable is already set to use extra backends available in extra/)
|
||||||
# LOCALAI_EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py
|
# EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py
|
||||||
|
|
||||||
### Advanced settings ###
|
### Advanced settings ###
|
||||||
### Those are not really used by LocalAI, but from components in the stack ###
|
### Those are not really used by LocalAI, but from components in the stack ###
|
||||||
@@ -71,27 +71,19 @@
|
|||||||
### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
|
### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
|
||||||
# LLAMACPP_PARALLEL=1
|
# LLAMACPP_PARALLEL=1
|
||||||
|
|
||||||
### Define a list of GRPC Servers for llama-cpp workers to distribute the load
|
|
||||||
# https://github.com/ggerganov/llama.cpp/pull/6829
|
|
||||||
# https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md
|
|
||||||
# LLAMACPP_GRPC_SERVERS=""
|
|
||||||
|
|
||||||
### Enable to run parallel requests
|
### Enable to run parallel requests
|
||||||
# LOCALAI_PARALLEL_REQUESTS=true
|
# PARALLEL_REQUESTS=true
|
||||||
|
|
||||||
# Enable to allow p2p mode
|
|
||||||
# LOCALAI_P2P=true
|
|
||||||
|
|
||||||
### Watchdog settings
|
### Watchdog settings
|
||||||
###
|
###
|
||||||
# Enables watchdog to kill backends that are inactive for too much time
|
# Enables watchdog to kill backends that are inactive for too much time
|
||||||
# LOCALAI_WATCHDOG_IDLE=true
|
# WATCHDOG_IDLE=true
|
||||||
#
|
|
||||||
# Time in duration format (e.g. 1h30m) after which a backend is considered idle
|
|
||||||
# LOCALAI_WATCHDOG_IDLE_TIMEOUT=5m
|
|
||||||
#
|
#
|
||||||
# Enables watchdog to kill backends that are busy for too much time
|
# Enables watchdog to kill backends that are busy for too much time
|
||||||
# LOCALAI_WATCHDOG_BUSY=true
|
# WATCHDOG_BUSY=true
|
||||||
|
#
|
||||||
|
# Time in duration format (e.g. 1h30m) after which a backend is considered idle
|
||||||
|
# WATCHDOG_IDLE_TIMEOUT=5m
|
||||||
#
|
#
|
||||||
# Time in duration format (e.g. 1h30m) after which a backend is considered busy
|
# Time in duration format (e.g. 1h30m) after which a backend is considered busy
|
||||||
# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m
|
# WATCHDOG_BUSY_TIMEOUT=5m
|
||||||
13
.github/bump_deps.sh
vendored
13
.github/bump_deps.sh
vendored
@@ -6,17 +6,4 @@ VAR=$3
|
|||||||
|
|
||||||
LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
|
LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
|
||||||
|
|
||||||
# Read $VAR from Makefile (only first match)
|
|
||||||
set +e
|
|
||||||
CURRENT_COMMIT="$(grep -m1 "^$VAR?=" Makefile | cut -d'=' -f2)"
|
|
||||||
set -e
|
|
||||||
|
|
||||||
sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
|
sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
|
||||||
|
|
||||||
if [ -z "$CURRENT_COMMIT" ]; then
|
|
||||||
echo "Could not find $VAR in Makefile."
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Changes: https://github.com/$REPO/compare/${CURRENT_COMMIT}..${LAST_COMMIT}" >> "${VAR}_message.txt"
|
|
||||||
echo "${LAST_COMMIT}" >> "${VAR}_commit.txt"
|
|
||||||
2
.github/bump_docs.sh
vendored
2
.github/bump_docs.sh
vendored
@@ -2,6 +2,6 @@
|
|||||||
set -xe
|
set -xe
|
||||||
REPO=$1
|
REPO=$1
|
||||||
|
|
||||||
LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.tag_name')
|
LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.name')
|
||||||
|
|
||||||
cat <<< $(jq ".version = \"$LATEST_TAG\"" docs/data/version.json) > docs/data/version.json
|
cat <<< $(jq ".version = \"$LATEST_TAG\"" docs/data/version.json) > docs/data/version.json
|
||||||
|
|||||||
85
.github/check_and_update.py
vendored
85
.github/check_and_update.py
vendored
@@ -1,85 +0,0 @@
|
|||||||
import hashlib
|
|
||||||
from huggingface_hub import hf_hub_download, get_paths_info
|
|
||||||
import requests
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
|
|
||||||
uri = sys.argv[1]
|
|
||||||
file_name = uri.split('/')[-1]
|
|
||||||
|
|
||||||
# Function to parse the URI and determine download method
|
|
||||||
def parse_uri(uri):
|
|
||||||
if uri.startswith('huggingface://'):
|
|
||||||
repo_id = uri.split('://')[1]
|
|
||||||
return 'huggingface', repo_id.rsplit('/', 1)[0]
|
|
||||||
elif 'huggingface.co' in uri:
|
|
||||||
parts = uri.split('/resolve/')
|
|
||||||
if len(parts) > 1:
|
|
||||||
repo_path = parts[0].split('https://huggingface.co/')[-1]
|
|
||||||
return 'huggingface', repo_path
|
|
||||||
return 'direct', uri
|
|
||||||
|
|
||||||
def calculate_sha256(file_path):
|
|
||||||
sha256_hash = hashlib.sha256()
|
|
||||||
with open(file_path, 'rb') as f:
|
|
||||||
for byte_block in iter(lambda: f.read(4096), b''):
|
|
||||||
sha256_hash.update(byte_block)
|
|
||||||
return sha256_hash.hexdigest()
|
|
||||||
|
|
||||||
def manual_safety_check_hf(repo_id):
|
|
||||||
scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
|
|
||||||
scan = scanResponse.json()
|
|
||||||
# Check if 'hasUnsafeFile' exists in the response
|
|
||||||
if 'hasUnsafeFile' in scan:
|
|
||||||
if scan['hasUnsafeFile']:
|
|
||||||
return scan
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
download_type, repo_id_or_url = parse_uri(uri)
|
|
||||||
|
|
||||||
new_checksum = None
|
|
||||||
file_path = None
|
|
||||||
|
|
||||||
# Decide download method based on URI type
|
|
||||||
if download_type == 'huggingface':
|
|
||||||
# Check if the repo is flagged as dangerous by HF
|
|
||||||
hazard = manual_safety_check_hf(repo_id_or_url)
|
|
||||||
if hazard != None:
|
|
||||||
print(f'Error: HuggingFace has detected security problems for {repo_id_or_url}: {str(hazard)}', filename=file_name)
|
|
||||||
sys.exit(5)
|
|
||||||
# Use HF API to pull sha
|
|
||||||
for file in get_paths_info(repo_id_or_url, [file_name], repo_type='model'):
|
|
||||||
try:
|
|
||||||
new_checksum = file.lfs.sha256
|
|
||||||
break
|
|
||||||
except Exception as e:
|
|
||||||
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
|
|
||||||
sys.exit(2)
|
|
||||||
if new_checksum is None:
|
|
||||||
try:
|
|
||||||
file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
|
|
||||||
except Exception as e:
|
|
||||||
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
|
|
||||||
sys.exit(2)
|
|
||||||
else:
|
|
||||||
response = requests.get(repo_id_or_url)
|
|
||||||
if response.status_code == 200:
|
|
||||||
with open(file_name, 'wb') as f:
|
|
||||||
f.write(response.content)
|
|
||||||
file_path = file_name
|
|
||||||
elif response.status_code == 404:
|
|
||||||
print(f'File not found: {response.status_code}', file=sys.stderr)
|
|
||||||
sys.exit(2)
|
|
||||||
else:
|
|
||||||
print(f'Error downloading file: {response.status_code}', file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
if new_checksum is None:
|
|
||||||
new_checksum = calculate_sha256(file_path)
|
|
||||||
print(new_checksum)
|
|
||||||
os.remove(file_path)
|
|
||||||
else:
|
|
||||||
print(new_checksum)
|
|
||||||
63
.github/checksum_checker.sh
vendored
63
.github/checksum_checker.sh
vendored
@@ -1,63 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
# This scripts needs yq and huggingface_hub to be installed
|
|
||||||
# to install hugingface_hub run pip install huggingface_hub
|
|
||||||
|
|
||||||
# Path to the input YAML file
|
|
||||||
input_yaml=$1
|
|
||||||
|
|
||||||
# Function to download file and check checksum using Python
|
|
||||||
function check_and_update_checksum() {
|
|
||||||
model_name="$1"
|
|
||||||
file_name="$2"
|
|
||||||
uri="$3"
|
|
||||||
old_checksum="$4"
|
|
||||||
idx="$5"
|
|
||||||
|
|
||||||
# Download the file and calculate new checksum using Python
|
|
||||||
new_checksum=$(python3 ./.github/check_and_update.py $uri)
|
|
||||||
result=$?
|
|
||||||
|
|
||||||
if [[ $result -eq 5 ]]; then
|
|
||||||
echo "Contaminated entry detected, deleting entry for $model_name..."
|
|
||||||
yq eval -i "del([$idx])" "$input_yaml"
|
|
||||||
return
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ "$new_checksum" == "" ]]; then
|
|
||||||
echo "Error calculating checksum for $file_name. Skipping..."
|
|
||||||
return
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Checksum for $file_name: $new_checksum"
|
|
||||||
|
|
||||||
# Compare and update the YAML file if checksums do not match
|
|
||||||
|
|
||||||
if [[ $result -eq 2 ]]; then
|
|
||||||
echo "File not found, deleting entry for $file_name..."
|
|
||||||
# yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\"))" "$input_yaml"
|
|
||||||
elif [[ "$old_checksum" != "$new_checksum" ]]; then
|
|
||||||
echo "Checksum mismatch for $file_name. Updating..."
|
|
||||||
yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\").sha256)" "$input_yaml"
|
|
||||||
yq eval -i "(.[$idx].files[] | select(.filename == \"$file_name\")).sha256 = \"$new_checksum\"" "$input_yaml"
|
|
||||||
elif [[ $result -ne 0 ]]; then
|
|
||||||
echo "Error downloading file $file_name. Skipping..."
|
|
||||||
else
|
|
||||||
echo "Checksum match for $file_name. No update needed."
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# Read the YAML and process each file
|
|
||||||
len=$(yq eval '. | length' "$input_yaml")
|
|
||||||
for ((i=0; i<$len; i++))
|
|
||||||
do
|
|
||||||
name=$(yq eval ".[$i].name" "$input_yaml")
|
|
||||||
files_len=$(yq eval ".[$i].files | length" "$input_yaml")
|
|
||||||
for ((j=0; j<$files_len; j++))
|
|
||||||
do
|
|
||||||
filename=$(yq eval ".[$i].files[$j].filename" "$input_yaml")
|
|
||||||
uri=$(yq eval ".[$i].files[$j].uri" "$input_yaml")
|
|
||||||
checksum=$(yq eval ".[$i].files[$j].sha256" "$input_yaml")
|
|
||||||
echo "Checking model $name, file $filename. URI = $uri, Checksum = $checksum"
|
|
||||||
check_and_update_checksum "$name" "$filename" "$uri" "$checksum" "$i"
|
|
||||||
done
|
|
||||||
done
|
|
||||||
304
.github/ci/modelslist.go
vendored
304
.github/ci/modelslist.go
vendored
@@ -1,304 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"html/template"
|
|
||||||
"io/ioutil"
|
|
||||||
"os"
|
|
||||||
|
|
||||||
"github.com/microcosm-cc/bluemonday"
|
|
||||||
"gopkg.in/yaml.v3"
|
|
||||||
)
|
|
||||||
|
|
||||||
var modelPageTemplate string = `
|
|
||||||
<!DOCTYPE html>
|
|
||||||
<html>
|
|
||||||
<head>
|
|
||||||
<meta charset="UTF-8">
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
||||||
<title>LocalAI models</title>
|
|
||||||
<link href="https://cdnjs.cloudflare.com/ajax/libs/flowbite/2.3.0/flowbite.min.css" rel="stylesheet" />
|
|
||||||
<script src="https://cdn.jsdelivr.net/npm/vanilla-lazyload@19.1.3/dist/lazyload.min.js"></script>
|
|
||||||
|
|
||||||
<link
|
|
||||||
rel="stylesheet"
|
|
||||||
href="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/styles/default.min.css"
|
|
||||||
/>
|
|
||||||
<script
|
|
||||||
defer
|
|
||||||
src="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/highlight.min.js"
|
|
||||||
></script>
|
|
||||||
<script
|
|
||||||
defer
|
|
||||||
src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"
|
|
||||||
></script>
|
|
||||||
<script
|
|
||||||
defer
|
|
||||||
src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"
|
|
||||||
></script>
|
|
||||||
<script
|
|
||||||
defer
|
|
||||||
src="https://cdn.jsdelivr.net/npm/dompurify@3.0.6/dist/purify.min.js"
|
|
||||||
></script>
|
|
||||||
|
|
||||||
<link href="/static/general.css" rel="stylesheet" />
|
|
||||||
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
|
|
||||||
<link
|
|
||||||
href="https://fonts.googleapis.com/css?family=Roboto:300,400,500,700,900&display=swap"
|
|
||||||
rel="stylesheet" />
|
|
||||||
<link
|
|
||||||
rel="stylesheet"
|
|
||||||
href="https://cdn.jsdelivr.net/npm/tw-elements/css/tw-elements.min.css" />
|
|
||||||
<script src="https://cdn.tailwindcss.com/3.3.0"></script>
|
|
||||||
<script>
|
|
||||||
tailwind.config = {
|
|
||||||
darkMode: "class",
|
|
||||||
theme: {
|
|
||||||
fontFamily: {
|
|
||||||
sans: ["Roboto", "sans-serif"],
|
|
||||||
body: ["Roboto", "sans-serif"],
|
|
||||||
mono: ["ui-monospace", "monospace"],
|
|
||||||
},
|
|
||||||
},
|
|
||||||
corePlugins: {
|
|
||||||
preflight: false,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
</script>
|
|
||||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.1.1/css/all.min.css">
|
|
||||||
<script src="https://unpkg.com/htmx.org@1.9.12" integrity="sha384-ujb1lZYygJmzgSwoxRggbCHcjc0rB2XoQrxeTUQyRjrOnlCoYta87iKBWq3EsdM2" crossorigin="anonymous"></script>
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="bg-gray-900 text-gray-200">
|
|
||||||
<div class="flex flex-col min-h-screen">
|
|
||||||
|
|
||||||
<nav class="bg-gray-800 shadow-lg">
|
|
||||||
<div class="container mx-auto px-4 py-4">
|
|
||||||
<div class="flex items-center justify-between">
|
|
||||||
<div class="flex items-center">
|
|
||||||
<a href="/" class="text-white text-xl font-bold"><img src="https://github.com/mudler/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
|
|
||||||
<a href="/" class="text-white text-xl font-bold">LocalAI</a>
|
|
||||||
</div>
|
|
||||||
<!-- Menu button for small screens -->
|
|
||||||
<div class="lg:hidden">
|
|
||||||
<button id="menu-toggle" class="text-gray-400 hover:text-white focus:outline-none">
|
|
||||||
<i class="fas fa-bars fa-lg"></i>
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
<!-- Navigation links -->
|
|
||||||
<div class="hidden lg:flex lg:items-center lg:justify-end lg:flex-1 lg:w-0">
|
|
||||||
<a href="https://localai.io" class="text-gray-400 hover:text-white px-3 py-2 rounded" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<!-- Collapsible menu for small screens -->
|
|
||||||
<div class="hidden lg:hidden" id="mobile-menu">
|
|
||||||
<div class="pt-4 pb-3 border-t border-gray-700">
|
|
||||||
|
|
||||||
<a href="https://localai.io" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<style>
|
|
||||||
.is-hidden {
|
|
||||||
display: none;
|
|
||||||
}
|
|
||||||
</style>
|
|
||||||
|
|
||||||
<div class="container mx-auto px-4 flex-grow">
|
|
||||||
|
|
||||||
<div class="models mt-12">
|
|
||||||
<h2 class="text-center text-3xl font-semibold text-gray-100">
|
|
||||||
LocalAI model gallery list </h2><br>
|
|
||||||
|
|
||||||
<h2 class="text-center text-3xl font-semibold text-gray-100">
|
|
||||||
|
|
||||||
🖼️ Available {{.AvailableModels}} models</i> <a href="https://localai.io/models/" target="_blank" >
|
|
||||||
<i class="fas fa-circle-info pr-2"></i>
|
|
||||||
</a></h2>
|
|
||||||
|
|
||||||
<h3>
|
|
||||||
Refer to the Model gallery <a href="https://localai.io/models/" target="_blank" ><i class="fas fa-circle-info pr-2"></i></a> for more information on how to use the models with LocalAI.<br>
|
|
||||||
|
|
||||||
You can install models with the CLI command <code>local-ai models install <model-name></code>. or by using the WebUI.
|
|
||||||
</h3>
|
|
||||||
|
|
||||||
<input class="form-control appearance-none block w-full mt-5 px-3 py-2 text-base font-normal text-gray-300 pb-2 mb-5 bg-gray-800 bg-clip-padding border border-solid border-gray-600 rounded transition ease-in-out m-0 focus:text-gray-300 focus:bg-gray-900 focus:border-blue-500 focus:outline-none" type="search"
|
|
||||||
id="searchbox" placeholder="Live search keyword..">
|
|
||||||
<div class="dark grid grid-cols-1 grid-rows-1 md:grid-cols-3 block rounded-lg shadow-secondary-1 dark:bg-surface-dark">
|
|
||||||
{{ range $_, $model := .Models }}
|
|
||||||
<div class="box me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface pb-2">
|
|
||||||
<div>
|
|
||||||
{{ $icon := "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg" }}
|
|
||||||
{{ if $model.Icon }}
|
|
||||||
{{ $icon = $model.Icon }}
|
|
||||||
{{ end }}
|
|
||||||
<div class="flex justify-center items-center">
|
|
||||||
<img data-src="{{ $icon }}" alt="{{$model.Name}}" class="rounded-t-lg max-h-48 max-w-96 object-cover mt-3 lazy">
|
|
||||||
</div>
|
|
||||||
<div class="p-6 text-surface dark:text-white">
|
|
||||||
<h5 class="mb-2 text-xl font-medium leading-tight">{{$model.Name}}</h5>
|
|
||||||
|
|
||||||
|
|
||||||
<p class="mb-4 text-base truncate">{{ $model.Description }}</p>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
<div class="px-6 pt-4 pb-2">
|
|
||||||
|
|
||||||
<!-- Modal toggle -->
|
|
||||||
<button data-modal-target="{{ $model.Name}}-modal" data-modal-toggle="{{ $model.Name }}-modal" class="block text-white bg-blue-700 hover:bg-blue-800 focus:ring-4 focus:outline-none focus:ring-blue-300 font-medium rounded-lg text-sm px-5 py-2.5 text-center dark:bg-blue-600 dark:hover:bg-blue-700 dark:focus:ring-blue-800" type="button">
|
|
||||||
More info
|
|
||||||
</button>
|
|
||||||
|
|
||||||
<!-- Main modal -->
|
|
||||||
<div id="{{ $model.Name}}-modal" tabindex="-1" aria-hidden="true" class="hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full">
|
|
||||||
<div class="relative p-4 w-full max-w-2xl max-h-full">
|
|
||||||
<!-- Modal content -->
|
|
||||||
<div class="relative bg-white rounded-lg shadow dark:bg-gray-700">
|
|
||||||
<!-- Modal header -->
|
|
||||||
<div class="flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600">
|
|
||||||
<h3 class="text-xl font-semibold text-gray-900 dark:text-white">
|
|
||||||
{{ $model.Name}}
|
|
||||||
</h3>
|
|
||||||
<button type="button" class="text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white" data-modal-hide="{{$model.Name}}-modal">
|
|
||||||
<svg class="w-3 h-3" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 14 14">
|
|
||||||
<path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6"/>
|
|
||||||
</svg>
|
|
||||||
<span class="sr-only">Close modal</span>
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
<!-- Modal body -->
|
|
||||||
<div class="p-4 md:p-5 space-y-4">
|
|
||||||
<div class="flex justify-center items-center">
|
|
||||||
<img data-src="{{ $icon }}" alt="{{$model.Name}}" class="lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3">
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
|
|
||||||
{{ $model.Description }}
|
|
||||||
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
|
|
||||||
To install the model with the CLI, run: <br>
|
|
||||||
<code> local-ai models install {{$model.Name}} </code> <br>
|
|
||||||
|
|
||||||
<hr>
|
|
||||||
See also <a href="https://localai.io/models/" target="_blank" >
|
|
||||||
Installation <i class="fas fa-circle-info pr-2"></i>
|
|
||||||
</a> to see how to install models with the REST API.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
|
|
||||||
<ul>
|
|
||||||
{{ range $_, $u := $model.URLs }}
|
|
||||||
<li><a href="{{ $u }}" target=_blank><i class="fa-solid fa-link"></i> {{ $u }}</a></li>
|
|
||||||
{{ end }}
|
|
||||||
</ul>
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
<!-- Modal footer -->
|
|
||||||
<div class="flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600">
|
|
||||||
<button data-modal-hide="{{ $model.Name}}-modal" type="button" class="py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700">Close</button>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
{{ end }}
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<script>
|
|
||||||
var lazyLoadInstance = new LazyLoad({
|
|
||||||
// Your custom settings go here
|
|
||||||
});
|
|
||||||
|
|
||||||
let cards = document.querySelectorAll('.box')
|
|
||||||
|
|
||||||
function liveSearch() {
|
|
||||||
let search_query = document.getElementById("searchbox").value;
|
|
||||||
|
|
||||||
//Use innerText if all contents are visible
|
|
||||||
//Use textContent for including hidden elements
|
|
||||||
for (var i = 0; i < cards.length; i++) {
|
|
||||||
if(cards[i].textContent.toLowerCase()
|
|
||||||
.includes(search_query.toLowerCase())) {
|
|
||||||
cards[i].classList.remove("is-hidden");
|
|
||||||
} else {
|
|
||||||
cards[i].classList.add("is-hidden");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//A little delay
|
|
||||||
let typingTimer;
|
|
||||||
let typeInterval = 500;
|
|
||||||
let searchInput = document.getElementById('searchbox');
|
|
||||||
|
|
||||||
searchInput.addEventListener('keyup', () => {
|
|
||||||
clearTimeout(typingTimer);
|
|
||||||
typingTimer = setTimeout(liveSearch, typeInterval);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/flowbite/2.3.0/flowbite.min.js"></script>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
`
|
|
||||||
|
|
||||||
type GalleryModel struct {
|
|
||||||
Name string `json:"name" yaml:"name"`
|
|
||||||
URLs []string `json:"urls" yaml:"urls"`
|
|
||||||
Icon string `json:"icon" yaml:"icon"`
|
|
||||||
Description string `json:"description" yaml:"description"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
// read the YAML file which contains the models
|
|
||||||
|
|
||||||
f, err := ioutil.ReadFile(os.Args[1])
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println("Error reading file:", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
models := []*GalleryModel{}
|
|
||||||
err = yaml.Unmarshal(f, &models)
|
|
||||||
if err != nil {
|
|
||||||
// write to stderr
|
|
||||||
os.Stderr.WriteString("Error unmarshaling YAML: " + err.Error() + "\n")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure that all arbitrary text content is sanitized before display
|
|
||||||
for i, m := range models {
|
|
||||||
models[i].Name = bluemonday.StrictPolicy().Sanitize(m.Name)
|
|
||||||
models[i].Description = bluemonday.StrictPolicy().Sanitize(m.Description)
|
|
||||||
}
|
|
||||||
|
|
||||||
// render the template
|
|
||||||
data := struct {
|
|
||||||
Models []*GalleryModel
|
|
||||||
AvailableModels int
|
|
||||||
}{
|
|
||||||
Models: models,
|
|
||||||
AvailableModels: len(models),
|
|
||||||
}
|
|
||||||
tmpl := template.Must(template.New("modelPage").Parse(modelPageTemplate))
|
|
||||||
|
|
||||||
err = tmpl.Execute(os.Stdout, data)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println("Error executing template:", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
135
.github/dependabot.yml
vendored
135
.github/dependabot.yml
vendored
@@ -1,135 +0,0 @@
|
|||||||
# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
|
|
||||||
version: 2
|
|
||||||
updates:
|
|
||||||
- package-ecosystem: "gitsubmodule"
|
|
||||||
directory: "/"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "gomod"
|
|
||||||
directory: "/"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
ignore:
|
|
||||||
- dependency-name: "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
- package-ecosystem: "github-actions"
|
|
||||||
# Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
|
|
||||||
directory: "/"
|
|
||||||
schedule:
|
|
||||||
# Check for updates to GitHub Actions every weekday
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
# Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
|
|
||||||
directory: "/"
|
|
||||||
schedule:
|
|
||||||
# Check for updates to GitHub Actions every weekday
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "docker"
|
|
||||||
# Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
|
|
||||||
directory: "/"
|
|
||||||
schedule:
|
|
||||||
# Check for updates to GitHub Actions every weekday
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/autogptq"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/bark"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/common/template"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/coqui"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/diffusers"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/exllama"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/exllama2"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/mamba"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/openvoice"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/parler-tts"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/rerankers"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/sentencetransformers"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/transformers"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/transformers-musicgen"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/vall-e-x"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/vllm"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/examples/chainlit"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/examples/functions"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/examples/langchain/langchainpy-localai-example"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/examples/langchain-chroma"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/examples/streamlit-bot"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "docker"
|
|
||||||
directory: "/examples/k8sgpt"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "docker"
|
|
||||||
directory: "/examples/kubernetes"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "docker"
|
|
||||||
directory: "/examples/langchain"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "gomod"
|
|
||||||
directory: "/examples/semantic-todo"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "docker"
|
|
||||||
directory: "/examples/telegram-bot"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
24
.github/labeler.yml
vendored
24
.github/labeler.yml
vendored
@@ -1,24 +0,0 @@
|
|||||||
enhancements:
|
|
||||||
- head-branch: ['^feature', 'feature']
|
|
||||||
|
|
||||||
kind/documentation:
|
|
||||||
- any:
|
|
||||||
- changed-files:
|
|
||||||
- any-glob-to-any-file: 'docs/*'
|
|
||||||
- changed-files:
|
|
||||||
- any-glob-to-any-file: '*.md'
|
|
||||||
|
|
||||||
area/ai-model:
|
|
||||||
- any:
|
|
||||||
- changed-files:
|
|
||||||
- any-glob-to-any-file: 'gallery/*'
|
|
||||||
|
|
||||||
examples:
|
|
||||||
- any:
|
|
||||||
- changed-files:
|
|
||||||
- any-glob-to-any-file: 'examples/*'
|
|
||||||
|
|
||||||
ci:
|
|
||||||
- any:
|
|
||||||
- changed-files:
|
|
||||||
- any-glob-to-any-file: '.github/*'
|
|
||||||
13
.github/release.yml
vendored
13
.github/release.yml
vendored
@@ -12,23 +12,10 @@ changelog:
|
|||||||
- title: "Bug fixes :bug:"
|
- title: "Bug fixes :bug:"
|
||||||
labels:
|
labels:
|
||||||
- bug
|
- bug
|
||||||
- regression
|
|
||||||
- title: "🖧 P2P area"
|
|
||||||
labels:
|
|
||||||
- area/p2p
|
|
||||||
- title: Exciting New Features 🎉
|
- title: Exciting New Features 🎉
|
||||||
labels:
|
labels:
|
||||||
- Semver-Minor
|
- Semver-Minor
|
||||||
- enhancement
|
- enhancement
|
||||||
- ux
|
|
||||||
- roadmap
|
|
||||||
- title: 🧠 Models
|
|
||||||
labels:
|
|
||||||
- area/ai-model
|
|
||||||
- title: 📖 Documentation and examples
|
|
||||||
labels:
|
|
||||||
- kind/documentation
|
|
||||||
- examples
|
|
||||||
- title: 👒 Dependencies
|
- title: 👒 Dependencies
|
||||||
labels:
|
labels:
|
||||||
- dependencies
|
- dependencies
|
||||||
|
|||||||
25
.github/workflows/bump_deps.yaml
vendored
25
.github/workflows/bump_deps.yaml
vendored
@@ -9,6 +9,9 @@ jobs:
|
|||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
|
- repository: "go-skynet/go-llama.cpp"
|
||||||
|
variable: "GOLLAMA_VERSION"
|
||||||
|
branch: "master"
|
||||||
- repository: "ggerganov/llama.cpp"
|
- repository: "ggerganov/llama.cpp"
|
||||||
variable: "CPPLLAMA_VERSION"
|
variable: "CPPLLAMA_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
@@ -27,6 +30,9 @@ jobs:
|
|||||||
- repository: "go-skynet/bloomz.cpp"
|
- repository: "go-skynet/bloomz.cpp"
|
||||||
variable: "BLOOMZ_VERSION"
|
variable: "BLOOMZ_VERSION"
|
||||||
branch: "main"
|
branch: "main"
|
||||||
|
- repository: "nomic-ai/gpt4all"
|
||||||
|
variable: "GPT4ALL_VERSION"
|
||||||
|
branch: "main"
|
||||||
- repository: "mudler/go-ggllm.cpp"
|
- repository: "mudler/go-ggllm.cpp"
|
||||||
variable: "GOGGLLM_VERSION"
|
variable: "GOGGLLM_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
@@ -40,30 +46,17 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- name: Bump dependencies 🔧
|
- name: Bump dependencies 🔧
|
||||||
id: bump
|
|
||||||
run: |
|
run: |
|
||||||
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
|
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
|
||||||
{
|
|
||||||
echo 'message<<EOF'
|
|
||||||
cat "${{ matrix.variable }}_message.txt"
|
|
||||||
echo EOF
|
|
||||||
} >> "$GITHUB_OUTPUT"
|
|
||||||
{
|
|
||||||
echo 'commit<<EOF'
|
|
||||||
cat "${{ matrix.variable }}_commit.txt"
|
|
||||||
echo EOF
|
|
||||||
} >> "$GITHUB_OUTPUT"
|
|
||||||
rm -rfv ${{ matrix.variable }}_message.txt
|
|
||||||
rm -rfv ${{ matrix.variable }}_commit.txt
|
|
||||||
- name: Create Pull Request
|
- name: Create Pull Request
|
||||||
uses: peter-evans/create-pull-request@v7
|
uses: peter-evans/create-pull-request@v5
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||||
push-to-fork: ci-forks/LocalAI
|
push-to-fork: ci-forks/LocalAI
|
||||||
commit-message: ':arrow_up: Update ${{ matrix.repository }}'
|
commit-message: ':arrow_up: Update ${{ matrix.repository }}'
|
||||||
title: 'chore: :arrow_up: Update ${{ matrix.repository }} to `${{ steps.bump.outputs.commit }}`'
|
title: ':arrow_up: Update ${{ matrix.repository }}'
|
||||||
branch: "update/${{ matrix.variable }}"
|
branch: "update/${{ matrix.variable }}"
|
||||||
body: ${{ steps.bump.outputs.message }}
|
body: Bump of ${{ matrix.repository }} version
|
||||||
signoff: true
|
signoff: true
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
4
.github/workflows/bump_docs.yaml
vendored
4
.github/workflows/bump_docs.yaml
vendored
@@ -17,12 +17,12 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
bash .github/bump_docs.sh ${{ matrix.repository }}
|
bash .github/bump_docs.sh ${{ matrix.repository }}
|
||||||
- name: Create Pull Request
|
- name: Create Pull Request
|
||||||
uses: peter-evans/create-pull-request@v7
|
uses: peter-evans/create-pull-request@v5
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||||
push-to-fork: ci-forks/LocalAI
|
push-to-fork: ci-forks/LocalAI
|
||||||
commit-message: ':arrow_up: Update docs version ${{ matrix.repository }}'
|
commit-message: ':arrow_up: Update docs version ${{ matrix.repository }}'
|
||||||
title: 'docs: :arrow_up: update docs version ${{ matrix.repository }}'
|
title: ':arrow_up: Update docs version ${{ matrix.repository }}'
|
||||||
branch: "update/docs"
|
branch: "update/docs"
|
||||||
body: Bump of ${{ matrix.repository }} version inside docs
|
body: Bump of ${{ matrix.repository }} version inside docs
|
||||||
signoff: true
|
signoff: true
|
||||||
|
|||||||
47
.github/workflows/checksum_checker.yaml
vendored
47
.github/workflows/checksum_checker.yaml
vendored
@@ -1,47 +0,0 @@
|
|||||||
name: Check if checksums are up-to-date
|
|
||||||
on:
|
|
||||||
schedule:
|
|
||||||
- cron: 0 20 * * *
|
|
||||||
workflow_dispatch:
|
|
||||||
jobs:
|
|
||||||
checksum_check:
|
|
||||||
runs-on: arc-runner-set
|
|
||||||
steps:
|
|
||||||
- name: Force Install GIT latest
|
|
||||||
run: |
|
|
||||||
sudo apt-get update \
|
|
||||||
&& sudo apt-get install -y software-properties-common \
|
|
||||||
&& sudo apt-get update \
|
|
||||||
&& sudo add-apt-repository -y ppa:git-core/ppa \
|
|
||||||
&& sudo apt-get update \
|
|
||||||
&& sudo apt-get install -y git
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
- name: Install dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install -y pip wget
|
|
||||||
sudo pip install --upgrade pip
|
|
||||||
pip install huggingface_hub
|
|
||||||
- name: 'Setup yq'
|
|
||||||
uses: dcarbone/install-yq-action@v1.1.1
|
|
||||||
with:
|
|
||||||
version: 'v4.44.2'
|
|
||||||
download-compressed: true
|
|
||||||
force: true
|
|
||||||
|
|
||||||
- name: Checksum checker 🔧
|
|
||||||
run: |
|
|
||||||
export HF_HOME=/hf_cache
|
|
||||||
sudo mkdir /hf_cache
|
|
||||||
sudo chmod 777 /hf_cache
|
|
||||||
bash .github/checksum_checker.sh gallery/index.yaml
|
|
||||||
- name: Create Pull Request
|
|
||||||
uses: peter-evans/create-pull-request@v7
|
|
||||||
with:
|
|
||||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
|
||||||
push-to-fork: ci-forks/LocalAI
|
|
||||||
commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
|
|
||||||
title: 'chore(model-gallery): :arrow_up: update checksum'
|
|
||||||
branch: "update/checksum"
|
|
||||||
body: Updating checksums in gallery/index.yaml
|
|
||||||
signoff: true
|
|
||||||
43
.github/workflows/dependabot_auto.yml
vendored
43
.github/workflows/dependabot_auto.yml
vendored
@@ -1,43 +0,0 @@
|
|||||||
name: Dependabot auto-merge
|
|
||||||
on:
|
|
||||||
- pull_request_target
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
contents: write
|
|
||||||
pull-requests: write
|
|
||||||
packages: read
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
dependabot:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
if: ${{ github.actor == 'dependabot[bot]' }}
|
|
||||||
steps:
|
|
||||||
- name: Dependabot metadata
|
|
||||||
id: metadata
|
|
||||||
uses: dependabot/fetch-metadata@v2.2.0
|
|
||||||
with:
|
|
||||||
github-token: "${{ secrets.GITHUB_TOKEN }}"
|
|
||||||
skip-commit-verification: true
|
|
||||||
|
|
||||||
- name: Checkout repository
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Approve a PR if not already approved
|
|
||||||
run: |
|
|
||||||
gh pr checkout "$PR_URL"
|
|
||||||
if [ "$(gh pr status --json reviewDecision -q .currentBranch.reviewDecision)" != "APPROVED" ];
|
|
||||||
then
|
|
||||||
gh pr review --approve "$PR_URL"
|
|
||||||
else
|
|
||||||
echo "PR already approved.";
|
|
||||||
fi
|
|
||||||
env:
|
|
||||||
PR_URL: ${{github.event.pull_request.html_url}}
|
|
||||||
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
|
|
||||||
|
|
||||||
- name: Enable auto-merge for Dependabot PRs
|
|
||||||
if: ${{ contains(github.event.pull_request.title, 'bump')}}
|
|
||||||
run: gh pr merge --auto --squash "$PR_URL"
|
|
||||||
env:
|
|
||||||
PR_URL: ${{github.event.pull_request.html_url}}
|
|
||||||
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
|
|
||||||
64
.github/workflows/deploy-explorer.yaml
vendored
64
.github/workflows/deploy-explorer.yaml
vendored
@@ -1,64 +0,0 @@
|
|||||||
name: Explorer deployment
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
tags:
|
|
||||||
- 'v*'
|
|
||||||
|
|
||||||
concurrency:
|
|
||||||
group: ci-deploy-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build-linux:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- uses: actions/setup-go@v5
|
|
||||||
with:
|
|
||||||
go-version: '1.21.x'
|
|
||||||
cache: false
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
|
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
|
||||||
make protogen-go
|
|
||||||
- name: Build api
|
|
||||||
run: |
|
|
||||||
CGO_ENABLED=0 make build-api
|
|
||||||
- name: rm
|
|
||||||
uses: appleboy/ssh-action@v1.1.0
|
|
||||||
with:
|
|
||||||
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
|
||||||
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
|
||||||
key: ${{ secrets.EXPLORER_SSH_KEY }}
|
|
||||||
port: ${{ secrets.EXPLORER_SSH_PORT }}
|
|
||||||
script: |
|
|
||||||
sudo rm -rf local-ai/ || true
|
|
||||||
- name: copy file via ssh
|
|
||||||
uses: appleboy/scp-action@v0.1.7
|
|
||||||
with:
|
|
||||||
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
|
||||||
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
|
||||||
key: ${{ secrets.EXPLORER_SSH_KEY }}
|
|
||||||
port: ${{ secrets.EXPLORER_SSH_PORT }}
|
|
||||||
source: "local-ai"
|
|
||||||
overwrite: true
|
|
||||||
rm: true
|
|
||||||
target: ./local-ai
|
|
||||||
- name: restarting
|
|
||||||
uses: appleboy/ssh-action@v1.1.0
|
|
||||||
with:
|
|
||||||
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
|
||||||
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
|
||||||
key: ${{ secrets.EXPLORER_SSH_KEY }}
|
|
||||||
port: ${{ secrets.EXPLORER_SSH_PORT }}
|
|
||||||
script: |
|
|
||||||
sudo cp -rfv local-ai/local-ai /usr/bin/local-ai
|
|
||||||
sudo systemctl restart local-ai
|
|
||||||
83
.github/workflows/disabled/comment-pr.yaml
vendored
83
.github/workflows/disabled/comment-pr.yaml
vendored
@@ -1,83 +0,0 @@
|
|||||||
name: Comment PRs
|
|
||||||
on:
|
|
||||||
pull_request_target:
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
comment-pr:
|
|
||||||
env:
|
|
||||||
MODEL_NAME: hermes-2-theta-llama-3-8b
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Checkout code
|
|
||||||
uses: actions/checkout@v3
|
|
||||||
with:
|
|
||||||
ref: "${{ github.event.pull_request.merge_commit_sha }}"
|
|
||||||
fetch-depth: 0 # needed to checkout all branches for this Action to work
|
|
||||||
- uses: mudler/localai-github-action@v1
|
|
||||||
with:
|
|
||||||
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
|
||||||
# Check the PR diff using the current branch and the base branch of the PR
|
|
||||||
- uses: GrantBirki/git-diff-action@v2.7.0
|
|
||||||
id: git-diff-action
|
|
||||||
with:
|
|
||||||
json_diff_file_output: diff.json
|
|
||||||
raw_diff_file_output: diff.txt
|
|
||||||
file_output_only: "true"
|
|
||||||
base_branch: ${{ github.event.pull_request.base.sha }}
|
|
||||||
- name: Show diff
|
|
||||||
env:
|
|
||||||
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
|
|
||||||
run: |
|
|
||||||
cat $DIFF
|
|
||||||
- name: Summarize
|
|
||||||
env:
|
|
||||||
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
|
|
||||||
id: summarize
|
|
||||||
run: |
|
|
||||||
input="$(cat $DIFF)"
|
|
||||||
|
|
||||||
# Define the LocalAI API endpoint
|
|
||||||
API_URL="http://localhost:8080/chat/completions"
|
|
||||||
|
|
||||||
# Create a JSON payload using jq to handle special characters
|
|
||||||
json_payload=$(jq -n --arg input "$input" '{
|
|
||||||
model: "'$MODEL_NAME'",
|
|
||||||
messages: [
|
|
||||||
{
|
|
||||||
role: "system",
|
|
||||||
content: "You are LocalAI-bot in Github that helps understanding PRs and assess complexity. Explain what has changed in this PR diff and why"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
role: "user",
|
|
||||||
content: $input
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}')
|
|
||||||
|
|
||||||
# Send the request to LocalAI
|
|
||||||
response=$(curl -s -X POST $API_URL \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d "$json_payload")
|
|
||||||
|
|
||||||
# Extract the summary from the response
|
|
||||||
summary="$(echo $response | jq -r '.choices[0].message.content')"
|
|
||||||
|
|
||||||
# Print the summary
|
|
||||||
# -H "Authorization: Bearer $API_KEY" \
|
|
||||||
echo "Summary:"
|
|
||||||
echo "$summary"
|
|
||||||
echo "payload sent"
|
|
||||||
echo "$json_payload"
|
|
||||||
{
|
|
||||||
echo 'message<<EOF'
|
|
||||||
echo "$summary"
|
|
||||||
echo EOF
|
|
||||||
} >> "$GITHUB_OUTPUT"
|
|
||||||
docker logs --tail 10 local-ai
|
|
||||||
- uses: mshick/add-pr-comment@v2
|
|
||||||
if: always()
|
|
||||||
with:
|
|
||||||
repo-token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
|
||||||
message: ${{ steps.summarize.outputs.message }}
|
|
||||||
message-failure: |
|
|
||||||
Uh oh! Could not analyze this PR, maybe it's too big?
|
|
||||||
94
.github/workflows/generate_grpc_cache.yaml
vendored
94
.github/workflows/generate_grpc_cache.yaml
vendored
@@ -1,94 +0,0 @@
|
|||||||
name: 'generate and publish GRPC docker caches'
|
|
||||||
|
|
||||||
on:
|
|
||||||
workflow_dispatch:
|
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
|
|
||||||
concurrency:
|
|
||||||
group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
|
||||||
cancel-in-progress: true
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
generate_caches:
|
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
include:
|
|
||||||
- grpc-base-image: ubuntu:22.04
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
platforms: 'linux/amd64,linux/arm64'
|
|
||||||
runs-on: ${{matrix.runs-on}}
|
|
||||||
steps:
|
|
||||||
- name: Release space from worker
|
|
||||||
if: matrix.runs-on == 'ubuntu-latest'
|
|
||||||
run: |
|
|
||||||
echo "Listing top largest packages"
|
|
||||||
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
|
||||||
head -n 30 <<< "${pkgs}"
|
|
||||||
echo
|
|
||||||
df -h
|
|
||||||
echo
|
|
||||||
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
|
|
||||||
sudo apt-get remove --auto-remove android-sdk-platform-tools || true
|
|
||||||
sudo apt-get purge --auto-remove android-sdk-platform-tools || true
|
|
||||||
sudo rm -rf /usr/local/lib/android
|
|
||||||
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
|
|
||||||
sudo rm -rf /usr/share/dotnet
|
|
||||||
sudo apt-get remove -y '^mono-.*' || true
|
|
||||||
sudo apt-get remove -y '^ghc-.*' || true
|
|
||||||
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
|
|
||||||
sudo apt-get remove -y 'php.*' || true
|
|
||||||
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
|
|
||||||
sudo apt-get remove -y '^google-.*' || true
|
|
||||||
sudo apt-get remove -y azure-cli || true
|
|
||||||
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
|
|
||||||
sudo apt-get remove -y '^gfortran-.*' || true
|
|
||||||
sudo apt-get remove -y microsoft-edge-stable || true
|
|
||||||
sudo apt-get remove -y firefox || true
|
|
||||||
sudo apt-get remove -y powershell || true
|
|
||||||
sudo apt-get remove -y r-base-core || true
|
|
||||||
sudo apt-get autoremove -y
|
|
||||||
sudo apt-get clean
|
|
||||||
echo
|
|
||||||
echo "Listing top largest packages"
|
|
||||||
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
|
||||||
head -n 30 <<< "${pkgs}"
|
|
||||||
echo
|
|
||||||
sudo rm -rfv build || true
|
|
||||||
sudo rm -rf /usr/share/dotnet || true
|
|
||||||
sudo rm -rf /opt/ghc || true
|
|
||||||
sudo rm -rf "/usr/local/share/boost" || true
|
|
||||||
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
|
|
||||||
df -h
|
|
||||||
|
|
||||||
- name: Set up QEMU
|
|
||||||
uses: docker/setup-qemu-action@master
|
|
||||||
with:
|
|
||||||
platforms: all
|
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
|
||||||
id: buildx
|
|
||||||
uses: docker/setup-buildx-action@master
|
|
||||||
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Cache GRPC
|
|
||||||
uses: docker/build-push-action@v6
|
|
||||||
with:
|
|
||||||
builder: ${{ steps.buildx.outputs.name }}
|
|
||||||
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
|
|
||||||
# This means that even the MAKEFLAGS have to be an EXACT match.
|
|
||||||
# If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
|
|
||||||
build-args: |
|
|
||||||
GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
|
|
||||||
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
|
||||||
GRPC_VERSION=v1.65.0
|
|
||||||
context: .
|
|
||||||
file: ./Dockerfile
|
|
||||||
cache-to: type=gha,ignore-error=true
|
|
||||||
cache-from: type=gha
|
|
||||||
target: grpc
|
|
||||||
platforms: ${{ matrix.platforms }}
|
|
||||||
push: false
|
|
||||||
59
.github/workflows/generate_intel_image.yaml
vendored
59
.github/workflows/generate_intel_image.yaml
vendored
@@ -1,59 +0,0 @@
|
|||||||
name: 'generate and publish intel docker caches'
|
|
||||||
|
|
||||||
on:
|
|
||||||
workflow_dispatch:
|
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
|
|
||||||
concurrency:
|
|
||||||
group: intel-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
|
||||||
cancel-in-progress: true
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
generate_caches:
|
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
include:
|
|
||||||
- base-image: intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
runs-on: ${{matrix.runs-on}}
|
|
||||||
steps:
|
|
||||||
- name: Set up QEMU
|
|
||||||
uses: docker/setup-qemu-action@master
|
|
||||||
with:
|
|
||||||
platforms: all
|
|
||||||
- name: Login to DockerHub
|
|
||||||
if: github.event_name != 'pull_request'
|
|
||||||
uses: docker/login-action@v3
|
|
||||||
with:
|
|
||||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
|
||||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
|
||||||
|
|
||||||
- name: Login to quay
|
|
||||||
if: github.event_name != 'pull_request'
|
|
||||||
uses: docker/login-action@v3
|
|
||||||
with:
|
|
||||||
registry: quay.io
|
|
||||||
username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
|
||||||
password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
|
||||||
- name: Set up Docker Buildx
|
|
||||||
id: buildx
|
|
||||||
uses: docker/setup-buildx-action@master
|
|
||||||
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Cache Intel images
|
|
||||||
uses: docker/build-push-action@v6
|
|
||||||
with:
|
|
||||||
builder: ${{ steps.buildx.outputs.name }}
|
|
||||||
build-args: |
|
|
||||||
BASE_IMAGE=${{ matrix.base-image }}
|
|
||||||
context: .
|
|
||||||
file: ./Dockerfile
|
|
||||||
tags: quay.io/go-skynet/intel-oneapi-base:latest
|
|
||||||
push: true
|
|
||||||
target: intel
|
|
||||||
platforms: ${{ matrix.platforms }}
|
|
||||||
170
.github/workflows/image-pr.yml
vendored
170
.github/workflows/image-pr.yml
vendored
@@ -22,8 +22,6 @@ jobs:
|
|||||||
platforms: ${{ matrix.platforms }}
|
platforms: ${{ matrix.platforms }}
|
||||||
runs-on: ${{ matrix.runs-on }}
|
runs-on: ${{ matrix.runs-on }}
|
||||||
base-image: ${{ matrix.base-image }}
|
base-image: ${{ matrix.base-image }}
|
||||||
grpc-base-image: ${{ matrix.grpc-base-image }}
|
|
||||||
makeflags: ${{ matrix.makeflags }}
|
|
||||||
secrets:
|
secrets:
|
||||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||||
@@ -32,22 +30,20 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
# Pushing with all jobs in parallel
|
# Pushing with all jobs in parallel
|
||||||
# eats the bandwidth of all the nodes
|
# eats the bandwidth of all the nodes
|
||||||
max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }}
|
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
# This is basically covered by the AIO test
|
- build-type: ''
|
||||||
# - build-type: ''
|
platforms: 'linux/amd64'
|
||||||
# platforms: 'linux/amd64'
|
tag-latest: 'false'
|
||||||
# tag-latest: 'false'
|
tag-suffix: '-ffmpeg'
|
||||||
# tag-suffix: '-ffmpeg'
|
ffmpeg: 'true'
|
||||||
# ffmpeg: 'true'
|
image-type: 'extras'
|
||||||
# image-type: 'extras'
|
runs-on: 'arc-runner-set'
|
||||||
# runs-on: 'arc-runner-set'
|
base-image: "ubuntu:22.04"
|
||||||
# base-image: "ubuntu:22.04"
|
|
||||||
# makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "12"
|
cuda-major-version: "12"
|
||||||
cuda-minor-version: "0"
|
cuda-minor-version: "1"
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-cublas-cuda12-ffmpeg'
|
tag-suffix: '-cublas-cuda12-ffmpeg'
|
||||||
@@ -55,86 +51,66 @@ jobs:
|
|||||||
image-type: 'extras'
|
image-type: 'extras'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
- build-type: 'hipblas'
|
||||||
# - build-type: 'hipblas'
|
platforms: 'linux/amd64'
|
||||||
# platforms: 'linux/amd64'
|
tag-latest: 'false'
|
||||||
# tag-latest: 'false'
|
tag-suffix: '-hipblas'
|
||||||
# tag-suffix: '-hipblas'
|
ffmpeg: 'false'
|
||||||
# ffmpeg: 'false'
|
image-type: 'extras'
|
||||||
# image-type: 'extras'
|
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
|
||||||
# base-image: "rocm/dev-ubuntu-22.04:6.1"
|
runs-on: 'arc-runner-set'
|
||||||
# grpc-base-image: "ubuntu:22.04"
|
- build-type: 'sycl_f16'
|
||||||
# runs-on: 'arc-runner-set'
|
platforms: 'linux/amd64'
|
||||||
# makeflags: "--jobs=3 --output-sync=target"
|
tag-latest: 'false'
|
||||||
# - build-type: 'sycl_f16'
|
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
||||||
# platforms: 'linux/amd64'
|
tag-suffix: 'sycl-f16-ffmpeg'
|
||||||
# tag-latest: 'false'
|
ffmpeg: 'true'
|
||||||
# base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
image-type: 'extras'
|
||||||
# grpc-base-image: "ubuntu:22.04"
|
runs-on: 'arc-runner-set'
|
||||||
# tag-suffix: 'sycl-f16-ffmpeg'
|
core-image-build:
|
||||||
# ffmpeg: 'true'
|
uses: ./.github/workflows/image_build.yml
|
||||||
# image-type: 'extras'
|
with:
|
||||||
# runs-on: 'arc-runner-set'
|
tag-latest: ${{ matrix.tag-latest }}
|
||||||
# makeflags: "--jobs=3 --output-sync=target"
|
tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
# core-image-build:
|
ffmpeg: ${{ matrix.ffmpeg }}
|
||||||
# uses: ./.github/workflows/image_build.yml
|
image-type: ${{ matrix.image-type }}
|
||||||
# with:
|
build-type: ${{ matrix.build-type }}
|
||||||
# tag-latest: ${{ matrix.tag-latest }}
|
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||||
# tag-suffix: ${{ matrix.tag-suffix }}
|
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
# ffmpeg: ${{ matrix.ffmpeg }}
|
platforms: ${{ matrix.platforms }}
|
||||||
# image-type: ${{ matrix.image-type }}
|
runs-on: ${{ matrix.runs-on }}
|
||||||
# build-type: ${{ matrix.build-type }}
|
base-image: ${{ matrix.base-image }}
|
||||||
# cuda-major-version: ${{ matrix.cuda-major-version }}
|
secrets:
|
||||||
# cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
# platforms: ${{ matrix.platforms }}
|
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||||
# runs-on: ${{ matrix.runs-on }}
|
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||||
# base-image: ${{ matrix.base-image }}
|
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||||
# grpc-base-image: ${{ matrix.grpc-base-image }}
|
strategy:
|
||||||
# makeflags: ${{ matrix.makeflags }}
|
matrix:
|
||||||
# secrets:
|
include:
|
||||||
# dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
- build-type: ''
|
||||||
# dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
platforms: 'linux/amd64'
|
||||||
# quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
tag-latest: 'false'
|
||||||
# quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
tag-suffix: '-ffmpeg-core'
|
||||||
# strategy:
|
ffmpeg: 'true'
|
||||||
# matrix:
|
image-type: 'core'
|
||||||
# include:
|
runs-on: 'ubuntu-latest'
|
||||||
# - build-type: ''
|
base-image: "ubuntu:22.04"
|
||||||
# platforms: 'linux/amd64'
|
- build-type: 'sycl_f16'
|
||||||
# tag-latest: 'false'
|
platforms: 'linux/amd64'
|
||||||
# tag-suffix: '-ffmpeg-core'
|
tag-latest: 'false'
|
||||||
# ffmpeg: 'true'
|
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
||||||
# image-type: 'core'
|
tag-suffix: 'sycl-f16-ffmpeg-core'
|
||||||
# runs-on: 'ubuntu-latest'
|
ffmpeg: 'true'
|
||||||
# base-image: "ubuntu:22.04"
|
image-type: 'core'
|
||||||
# makeflags: "--jobs=4 --output-sync=target"
|
runs-on: 'arc-runner-set'
|
||||||
# - build-type: 'sycl_f16'
|
- build-type: 'cublas'
|
||||||
# platforms: 'linux/amd64'
|
cuda-major-version: "12"
|
||||||
# tag-latest: 'false'
|
cuda-minor-version: "1"
|
||||||
# base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
platforms: 'linux/amd64'
|
||||||
# grpc-base-image: "ubuntu:22.04"
|
tag-latest: 'false'
|
||||||
# tag-suffix: 'sycl-f16-ffmpeg-core'
|
tag-suffix: '-cublas-cuda12-ffmpeg-core'
|
||||||
# ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
# image-type: 'core'
|
image-type: 'core'
|
||||||
# runs-on: 'arc-runner-set'
|
runs-on: 'ubuntu-latest'
|
||||||
# makeflags: "--jobs=3 --output-sync=target"
|
base-image: "ubuntu:22.04"
|
||||||
# - build-type: 'cublas'
|
|
||||||
# cuda-major-version: "12"
|
|
||||||
# cuda-minor-version: "0"
|
|
||||||
# platforms: 'linux/amd64'
|
|
||||||
# tag-latest: 'false'
|
|
||||||
# tag-suffix: '-cublas-cuda12-ffmpeg-core'
|
|
||||||
# ffmpeg: 'true'
|
|
||||||
# image-type: 'core'
|
|
||||||
# runs-on: 'ubuntu-latest'
|
|
||||||
# base-image: "ubuntu:22.04"
|
|
||||||
# makeflags: "--jobs=4 --output-sync=target"
|
|
||||||
# - build-type: 'vulkan'
|
|
||||||
# platforms: 'linux/amd64'
|
|
||||||
# tag-latest: 'false'
|
|
||||||
# tag-suffix: '-vulkan-ffmpeg-core'
|
|
||||||
# ffmpeg: 'true'
|
|
||||||
# image-type: 'core'
|
|
||||||
# runs-on: 'ubuntu-latest'
|
|
||||||
# base-image: "ubuntu:22.04"
|
|
||||||
# makeflags: "--jobs=4 --output-sync=target"
|
|
||||||
210
.github/workflows/image.yml
vendored
210
.github/workflows/image.yml
vendored
@@ -13,78 +13,6 @@ concurrency:
|
|||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
hipblas-jobs:
|
|
||||||
uses: ./.github/workflows/image_build.yml
|
|
||||||
with:
|
|
||||||
tag-latest: ${{ matrix.tag-latest }}
|
|
||||||
tag-suffix: ${{ matrix.tag-suffix }}
|
|
||||||
ffmpeg: ${{ matrix.ffmpeg }}
|
|
||||||
image-type: ${{ matrix.image-type }}
|
|
||||||
build-type: ${{ matrix.build-type }}
|
|
||||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
|
||||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
|
||||||
platforms: ${{ matrix.platforms }}
|
|
||||||
runs-on: ${{ matrix.runs-on }}
|
|
||||||
base-image: ${{ matrix.base-image }}
|
|
||||||
grpc-base-image: ${{ matrix.grpc-base-image }}
|
|
||||||
aio: ${{ matrix.aio }}
|
|
||||||
makeflags: ${{ matrix.makeflags }}
|
|
||||||
latest-image: ${{ matrix.latest-image }}
|
|
||||||
latest-image-aio: ${{ matrix.latest-image-aio }}
|
|
||||||
secrets:
|
|
||||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
|
||||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
|
||||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
|
||||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
|
||||||
strategy:
|
|
||||||
# Pushing with all jobs in parallel
|
|
||||||
# eats the bandwidth of all the nodes
|
|
||||||
max-parallel: 2
|
|
||||||
matrix:
|
|
||||||
include:
|
|
||||||
- build-type: 'hipblas'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'auto'
|
|
||||||
tag-suffix: '-hipblas-ffmpeg'
|
|
||||||
ffmpeg: 'true'
|
|
||||||
image-type: 'extras'
|
|
||||||
aio: "-aio-gpu-hipblas"
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
latest-image: 'latest-gpu-hipblas'
|
|
||||||
latest-image-aio: 'latest-aio-gpu-hipblas'
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'hipblas'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
tag-suffix: '-hipblas'
|
|
||||||
ffmpeg: 'false'
|
|
||||||
image-type: 'extras'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'hipblas'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
tag-suffix: '-hipblas-ffmpeg-core'
|
|
||||||
ffmpeg: 'true'
|
|
||||||
image-type: 'core'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'hipblas'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
tag-suffix: '-hipblas-core'
|
|
||||||
ffmpeg: 'false'
|
|
||||||
image-type: 'core'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
self-hosted-jobs:
|
self-hosted-jobs:
|
||||||
uses: ./.github/workflows/image_build.yml
|
uses: ./.github/workflows/image_build.yml
|
||||||
with:
|
with:
|
||||||
@@ -98,11 +26,6 @@ jobs:
|
|||||||
platforms: ${{ matrix.platforms }}
|
platforms: ${{ matrix.platforms }}
|
||||||
runs-on: ${{ matrix.runs-on }}
|
runs-on: ${{ matrix.runs-on }}
|
||||||
base-image: ${{ matrix.base-image }}
|
base-image: ${{ matrix.base-image }}
|
||||||
grpc-base-image: ${{ matrix.grpc-base-image }}
|
|
||||||
aio: ${{ matrix.aio }}
|
|
||||||
makeflags: ${{ matrix.makeflags }}
|
|
||||||
latest-image: ${{ matrix.latest-image }}
|
|
||||||
latest-image-aio: ${{ matrix.latest-image-aio }}
|
|
||||||
secrets:
|
secrets:
|
||||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||||
@@ -111,7 +34,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
# Pushing with all jobs in parallel
|
# Pushing with all jobs in parallel
|
||||||
# eats the bandwidth of all the nodes
|
# eats the bandwidth of all the nodes
|
||||||
max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
|
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
# Extra images
|
# Extra images
|
||||||
@@ -124,16 +47,14 @@ jobs:
|
|||||||
image-type: 'extras'
|
image-type: 'extras'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: ''
|
- build-type: ''
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-ffmpeg'
|
tag-suffix: '-ffmpeg'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'extras'
|
image-type: 'extras'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "11"
|
cuda-major-version: "11"
|
||||||
cuda-minor-version: "7"
|
cuda-minor-version: "7"
|
||||||
@@ -144,10 +65,9 @@ jobs:
|
|||||||
image-type: 'extras'
|
image-type: 'extras'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "12"
|
cuda-major-version: "12"
|
||||||
cuda-minor-version: "0"
|
cuda-minor-version: "1"
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-cublas-cuda12'
|
tag-suffix: '-cublas-cuda12'
|
||||||
@@ -155,35 +75,26 @@ jobs:
|
|||||||
image-type: 'extras'
|
image-type: 'extras'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "11"
|
cuda-major-version: "11"
|
||||||
cuda-minor-version: "7"
|
cuda-minor-version: "7"
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-cublas-cuda11-ffmpeg'
|
tag-suffix: '-cublas-cuda11-ffmpeg'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'extras'
|
image-type: 'extras'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
aio: "-aio-gpu-nvidia-cuda-11"
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-11'
|
|
||||||
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "12"
|
cuda-major-version: "12"
|
||||||
cuda-minor-version: "0"
|
cuda-minor-version: "1"
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-cublas-cuda12-ffmpeg'
|
tag-suffix: '-cublas-cuda12-ffmpeg'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'extras'
|
image-type: 'extras'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
aio: "-aio-gpu-nvidia-cuda-12"
|
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-12'
|
|
||||||
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: ''
|
- build-type: ''
|
||||||
#platforms: 'linux/amd64,linux/arm64'
|
#platforms: 'linux/amd64,linux/arm64'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
@@ -193,74 +104,87 @@ jobs:
|
|||||||
image-type: 'extras'
|
image-type: 'extras'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
- build-type: 'hipblas'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-hipblas-ffmpeg'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'extras'
|
||||||
|
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-hipblas'
|
||||||
|
ffmpeg: 'false'
|
||||||
|
image-type: 'extras'
|
||||||
|
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
- build-type: 'sycl_f16'
|
- build-type: 'sycl_f16'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'false'
|
||||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
tag-suffix: '-sycl-f16-ffmpeg'
|
tag-suffix: '-sycl-f16-ffmpeg'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'extras'
|
image-type: 'extras'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
aio: "-aio-gpu-intel-f16"
|
|
||||||
latest-image: 'latest-gpu-intel-f16'
|
|
||||||
latest-image-aio: 'latest-aio-gpu-intel-f16'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'sycl_f32'
|
- build-type: 'sycl_f32'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'false'
|
||||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
tag-suffix: '-sycl-f32-ffmpeg'
|
tag-suffix: '-sycl-f32-ffmpeg'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'extras'
|
image-type: 'extras'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
aio: "-aio-gpu-intel-f32"
|
|
||||||
latest-image: 'latest-gpu-intel-f32'
|
|
||||||
latest-image-aio: 'latest-aio-gpu-intel-f32'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
# Core images
|
# Core images
|
||||||
- build-type: 'sycl_f16'
|
- build-type: 'sycl_f16'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
tag-suffix: '-sycl-f16-core'
|
tag-suffix: '-sycl-f16-core'
|
||||||
ffmpeg: 'false'
|
ffmpeg: 'false'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'sycl_f32'
|
- build-type: 'sycl_f32'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
tag-suffix: '-sycl-f32-core'
|
tag-suffix: '-sycl-f32-core'
|
||||||
ffmpeg: 'false'
|
ffmpeg: 'false'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'sycl_f16'
|
- build-type: 'sycl_f16'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
tag-suffix: '-sycl-f16-ffmpeg-core'
|
tag-suffix: '-sycl-f16-ffmpeg-core'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'sycl_f32'
|
- build-type: 'sycl_f32'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
tag-suffix: '-sycl-f32-ffmpeg-core'
|
tag-suffix: '-sycl-f32-ffmpeg-core'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
- build-type: 'hipblas'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-hipblas-ffmpeg-core'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
|
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-hipblas-core'
|
||||||
|
ffmpeg: 'false'
|
||||||
|
image-type: 'core'
|
||||||
|
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
|
||||||
core-image-build:
|
core-image-build:
|
||||||
uses: ./.github/workflows/image_build.yml
|
uses: ./.github/workflows/image_build.yml
|
||||||
@@ -274,33 +198,23 @@ jobs:
|
|||||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
platforms: ${{ matrix.platforms }}
|
platforms: ${{ matrix.platforms }}
|
||||||
runs-on: ${{ matrix.runs-on }}
|
runs-on: ${{ matrix.runs-on }}
|
||||||
aio: ${{ matrix.aio }}
|
|
||||||
base-image: ${{ matrix.base-image }}
|
base-image: ${{ matrix.base-image }}
|
||||||
grpc-base-image: ${{ matrix.grpc-base-image }}
|
|
||||||
makeflags: ${{ matrix.makeflags }}
|
|
||||||
latest-image: ${{ matrix.latest-image }}
|
|
||||||
latest-image-aio: ${{ matrix.latest-image-aio }}
|
|
||||||
secrets:
|
secrets:
|
||||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||||
strategy:
|
strategy:
|
||||||
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
|
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- build-type: ''
|
- build-type: ''
|
||||||
platforms: 'linux/amd64,linux/arm64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-ffmpeg-core'
|
tag-suffix: '-ffmpeg-core'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'ubuntu-latest'
|
||||||
aio: "-aio-cpu"
|
|
||||||
latest-image: 'latest-cpu'
|
|
||||||
latest-image-aio: 'latest-aio-cpu'
|
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "11"
|
cuda-major-version: "11"
|
||||||
cuda-minor-version: "7"
|
cuda-minor-version: "7"
|
||||||
@@ -310,19 +224,17 @@ jobs:
|
|||||||
ffmpeg: ''
|
ffmpeg: ''
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'ubuntu-latest'
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "12"
|
cuda-major-version: "12"
|
||||||
cuda-minor-version: "0"
|
cuda-minor-version: "1"
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-cublas-cuda12-core'
|
tag-suffix: '-cublas-cuda12-core'
|
||||||
ffmpeg: ''
|
ffmpeg: ''
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'ubuntu-latest'
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "11"
|
cuda-major-version: "11"
|
||||||
cuda-minor-version: "7"
|
cuda-minor-version: "7"
|
||||||
@@ -331,27 +243,15 @@ jobs:
|
|||||||
tag-suffix: '-cublas-cuda11-ffmpeg-core'
|
tag-suffix: '-cublas-cuda11-ffmpeg-core'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'ubuntu-latest'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "12"
|
cuda-major-version: "12"
|
||||||
cuda-minor-version: "0"
|
cuda-minor-version: "1"
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-cublas-cuda12-ffmpeg-core'
|
tag-suffix: '-cublas-cuda12-ffmpeg-core'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'ubuntu-latest'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
|
||||||
- build-type: 'vulkan'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
tag-suffix: '-vulkan-ffmpeg-core'
|
|
||||||
latest-image: 'latest-vulkan-ffmpeg-core'
|
|
||||||
ffmpeg: 'true'
|
|
||||||
image-type: 'core'
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
|
||||||
|
|||||||
170
.github/workflows/image_build.yml
vendored
170
.github/workflows/image_build.yml
vendored
@@ -6,10 +6,6 @@ on:
|
|||||||
inputs:
|
inputs:
|
||||||
base-image:
|
base-image:
|
||||||
description: 'Base image'
|
description: 'Base image'
|
||||||
required: true
|
|
||||||
type: string
|
|
||||||
grpc-base-image:
|
|
||||||
description: 'GRPC Base image, must be a compatible image with base-image'
|
|
||||||
required: false
|
required: false
|
||||||
default: ''
|
default: ''
|
||||||
type: string
|
type: string
|
||||||
@@ -19,11 +15,11 @@ on:
|
|||||||
type: string
|
type: string
|
||||||
cuda-major-version:
|
cuda-major-version:
|
||||||
description: 'CUDA major version'
|
description: 'CUDA major version'
|
||||||
default: "12"
|
default: "11"
|
||||||
type: string
|
type: string
|
||||||
cuda-minor-version:
|
cuda-minor-version:
|
||||||
description: 'CUDA minor version'
|
description: 'CUDA minor version'
|
||||||
default: "4"
|
default: "7"
|
||||||
type: string
|
type: string
|
||||||
platforms:
|
platforms:
|
||||||
description: 'Platforms'
|
description: 'Platforms'
|
||||||
@@ -33,14 +29,6 @@ on:
|
|||||||
description: 'Tag latest'
|
description: 'Tag latest'
|
||||||
default: ''
|
default: ''
|
||||||
type: string
|
type: string
|
||||||
latest-image:
|
|
||||||
description: 'Tag latest'
|
|
||||||
default: ''
|
|
||||||
type: string
|
|
||||||
latest-image-aio:
|
|
||||||
description: 'Tag latest'
|
|
||||||
default: ''
|
|
||||||
type: string
|
|
||||||
tag-suffix:
|
tag-suffix:
|
||||||
description: 'Tag suffix'
|
description: 'Tag suffix'
|
||||||
default: ''
|
default: ''
|
||||||
@@ -58,16 +46,6 @@ on:
|
|||||||
required: true
|
required: true
|
||||||
default: ''
|
default: ''
|
||||||
type: string
|
type: string
|
||||||
makeflags:
|
|
||||||
description: 'Make Flags'
|
|
||||||
required: false
|
|
||||||
default: '--jobs=4 --output-sync=target'
|
|
||||||
type: string
|
|
||||||
aio:
|
|
||||||
description: 'AIO Image Name'
|
|
||||||
required: false
|
|
||||||
default: ''
|
|
||||||
type: string
|
|
||||||
secrets:
|
secrets:
|
||||||
dockerUsername:
|
dockerUsername:
|
||||||
required: true
|
required: true
|
||||||
@@ -91,7 +69,6 @@ jobs:
|
|||||||
&& sudo apt-get install -y git
|
&& sudo apt-get install -y git
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Release space from worker
|
- name: Release space from worker
|
||||||
if: inputs.runs-on == 'ubuntu-latest'
|
if: inputs.runs-on == 'ubuntu-latest'
|
||||||
run: |
|
run: |
|
||||||
@@ -133,10 +110,8 @@ jobs:
|
|||||||
sudo rm -rf "/usr/local/share/boost" || true
|
sudo rm -rf "/usr/local/share/boost" || true
|
||||||
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
|
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
|
||||||
df -h
|
df -h
|
||||||
|
|
||||||
- name: Docker meta
|
- name: Docker meta
|
||||||
id: meta
|
id: meta
|
||||||
if: github.event_name != 'pull_request'
|
|
||||||
uses: docker/metadata-action@v5
|
uses: docker/metadata-action@v5
|
||||||
with:
|
with:
|
||||||
images: |
|
images: |
|
||||||
@@ -149,46 +124,6 @@ jobs:
|
|||||||
flavor: |
|
flavor: |
|
||||||
latest=${{ inputs.tag-latest }}
|
latest=${{ inputs.tag-latest }}
|
||||||
suffix=${{ inputs.tag-suffix }}
|
suffix=${{ inputs.tag-suffix }}
|
||||||
- name: Docker meta for PR
|
|
||||||
id: meta_pull_request
|
|
||||||
if: github.event_name == 'pull_request'
|
|
||||||
uses: docker/metadata-action@v5
|
|
||||||
with:
|
|
||||||
images: |
|
|
||||||
ttl.sh/localai-ci-pr-${{ github.event.number }}
|
|
||||||
tags: |
|
|
||||||
type=ref,event=branch
|
|
||||||
type=semver,pattern={{raw}}
|
|
||||||
type=sha
|
|
||||||
flavor: |
|
|
||||||
latest=${{ inputs.tag-latest }}
|
|
||||||
suffix=${{ inputs.tag-suffix }}
|
|
||||||
- name: Docker meta AIO (quay.io)
|
|
||||||
if: inputs.aio != ''
|
|
||||||
id: meta_aio
|
|
||||||
uses: docker/metadata-action@v5
|
|
||||||
with:
|
|
||||||
images: |
|
|
||||||
quay.io/go-skynet/local-ai
|
|
||||||
tags: |
|
|
||||||
type=ref,event=branch
|
|
||||||
type=semver,pattern={{raw}}
|
|
||||||
flavor: |
|
|
||||||
latest=${{ inputs.tag-latest }}
|
|
||||||
suffix=${{ inputs.aio }}
|
|
||||||
|
|
||||||
- name: Docker meta AIO (dockerhub)
|
|
||||||
if: inputs.aio != ''
|
|
||||||
id: meta_aio_dockerhub
|
|
||||||
uses: docker/metadata-action@v5
|
|
||||||
with:
|
|
||||||
images: |
|
|
||||||
localai/localai
|
|
||||||
tags: |
|
|
||||||
type=ref,event=branch
|
|
||||||
type=semver,pattern={{raw}}
|
|
||||||
flavor: |
|
|
||||||
suffix=${{ inputs.aio }}
|
|
||||||
|
|
||||||
- name: Set up QEMU
|
- name: Set up QEMU
|
||||||
uses: docker/setup-qemu-action@master
|
uses: docker/setup-qemu-action@master
|
||||||
@@ -215,14 +150,9 @@ jobs:
|
|||||||
password: ${{ secrets.quayPassword }}
|
password: ${{ secrets.quayPassword }}
|
||||||
|
|
||||||
- name: Build and push
|
- name: Build and push
|
||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@v5
|
||||||
if: github.event_name != 'pull_request'
|
|
||||||
with:
|
with:
|
||||||
builder: ${{ steps.buildx.outputs.name }}
|
builder: ${{ steps.buildx.outputs.name }}
|
||||||
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
|
|
||||||
# This means that even the MAKEFLAGS have to be an EXACT match.
|
|
||||||
# If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
|
|
||||||
# This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded
|
|
||||||
build-args: |
|
build-args: |
|
||||||
BUILD_TYPE=${{ inputs.build-type }}
|
BUILD_TYPE=${{ inputs.build-type }}
|
||||||
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
||||||
@@ -230,106 +160,12 @@ jobs:
|
|||||||
FFMPEG=${{ inputs.ffmpeg }}
|
FFMPEG=${{ inputs.ffmpeg }}
|
||||||
IMAGE_TYPE=${{ inputs.image-type }}
|
IMAGE_TYPE=${{ inputs.image-type }}
|
||||||
BASE_IMAGE=${{ inputs.base-image }}
|
BASE_IMAGE=${{ inputs.base-image }}
|
||||||
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
|
|
||||||
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
|
||||||
GRPC_VERSION=v1.65.0
|
|
||||||
MAKEFLAGS=${{ inputs.makeflags }}
|
|
||||||
context: .
|
context: .
|
||||||
file: ./Dockerfile
|
file: ./Dockerfile
|
||||||
cache-from: type=gha
|
|
||||||
platforms: ${{ inputs.platforms }}
|
platforms: ${{ inputs.platforms }}
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
push: ${{ github.event_name != 'pull_request' }}
|
||||||
tags: ${{ steps.meta.outputs.tags }}
|
tags: ${{ steps.meta.outputs.tags }}
|
||||||
labels: ${{ steps.meta.outputs.labels }}
|
labels: ${{ steps.meta.outputs.labels }}
|
||||||
### Start testing image
|
|
||||||
- name: Build and push
|
|
||||||
uses: docker/build-push-action@v6
|
|
||||||
if: github.event_name == 'pull_request'
|
|
||||||
with:
|
|
||||||
builder: ${{ steps.buildx.outputs.name }}
|
|
||||||
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
|
|
||||||
# This means that even the MAKEFLAGS have to be an EXACT match.
|
|
||||||
# If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
|
|
||||||
# This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded
|
|
||||||
build-args: |
|
|
||||||
BUILD_TYPE=${{ inputs.build-type }}
|
|
||||||
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
|
||||||
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
|
|
||||||
FFMPEG=${{ inputs.ffmpeg }}
|
|
||||||
IMAGE_TYPE=${{ inputs.image-type }}
|
|
||||||
BASE_IMAGE=${{ inputs.base-image }}
|
|
||||||
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
|
|
||||||
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
|
||||||
GRPC_VERSION=v1.65.0
|
|
||||||
MAKEFLAGS=${{ inputs.makeflags }}
|
|
||||||
context: .
|
|
||||||
file: ./Dockerfile
|
|
||||||
cache-from: type=gha
|
|
||||||
platforms: ${{ inputs.platforms }}
|
|
||||||
push: true
|
|
||||||
tags: ${{ steps.meta_pull_request.outputs.tags }}
|
|
||||||
labels: ${{ steps.meta_pull_request.outputs.labels }}
|
|
||||||
- name: Testing image
|
|
||||||
if: github.event_name == 'pull_request'
|
|
||||||
run: |
|
|
||||||
echo "Image is available at ttl.sh/localai-ci-pr-${{ github.event.number }}:${{ steps.meta_pull_request.outputs.version }}" >> $GITHUB_STEP_SUMMARY
|
|
||||||
## End testing image
|
|
||||||
- name: Build and push AIO image
|
|
||||||
if: inputs.aio != ''
|
|
||||||
uses: docker/build-push-action@v6
|
|
||||||
with:
|
|
||||||
builder: ${{ steps.buildx.outputs.name }}
|
|
||||||
build-args: |
|
|
||||||
BASE_IMAGE=quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
|
|
||||||
MAKEFLAGS=${{ inputs.makeflags }}
|
|
||||||
context: .
|
|
||||||
file: ./Dockerfile.aio
|
|
||||||
platforms: ${{ inputs.platforms }}
|
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
|
||||||
tags: ${{ steps.meta_aio.outputs.tags }}
|
|
||||||
labels: ${{ steps.meta_aio.outputs.labels }}
|
|
||||||
|
|
||||||
- name: Build and push AIO image (dockerhub)
|
|
||||||
if: inputs.aio != ''
|
|
||||||
uses: docker/build-push-action@v6
|
|
||||||
with:
|
|
||||||
builder: ${{ steps.buildx.outputs.name }}
|
|
||||||
build-args: |
|
|
||||||
BASE_IMAGE=localai/localai:${{ steps.meta.outputs.version }}
|
|
||||||
MAKEFLAGS=${{ inputs.makeflags }}
|
|
||||||
context: .
|
|
||||||
file: ./Dockerfile.aio
|
|
||||||
platforms: ${{ inputs.platforms }}
|
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
|
||||||
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
|
|
||||||
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
|
|
||||||
|
|
||||||
- name: Latest tag
|
|
||||||
# run this on branches, when it is a tag and there is a latest-image defined
|
|
||||||
if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
|
|
||||||
run: |
|
|
||||||
docker pull localai/localai:${{ steps.meta.outputs.version }}
|
|
||||||
docker tag localai/localai:${{ steps.meta.outputs.version }} localai/localai:${{ inputs.latest-image }}
|
|
||||||
docker push localai/localai:${{ inputs.latest-image }}
|
|
||||||
docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
|
|
||||||
docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
|
|
||||||
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
|
|
||||||
- name: Latest AIO tag
|
|
||||||
# run this on branches, when it is a tag and there is a latest-image defined
|
|
||||||
if: github.event_name != 'pull_request' && inputs.latest-image-aio != '' && github.ref_type == 'tag'
|
|
||||||
run: |
|
|
||||||
docker pull localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }}
|
|
||||||
docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image-aio }}
|
|
||||||
docker push localai/localai:${{ inputs.latest-image-aio }}
|
|
||||||
docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
|
|
||||||
docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
|
|
||||||
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
|
|
||||||
|
|
||||||
- name: job summary
|
- name: job summary
|
||||||
run: |
|
run: |
|
||||||
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
|
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
|
||||||
|
|
||||||
- name: job summary(AIO)
|
|
||||||
if: inputs.aio != ''
|
|
||||||
run: |
|
|
||||||
echo "Built image: ${{ steps.meta_aio.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
|
|
||||||
|
|||||||
12
.github/workflows/labeler.yml
vendored
12
.github/workflows/labeler.yml
vendored
@@ -1,12 +0,0 @@
|
|||||||
name: "Pull Request Labeler"
|
|
||||||
on:
|
|
||||||
- pull_request_target
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
labeler:
|
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
pull-requests: write
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/labeler@v5
|
|
||||||
35
.github/workflows/localaibot_automerge.yml
vendored
35
.github/workflows/localaibot_automerge.yml
vendored
@@ -1,35 +0,0 @@
|
|||||||
name: LocalAI-bot auto-merge
|
|
||||||
on:
|
|
||||||
- pull_request_target
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
contents: write
|
|
||||||
pull-requests: write
|
|
||||||
packages: read
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
dependabot:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
if: ${{ github.actor == 'localai-bot' }}
|
|
||||||
steps:
|
|
||||||
- name: Checkout repository
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Approve a PR if not already approved
|
|
||||||
run: |
|
|
||||||
gh pr checkout "$PR_URL"
|
|
||||||
if [ "$(gh pr status --json reviewDecision -q .currentBranch.reviewDecision)" != "APPROVED" ];
|
|
||||||
then
|
|
||||||
gh pr review --approve "$PR_URL"
|
|
||||||
else
|
|
||||||
echo "PR already approved.";
|
|
||||||
fi
|
|
||||||
env:
|
|
||||||
PR_URL: ${{github.event.pull_request.html_url}}
|
|
||||||
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
|
|
||||||
|
|
||||||
- name: Enable auto-merge for LocalAIBot PRs
|
|
||||||
run: gh pr merge --auto --squash "$PR_URL"
|
|
||||||
env:
|
|
||||||
PR_URL: ${{github.event.pull_request.html_url}}
|
|
||||||
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
|
|
||||||
168
.github/workflows/notify-models.yaml
vendored
168
.github/workflows/notify-models.yaml
vendored
@@ -1,168 +0,0 @@
|
|||||||
name: Notifications for new models
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
types:
|
|
||||||
- closed
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
notify-discord:
|
|
||||||
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
|
|
||||||
env:
|
|
||||||
MODEL_NAME: hermes-2-theta-llama-3-8b
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
fetch-depth: 0 # needed to checkout all branches for this Action to work
|
|
||||||
- uses: mudler/localai-github-action@v1
|
|
||||||
with:
|
|
||||||
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
|
||||||
# Check the PR diff using the current branch and the base branch of the PR
|
|
||||||
- uses: GrantBirki/git-diff-action@v2.7.0
|
|
||||||
id: git-diff-action
|
|
||||||
with:
|
|
||||||
json_diff_file_output: diff.json
|
|
||||||
raw_diff_file_output: diff.txt
|
|
||||||
file_output_only: "true"
|
|
||||||
- name: Summarize
|
|
||||||
env:
|
|
||||||
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
|
|
||||||
id: summarize
|
|
||||||
run: |
|
|
||||||
input="$(cat $DIFF)"
|
|
||||||
|
|
||||||
# Define the LocalAI API endpoint
|
|
||||||
API_URL="http://localhost:8080/chat/completions"
|
|
||||||
|
|
||||||
# Create a JSON payload using jq to handle special characters
|
|
||||||
json_payload=$(jq -n --arg input "$input" '{
|
|
||||||
model: "'$MODEL_NAME'",
|
|
||||||
messages: [
|
|
||||||
{
|
|
||||||
role: "system",
|
|
||||||
content: "You are LocalAI-bot. Write a discord message to notify everyone about the new model from the git diff. Make it informal. An example can include: the URL of the model, the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI and that can be browsed over https://models.localai.io. For example: local-ai run model_name_here"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
role: "user",
|
|
||||||
content: $input
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}')
|
|
||||||
|
|
||||||
# Send the request to LocalAI
|
|
||||||
response=$(curl -s -X POST $API_URL \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d "$json_payload")
|
|
||||||
|
|
||||||
# Extract the summary from the response
|
|
||||||
summary="$(echo $response | jq -r '.choices[0].message.content')"
|
|
||||||
|
|
||||||
# Print the summary
|
|
||||||
# -H "Authorization: Bearer $API_KEY" \
|
|
||||||
echo "Summary:"
|
|
||||||
echo "$summary"
|
|
||||||
echo "payload sent"
|
|
||||||
echo "$json_payload"
|
|
||||||
{
|
|
||||||
echo 'message<<EOF'
|
|
||||||
echo "$summary"
|
|
||||||
echo EOF
|
|
||||||
} >> "$GITHUB_OUTPUT"
|
|
||||||
docker logs --tail 10 local-ai
|
|
||||||
- name: Discord notification
|
|
||||||
env:
|
|
||||||
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL }}
|
|
||||||
DISCORD_USERNAME: "LocalAI-Bot"
|
|
||||||
DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
|
|
||||||
uses: Ilshidur/action-discord@master
|
|
||||||
with:
|
|
||||||
args: ${{ steps.summarize.outputs.message }}
|
|
||||||
- name: Setup tmate session if fails
|
|
||||||
if: ${{ failure() }}
|
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
|
||||||
with:
|
|
||||||
detached: true
|
|
||||||
connect-timeout-seconds: 180
|
|
||||||
limit-access-to-actor: true
|
|
||||||
notify-twitter:
|
|
||||||
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
|
|
||||||
env:
|
|
||||||
MODEL_NAME: hermes-2-theta-llama-3-8b
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
fetch-depth: 0 # needed to checkout all branches for this Action to work
|
|
||||||
- name: Start LocalAI
|
|
||||||
run: |
|
|
||||||
echo "Starting LocalAI..."
|
|
||||||
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
|
|
||||||
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
|
|
||||||
# Check the PR diff using the current branch and the base branch of the PR
|
|
||||||
- uses: GrantBirki/git-diff-action@v2.7.0
|
|
||||||
id: git-diff-action
|
|
||||||
with:
|
|
||||||
json_diff_file_output: diff.json
|
|
||||||
raw_diff_file_output: diff.txt
|
|
||||||
file_output_only: "true"
|
|
||||||
- name: Summarize
|
|
||||||
env:
|
|
||||||
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
|
|
||||||
id: summarize
|
|
||||||
run: |
|
|
||||||
input="$(cat $DIFF)"
|
|
||||||
|
|
||||||
# Define the LocalAI API endpoint
|
|
||||||
API_URL="http://localhost:8080/chat/completions"
|
|
||||||
|
|
||||||
# Create a JSON payload using jq to handle special characters
|
|
||||||
json_payload=$(jq -n --arg input "$input" '{
|
|
||||||
model: "'$MODEL_NAME'",
|
|
||||||
messages: [
|
|
||||||
{
|
|
||||||
role: "system",
|
|
||||||
content: "You are LocalAI-bot. Write a twitter message to notify everyone about the new model from the git diff. Make it informal and really short. An example can include: the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI. For example: local-ai run model_name_here"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
role: "user",
|
|
||||||
content: $input
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}')
|
|
||||||
|
|
||||||
# Send the request to LocalAI
|
|
||||||
response=$(curl -s -X POST $API_URL \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d "$json_payload")
|
|
||||||
|
|
||||||
# Extract the summary from the response
|
|
||||||
summary="$(echo $response | jq -r '.choices[0].message.content')"
|
|
||||||
|
|
||||||
# Print the summary
|
|
||||||
# -H "Authorization: Bearer $API_KEY" \
|
|
||||||
echo "Summary:"
|
|
||||||
echo "$summary"
|
|
||||||
echo "payload sent"
|
|
||||||
echo "$json_payload"
|
|
||||||
{
|
|
||||||
echo 'message<<EOF'
|
|
||||||
echo "$summary"
|
|
||||||
echo EOF
|
|
||||||
} >> "$GITHUB_OUTPUT"
|
|
||||||
docker logs --tail 10 local-ai
|
|
||||||
- uses: Eomm/why-don-t-you-tweet@v2
|
|
||||||
with:
|
|
||||||
tweet-message: ${{ steps.summarize.outputs.message }}
|
|
||||||
env:
|
|
||||||
# Get your tokens from https://developer.twitter.com/apps
|
|
||||||
TWITTER_CONSUMER_API_KEY: ${{ secrets.TWITTER_APP_KEY }}
|
|
||||||
TWITTER_CONSUMER_API_SECRET: ${{ secrets.TWITTER_APP_SECRET }}
|
|
||||||
TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
|
|
||||||
TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
|
|
||||||
- name: Setup tmate session if fails
|
|
||||||
if: ${{ failure() }}
|
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
|
||||||
with:
|
|
||||||
detached: true
|
|
||||||
connect-timeout-seconds: 180
|
|
||||||
limit-access-to-actor: true
|
|
||||||
63
.github/workflows/notify-releases.yaml
vendored
63
.github/workflows/notify-releases.yaml
vendored
@@ -1,63 +0,0 @@
|
|||||||
name: Release notifications
|
|
||||||
on:
|
|
||||||
release:
|
|
||||||
types:
|
|
||||||
- published
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
notify-discord:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
env:
|
|
||||||
RELEASE_BODY: ${{ github.event.release.body }}
|
|
||||||
RELEASE_TITLE: ${{ github.event.release.name }}
|
|
||||||
RELEASE_TAG_NAME: ${{ github.event.release.tag_name }}
|
|
||||||
steps:
|
|
||||||
- uses: mudler/localai-github-action@v1
|
|
||||||
with:
|
|
||||||
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
|
||||||
- name: Summarize
|
|
||||||
id: summarize
|
|
||||||
run: |
|
|
||||||
input="$RELEASE_TITLE\b$RELEASE_BODY"
|
|
||||||
|
|
||||||
# Define the LocalAI API endpoint
|
|
||||||
API_URL="http://localhost:8080/chat/completions"
|
|
||||||
|
|
||||||
# Create a JSON payload using jq to handle special characters
|
|
||||||
json_payload=$(jq -n --arg input "$input" '{
|
|
||||||
model: "'$MODEL_NAME'",
|
|
||||||
messages: [
|
|
||||||
{
|
|
||||||
role: "system",
|
|
||||||
content: "Write a discord message with a bullet point summary of the release notes."
|
|
||||||
},
|
|
||||||
{
|
|
||||||
role: "user",
|
|
||||||
content: $input
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}')
|
|
||||||
|
|
||||||
# Send the request to LocalAI API
|
|
||||||
response=$(curl -s -X POST $API_URL \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d "$json_payload")
|
|
||||||
|
|
||||||
# Extract the summary from the response
|
|
||||||
summary=$(echo $response | jq -r '.choices[0].message.content')
|
|
||||||
|
|
||||||
# Print the summary
|
|
||||||
# -H "Authorization: Bearer $API_KEY" \
|
|
||||||
{
|
|
||||||
echo 'message<<EOF'
|
|
||||||
echo "$summary"
|
|
||||||
echo EOF
|
|
||||||
} >> "$GITHUB_OUTPUT"
|
|
||||||
- name: Discord notification
|
|
||||||
env:
|
|
||||||
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL_RELEASE }}
|
|
||||||
DISCORD_USERNAME: "LocalAI-Bot"
|
|
||||||
DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
|
|
||||||
uses: Ilshidur/action-discord@master
|
|
||||||
with:
|
|
||||||
args: ${{ steps.summarize.outputs.message }}
|
|
||||||
28
.github/workflows/prlint.yaml
vendored
28
.github/workflows/prlint.yaml
vendored
@@ -1,28 +0,0 @@
|
|||||||
name: Check PR style
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request_target:
|
|
||||||
types:
|
|
||||||
- opened
|
|
||||||
- reopened
|
|
||||||
- edited
|
|
||||||
- synchronize
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
title-lint:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
permissions:
|
|
||||||
statuses: write
|
|
||||||
steps:
|
|
||||||
- uses: aslafy-z/conventional-pr-title-action@v3
|
|
||||||
env:
|
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
# check-pr-description:
|
|
||||||
# runs-on: ubuntu-latest
|
|
||||||
# steps:
|
|
||||||
# - uses: actions/checkout@v2
|
|
||||||
# - uses: jadrol/pr-description-checker-action@v1.0.0
|
|
||||||
# id: description-checker
|
|
||||||
# with:
|
|
||||||
# repo-token: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
# exempt-labels: no qa
|
|
||||||
332
.github/workflows/release.yaml
vendored
332
.github/workflows/release.yaml
vendored
@@ -1,15 +1,6 @@
|
|||||||
name: Build and Release
|
name: Build and Release
|
||||||
|
|
||||||
on:
|
on: push
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
tags:
|
|
||||||
- 'v*'
|
|
||||||
pull_request:
|
|
||||||
|
|
||||||
env:
|
|
||||||
GRPC_VERSION: v1.65.0
|
|
||||||
|
|
||||||
permissions:
|
permissions:
|
||||||
contents: write
|
contents: write
|
||||||
@@ -19,224 +10,85 @@ concurrency:
|
|||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
build-linux:
|
||||||
build-linux-arm:
|
strategy:
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- build: 'avx2'
|
||||||
|
defines: ''
|
||||||
|
- build: 'avx'
|
||||||
|
defines: '-DLLAMA_AVX2=OFF'
|
||||||
|
- build: 'avx512'
|
||||||
|
defines: '-DLLAMA_AVX512=ON'
|
||||||
|
- build: 'cuda12'
|
||||||
|
defines: ''
|
||||||
|
- build: 'cuda11'
|
||||||
|
defines: ''
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v4
|
||||||
with:
|
with:
|
||||||
go-version: '1.21.x'
|
go-version: '>=1.21.0'
|
||||||
cache: false
|
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
|
sudo apt-get install build-essential ffmpeg
|
||||||
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
|
|
||||||
- name: Install CUDA Dependencies
|
|
||||||
run: |
|
|
||||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
|
|
||||||
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
|
|
||||||
env:
|
|
||||||
CUDA_VERSION: 12-4
|
|
||||||
- name: Cache grpc
|
|
||||||
id: cache-grpc
|
|
||||||
uses: actions/cache@v4
|
|
||||||
with:
|
|
||||||
path: grpc
|
|
||||||
key: ${{ runner.os }}-arm-grpc-${{ env.GRPC_VERSION }}
|
|
||||||
- name: Build grpc
|
|
||||||
if: steps.cache-grpc.outputs.cache-hit != 'true'
|
|
||||||
run: |
|
|
||||||
|
|
||||||
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
|
||||||
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
|
|
||||||
cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
|
||||||
-DgRPC_BUILD_TESTS=OFF \
|
|
||||||
../.. && sudo make --jobs 5 --output-sync=target
|
|
||||||
- name: Install gRPC
|
|
||||||
run: |
|
|
||||||
GNU_HOST=aarch64-linux-gnu
|
|
||||||
C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
|
|
||||||
CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
|
|
||||||
|
|
||||||
CROSS_TOOLCHAIN=/usr/$GNU_HOST
|
|
||||||
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
|
|
||||||
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
|
|
||||||
|
|
||||||
# https://cmake.org/cmake/help/v3.13/manual/cmake-toolchains.7.html#cross-compiling-for-linux
|
|
||||||
echo "set(CMAKE_SYSTEM_NAME Linux)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
|
||||||
echo "set(CMAKE_SYSTEM_PROCESSOR arm)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
|
||||||
echo "set(CMAKE_STAGING_PREFIX $CROSS_STAGING_PREFIX)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
|
||||||
echo "set(CMAKE_SYSROOT ${CROSS_TOOLCHAIN}/sysroot)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
|
||||||
echo "set(CMAKE_C_COMPILER /usr/bin/$C_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
|
||||||
echo "set(CMAKE_CXX_COMPILER /usr/bin/$CXX_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
|
||||||
echo "set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
|
||||||
echo "set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
|
||||||
echo "set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
|
||||||
echo "set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN
|
|
||||||
GRPC_DIR=$PWD/grpc
|
|
||||||
cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install && \
|
|
||||||
GRPC_CROSS_BUILD_DIR=$GRPC_DIR/cmake/cross_build && \
|
|
||||||
mkdir -p $GRPC_CROSS_BUILD_DIR && \
|
|
||||||
cd $GRPC_CROSS_BUILD_DIR && \
|
|
||||||
cmake -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN \
|
|
||||||
-DCMAKE_BUILD_TYPE=Release \
|
|
||||||
-DCMAKE_INSTALL_PREFIX=$CROSS_TOOLCHAIN/grpc_install \
|
|
||||||
../.. && \
|
|
||||||
sudo make -j`nproc` install
|
|
||||||
- name: Build
|
|
||||||
id: build
|
|
||||||
run: |
|
|
||||||
GNU_HOST=aarch64-linux-gnu
|
|
||||||
C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
|
|
||||||
CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
|
|
||||||
|
|
||||||
CROSS_TOOLCHAIN=/usr/$GNU_HOST
|
|
||||||
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
|
|
||||||
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
|
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
|
||||||
export PATH=$PATH:$GOPATH/bin
|
|
||||||
export PATH=/usr/local/cuda/bin:$PATH
|
|
||||||
sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
|
|
||||||
sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
|
|
||||||
sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
|
|
||||||
BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
|
|
||||||
GOOS=linux \
|
|
||||||
GOARCH=arm64 \
|
|
||||||
CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
|
|
||||||
- uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
name: LocalAI-linux-arm64
|
|
||||||
path: release/
|
|
||||||
- name: Release
|
|
||||||
uses: softprops/action-gh-release@v2
|
|
||||||
if: startsWith(github.ref, 'refs/tags/')
|
|
||||||
with:
|
|
||||||
files: |
|
|
||||||
release/*
|
|
||||||
- name: Setup tmate session if tests fail
|
|
||||||
if: ${{ failure() }}
|
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
|
||||||
with:
|
|
||||||
detached: true
|
|
||||||
connect-timeout-seconds: 180
|
|
||||||
limit-access-to-actor: true
|
|
||||||
build-linux:
|
|
||||||
runs-on: arc-runner-set
|
|
||||||
steps:
|
|
||||||
- name: Force Install GIT latest
|
|
||||||
run: |
|
|
||||||
sudo apt-get update \
|
|
||||||
&& sudo apt-get install -y software-properties-common \
|
|
||||||
&& sudo apt-get update \
|
|
||||||
&& sudo add-apt-repository -y ppa:git-core/ppa \
|
|
||||||
&& sudo apt-get update \
|
|
||||||
&& sudo apt-get install -y git
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- uses: actions/setup-go@v5
|
|
||||||
with:
|
|
||||||
go-version: '1.21.x'
|
|
||||||
cache: false
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
|
|
||||||
- name: Intel Dependencies
|
|
||||||
run: |
|
|
||||||
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
|
|
||||||
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
|
|
||||||
sudo apt update
|
|
||||||
sudo apt install -y intel-basekit
|
|
||||||
- name: Install CUDA Dependencies
|
- name: Install CUDA Dependencies
|
||||||
|
if: ${{ matrix.build == 'cuda12' || matrix.build == 'cuda11' }}
|
||||||
run: |
|
run: |
|
||||||
|
if [ "${{ matrix.build }}" == "cuda12" ]; then
|
||||||
|
export CUDA_VERSION=12-3
|
||||||
|
else
|
||||||
|
export CUDA_VERSION=11-7
|
||||||
|
fi
|
||||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
||||||
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
||||||
env:
|
|
||||||
CUDA_VERSION: 12-5
|
|
||||||
- name: "Install Hipblas"
|
|
||||||
env:
|
|
||||||
ROCM_VERSION: "6.1"
|
|
||||||
AMDGPU_VERSION: "6.1"
|
|
||||||
run: |
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
sudo apt-get update
|
|
||||||
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
|
|
||||||
|
|
||||||
curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -
|
|
||||||
|
|
||||||
printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | sudo tee /etc/apt/sources.list.d/rocm.list
|
|
||||||
|
|
||||||
printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
|
|
||||||
printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
|
|
||||||
sudo apt-get update
|
|
||||||
|
|
||||||
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
|
||||||
hipblas-dev rocm-dev \
|
|
||||||
rocblas-dev
|
|
||||||
|
|
||||||
sudo apt-get clean
|
|
||||||
sudo rm -rf /var/lib/apt/lists/*
|
|
||||||
sudo ldconfig
|
|
||||||
- name: Cache grpc
|
- name: Cache grpc
|
||||||
id: cache-grpc
|
id: cache-grpc
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v3
|
||||||
with:
|
with:
|
||||||
path: grpc
|
path: grpc
|
||||||
key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
|
key: ${{ runner.os }}-grpc
|
||||||
- name: Build grpc
|
- name: Build grpc
|
||||||
if: steps.cache-grpc.outputs.cache-hit != 'true'
|
if: steps.cache-grpc.outputs.cache-hit != 'true'
|
||||||
run: |
|
run: |
|
||||||
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||||
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
|
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||||
cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
|
||||||
-DgRPC_BUILD_TESTS=OFF \
|
-DgRPC_BUILD_TESTS=OFF \
|
||||||
../.. && sudo make --jobs 5 --output-sync=target
|
../.. && sudo make -j12
|
||||||
- name: Install gRPC
|
- name: Install gRPC
|
||||||
run: |
|
run: |
|
||||||
cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
|
cd grpc && cd cmake/build && sudo make -j12 install
|
||||||
# BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
|
|
||||||
- name: Build
|
- name: Build
|
||||||
id: build
|
id: build
|
||||||
|
env:
|
||||||
|
CMAKE_ARGS: "${{ matrix.defines }}"
|
||||||
|
BUILD_ID: "${{ matrix.build }}"
|
||||||
run: |
|
run: |
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
if [ "${{ matrix.build }}" == "cuda12" ] || [ "${{ matrix.build }}" == "cuda11" ]; then
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
export BUILD_TYPE=cublas
|
||||||
export PATH=$PATH:$GOPATH/bin
|
|
||||||
export PATH=/usr/local/cuda/bin:$PATH
|
export PATH=/usr/local/cuda/bin:$PATH
|
||||||
export PATH=/opt/rocm/bin:$PATH
|
make dist
|
||||||
source /opt/intel/oneapi/setvars.sh
|
else
|
||||||
sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
|
STATIC=true make dist
|
||||||
BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/x86_64-linux-gnu/libdl.so.2 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/lib/x86_64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
|
fi
|
||||||
make -j4 dist
|
- uses: actions/upload-artifact@v3
|
||||||
- uses: actions/upload-artifact@v4
|
|
||||||
with:
|
with:
|
||||||
name: LocalAI-linux
|
name: ${{ matrix.build }}
|
||||||
path: release/
|
path: release/
|
||||||
- name: Release
|
- name: Release
|
||||||
uses: softprops/action-gh-release@v2
|
uses: softprops/action-gh-release@v1
|
||||||
if: startsWith(github.ref, 'refs/tags/')
|
if: startsWith(github.ref, 'refs/tags/')
|
||||||
with:
|
with:
|
||||||
files: |
|
files: |
|
||||||
release/*
|
release/*
|
||||||
- name: Setup tmate session if tests fail
|
|
||||||
if: ${{ failure() }}
|
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
|
||||||
with:
|
|
||||||
detached: true
|
|
||||||
connect-timeout-seconds: 180
|
|
||||||
limit-access-to-actor: true
|
|
||||||
build-stablediffusion:
|
build-stablediffusion:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
@@ -244,114 +96,66 @@ jobs:
|
|||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v4
|
||||||
with:
|
with:
|
||||||
go-version: '1.21.x'
|
go-version: '>=1.21.0'
|
||||||
cache: false
|
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get install -y --no-install-recommends libopencv-dev
|
||||||
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
|
sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
|
||||||
- name: Build stablediffusion
|
- name: Build stablediffusion
|
||||||
run: |
|
run: |
|
||||||
export PATH=$PATH:$GOPATH/bin
|
|
||||||
make backend-assets/grpc/stablediffusion
|
make backend-assets/grpc/stablediffusion
|
||||||
mkdir -p release && cp backend-assets/grpc/stablediffusion release
|
mkdir -p release && cp backend-assets/grpc/stablediffusion release
|
||||||
env:
|
- uses: actions/upload-artifact@v3
|
||||||
GO_TAGS: stablediffusion
|
|
||||||
- uses: actions/upload-artifact@v4
|
|
||||||
with:
|
with:
|
||||||
name: stablediffusion
|
name: stablediffusion
|
||||||
path: release/
|
path: release/
|
||||||
- name: Release
|
- name: Release
|
||||||
uses: softprops/action-gh-release@v2
|
uses: softprops/action-gh-release@v1
|
||||||
if: startsWith(github.ref, 'refs/tags/')
|
if: startsWith(github.ref, 'refs/tags/')
|
||||||
with:
|
with:
|
||||||
files: |
|
files: |
|
||||||
release/*
|
release/*
|
||||||
|
|
||||||
build-macOS-x86_64:
|
build-macOS:
|
||||||
runs-on: macos-13
|
strategy:
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- build: 'avx2'
|
||||||
|
defines: ''
|
||||||
|
- build: 'avx'
|
||||||
|
defines: '-DLLAMA_AVX2=OFF'
|
||||||
|
- build: 'avx512'
|
||||||
|
defines: '-DLLAMA_AVX512=ON'
|
||||||
|
runs-on: macOS-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v4
|
||||||
with:
|
with:
|
||||||
go-version: '1.21.x'
|
go-version: '>=1.21.0'
|
||||||
cache: false
|
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
brew install protobuf grpc
|
brew install protobuf grpc
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
|
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
|
|
||||||
- name: Build
|
- name: Build
|
||||||
id: build
|
id: build
|
||||||
|
env:
|
||||||
|
CMAKE_ARGS: "${{ matrix.defines }}"
|
||||||
|
BUILD_ID: "${{ matrix.build }}"
|
||||||
run: |
|
run: |
|
||||||
export C_INCLUDE_PATH=/usr/local/include
|
export C_INCLUDE_PATH=/usr/local/include
|
||||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||||
export PATH=$PATH:$GOPATH/bin
|
|
||||||
export SKIP_GRPC_BACKEND=backend-assets/grpc/whisper
|
|
||||||
make dist
|
make dist
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v3
|
||||||
with:
|
with:
|
||||||
name: LocalAI-MacOS-x86_64
|
name: ${{ matrix.build }}
|
||||||
path: release/
|
path: release/
|
||||||
- name: Release
|
- name: Release
|
||||||
uses: softprops/action-gh-release@v2
|
uses: softprops/action-gh-release@v1
|
||||||
if: startsWith(github.ref, 'refs/tags/')
|
if: startsWith(github.ref, 'refs/tags/')
|
||||||
with:
|
with:
|
||||||
files: |
|
files: |
|
||||||
release/*
|
release/*
|
||||||
- name: Setup tmate session if tests fail
|
|
||||||
if: ${{ failure() }}
|
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
|
||||||
with:
|
|
||||||
detached: true
|
|
||||||
connect-timeout-seconds: 180
|
|
||||||
limit-access-to-actor: true
|
|
||||||
|
|
||||||
build-macOS-arm64:
|
|
||||||
runs-on: macos-14
|
|
||||||
steps:
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- uses: actions/setup-go@v5
|
|
||||||
with:
|
|
||||||
go-version: '1.21.x'
|
|
||||||
cache: false
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
brew install protobuf grpc libomp llvm
|
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
|
||||||
- name: Build
|
|
||||||
id: build
|
|
||||||
run: |
|
|
||||||
export C_INCLUDE_PATH=/usr/local/include
|
|
||||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
|
||||||
export PATH=$PATH:$GOPATH/bin
|
|
||||||
export CC=/opt/homebrew/opt/llvm/bin/clang
|
|
||||||
make dist
|
|
||||||
- uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
name: LocalAI-MacOS-arm64
|
|
||||||
path: release/
|
|
||||||
- name: Release
|
|
||||||
uses: softprops/action-gh-release@v2
|
|
||||||
if: startsWith(github.ref, 'refs/tags/')
|
|
||||||
with:
|
|
||||||
files: |
|
|
||||||
release/*
|
|
||||||
- name: Setup tmate session if tests fail
|
|
||||||
if: ${{ failure() }}
|
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
|
||||||
with:
|
|
||||||
detached: true
|
|
||||||
connect-timeout-seconds: 180
|
|
||||||
limit-access-to-actor: true
|
|
||||||
|
|||||||
30
.github/workflows/secscan.yaml
vendored
30
.github/workflows/secscan.yaml
vendored
@@ -1,30 +0,0 @@
|
|||||||
name: "Security Scan"
|
|
||||||
|
|
||||||
# Run workflow each time code is pushed to your repository and on a schedule.
|
|
||||||
# The scheduled workflow runs every at 00:00 on Sunday UTC time.
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
schedule:
|
|
||||||
- cron: '0 0 * * 0'
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
tests:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
env:
|
|
||||||
GO111MODULE: on
|
|
||||||
steps:
|
|
||||||
- name: Checkout Source
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
|
||||||
- name: Run Gosec Security Scanner
|
|
||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
|
||||||
uses: securego/gosec@v2.21.4
|
|
||||||
with:
|
|
||||||
# we let the report trigger content trigger a failure using the GitHub Security features.
|
|
||||||
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
|
||||||
- name: Upload SARIF file
|
|
||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
|
||||||
uses: github/codeql-action/upload-sarif@v3
|
|
||||||
with:
|
|
||||||
# Path to SARIF file relative to the root of the repository
|
|
||||||
sarif_file: results.sarif
|
|
||||||
274
.github/workflows/test-extra.yml
vendored
274
.github/workflows/test-extra.yml
vendored
@@ -25,16 +25,23 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install build-essential ffmpeg
|
sudo apt-get install build-essential ffmpeg
|
||||||
# Install UV
|
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||||
sudo apt-get install -y libopencv-dev
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
|
sudo apt-get update && \
|
||||||
|
sudo apt-get install -y conda
|
||||||
|
sudo apt-get install -y ca-certificates cmake curl patch
|
||||||
|
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||||
|
|
||||||
|
sudo rm -rfv /usr/bin/conda || true
|
||||||
|
|
||||||
- name: Test transformers
|
- name: Test transformers
|
||||||
run: |
|
run: |
|
||||||
make --jobs=5 --output-sync=target -C backend/python/transformers
|
export PATH=$PATH:/opt/conda/bin
|
||||||
make --jobs=5 --output-sync=target -C backend/python/transformers test
|
make -C backend/python/transformers
|
||||||
|
make -C backend/python/transformers test
|
||||||
|
|
||||||
tests-sentencetransformers:
|
tests-sentencetransformers:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@@ -47,39 +54,23 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install build-essential ffmpeg
|
sudo apt-get install build-essential ffmpeg
|
||||||
# Install UV
|
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||||
sudo apt-get install -y libopencv-dev
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
|
sudo apt-get update && \
|
||||||
|
sudo apt-get install -y conda
|
||||||
|
sudo apt-get install -y ca-certificates cmake curl patch
|
||||||
|
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||||
|
|
||||||
|
sudo rm -rfv /usr/bin/conda || true
|
||||||
|
|
||||||
- name: Test sentencetransformers
|
- name: Test sentencetransformers
|
||||||
run: |
|
run: |
|
||||||
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
|
export PATH=$PATH:/opt/conda/bin
|
||||||
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
|
make -C backend/python/sentencetransformers
|
||||||
|
make -C backend/python/sentencetransformers test
|
||||||
|
|
||||||
tests-rerankers:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install build-essential ffmpeg
|
|
||||||
# Install UV
|
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
|
||||||
sudo apt-get install -y libopencv-dev
|
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
|
||||||
|
|
||||||
- name: Test rerankers
|
|
||||||
run: |
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/rerankers
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/rerankers test
|
|
||||||
|
|
||||||
tests-diffusers:
|
tests-diffusers:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@@ -91,68 +82,25 @@ jobs:
|
|||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install -y build-essential ffmpeg
|
sudo apt-get install build-essential ffmpeg
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||||
sudo apt-get install -y libopencv-dev
|
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||||
# Install UV
|
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
|
sudo apt-get update && \
|
||||||
|
sudo apt-get install -y conda
|
||||||
|
sudo apt-get install -y ca-certificates cmake curl patch
|
||||||
|
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||||
|
|
||||||
|
sudo rm -rfv /usr/bin/conda || true
|
||||||
|
|
||||||
- name: Test diffusers
|
- name: Test diffusers
|
||||||
run: |
|
run: |
|
||||||
make --jobs=5 --output-sync=target -C backend/python/diffusers
|
export PATH=$PATH:/opt/conda/bin
|
||||||
make --jobs=5 --output-sync=target -C backend/python/diffusers test
|
make -C backend/python/diffusers
|
||||||
|
make -C backend/python/diffusers test
|
||||||
|
|
||||||
tests-parler-tts:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Force Install GIT latest
|
|
||||||
run: |
|
|
||||||
sudo apt-get update \
|
|
||||||
&& sudo apt-get install -y software-properties-common \
|
|
||||||
&& sudo apt-get update \
|
|
||||||
&& sudo add-apt-repository -y ppa:git-core/ppa \
|
|
||||||
&& sudo apt-get update \
|
|
||||||
&& sudo apt-get install -y git
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install build-essential ffmpeg
|
|
||||||
# Install UV
|
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
|
||||||
sudo apt-get install -y libopencv-dev
|
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
|
||||||
|
|
||||||
- name: Test parler-tts
|
|
||||||
run: |
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts test
|
|
||||||
|
|
||||||
tests-openvoice:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install build-essential ffmpeg
|
|
||||||
# Install UV
|
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
|
||||||
sudo apt-get install -y libopencv-dev
|
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
|
||||||
|
|
||||||
- name: Test openvoice
|
|
||||||
run: |
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/openvoice
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/openvoice test
|
|
||||||
|
|
||||||
tests-transformers-musicgen:
|
tests-transformers-musicgen:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@@ -165,16 +113,56 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install build-essential ffmpeg
|
sudo apt-get install build-essential ffmpeg
|
||||||
# Install UV
|
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||||
sudo apt-get install -y libopencv-dev
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
|
sudo apt-get update && \
|
||||||
|
sudo apt-get install -y conda
|
||||||
|
sudo apt-get install -y ca-certificates cmake curl patch
|
||||||
|
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||||
|
|
||||||
|
sudo rm -rfv /usr/bin/conda || true
|
||||||
|
|
||||||
- name: Test transformers-musicgen
|
- name: Test transformers-musicgen
|
||||||
run: |
|
run: |
|
||||||
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
|
export PATH=$PATH:/opt/conda/bin
|
||||||
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
|
make -C backend/python/transformers-musicgen
|
||||||
|
make -C backend/python/transformers-musicgen test
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
tests-petals:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
submodules: true
|
||||||
|
- name: Dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install build-essential ffmpeg
|
||||||
|
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||||
|
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||||
|
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||||
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||||
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
|
sudo apt-get update && \
|
||||||
|
sudo apt-get install -y conda
|
||||||
|
sudo apt-get install -y ca-certificates cmake curl patch
|
||||||
|
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||||
|
|
||||||
|
sudo rm -rfv /usr/bin/conda || true
|
||||||
|
|
||||||
|
- name: Test petals
|
||||||
|
run: |
|
||||||
|
export PATH=$PATH:/opt/conda/bin
|
||||||
|
make -C backend/python/petals
|
||||||
|
make -C backend/python/petals test
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# tests-bark:
|
# tests-bark:
|
||||||
# runs-on: ubuntu-latest
|
# runs-on: ubuntu-latest
|
||||||
@@ -227,16 +215,23 @@ jobs:
|
|||||||
# run: |
|
# run: |
|
||||||
# sudo apt-get update
|
# sudo apt-get update
|
||||||
# sudo apt-get install build-essential ffmpeg
|
# sudo apt-get install build-essential ffmpeg
|
||||||
# # Install UV
|
# curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||||
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
# sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||||
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
# gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||||
# sudo apt-get install -y libopencv-dev
|
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||||
# pip install --user --no-cache-dir grpcio-tools==1.64.1
|
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
|
# sudo apt-get update && \
|
||||||
|
# sudo apt-get install -y conda
|
||||||
|
# sudo apt-get install -y ca-certificates cmake curl patch
|
||||||
|
# sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||||
|
|
||||||
|
# sudo rm -rfv /usr/bin/conda || true
|
||||||
|
|
||||||
# - name: Test bark
|
# - name: Test bark
|
||||||
# run: |
|
# run: |
|
||||||
# make --jobs=5 --output-sync=target -C backend/python/bark
|
# export PATH=$PATH:/opt/conda/bin
|
||||||
# make --jobs=5 --output-sync=target -C backend/python/bark test
|
# make -C backend/python/bark
|
||||||
|
# make -C backend/python/bark test
|
||||||
|
|
||||||
|
|
||||||
# Below tests needs GPU. Commented out for now
|
# Below tests needs GPU. Commented out for now
|
||||||
@@ -252,15 +247,21 @@ jobs:
|
|||||||
# run: |
|
# run: |
|
||||||
# sudo apt-get update
|
# sudo apt-get update
|
||||||
# sudo apt-get install build-essential ffmpeg
|
# sudo apt-get install build-essential ffmpeg
|
||||||
# # Install UV
|
# curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||||
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
# sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||||
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
# gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||||
# sudo apt-get install -y libopencv-dev
|
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||||
# pip install --user --no-cache-dir grpcio-tools==1.64.1
|
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
|
# sudo apt-get update && \
|
||||||
|
# sudo apt-get install -y conda
|
||||||
|
# sudo apt-get install -y ca-certificates cmake curl patch
|
||||||
|
# sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||||
|
# sudo rm -rfv /usr/bin/conda || true
|
||||||
# - name: Test vllm
|
# - name: Test vllm
|
||||||
# run: |
|
# run: |
|
||||||
# make --jobs=5 --output-sync=target -C backend/python/vllm
|
# export PATH=$PATH:/opt/conda/bin
|
||||||
# make --jobs=5 --output-sync=target -C backend/python/vllm test
|
# make -C backend/python/vllm
|
||||||
|
# make -C backend/python/vllm test
|
||||||
tests-vallex:
|
tests-vallex:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
@@ -272,15 +273,21 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install build-essential ffmpeg
|
sudo apt-get install build-essential ffmpeg
|
||||||
# Install UV
|
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||||
sudo apt-get install -y libopencv-dev
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
|
sudo apt-get update && \
|
||||||
|
sudo apt-get install -y conda
|
||||||
|
sudo apt-get install -y ca-certificates cmake curl patch
|
||||||
|
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||||
|
sudo rm -rfv /usr/bin/conda || true
|
||||||
- name: Test vall-e-x
|
- name: Test vall-e-x
|
||||||
run: |
|
run: |
|
||||||
make --jobs=5 --output-sync=target -C backend/python/vall-e-x
|
export PATH=$PATH:/opt/conda/bin
|
||||||
make --jobs=5 --output-sync=target -C backend/python/vall-e-x test
|
make -C backend/python/vall-e-x
|
||||||
|
make -C backend/python/vall-e-x test
|
||||||
|
|
||||||
tests-coqui:
|
tests-coqui:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@@ -293,11 +300,18 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install build-essential ffmpeg
|
sudo apt-get install build-essential ffmpeg
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
|
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||||
# Install UV
|
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||||
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
|
sudo apt-get update && \
|
||||||
|
sudo apt-get install -y conda
|
||||||
|
sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng
|
||||||
|
sudo rm -rfv /usr/bin/conda || true
|
||||||
|
|
||||||
- name: Test coqui
|
- name: Test coqui
|
||||||
run: |
|
run: |
|
||||||
make --jobs=5 --output-sync=target -C backend/python/coqui
|
export PATH=$PATH:/opt/conda/bin
|
||||||
make --jobs=5 --output-sync=target -C backend/python/coqui test
|
make -C backend/python/coqui
|
||||||
|
make -C backend/python/coqui test
|
||||||
|
|||||||
141
.github/workflows/test.yml
vendored
141
.github/workflows/test.yml
vendored
@@ -9,9 +9,6 @@ on:
|
|||||||
tags:
|
tags:
|
||||||
- '*'
|
- '*'
|
||||||
|
|
||||||
env:
|
|
||||||
GRPC_VERSION: v1.65.0
|
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
@@ -60,18 +57,16 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go ${{ matrix.go-version }}
|
- name: Setup Go ${{ matrix.go-version }}
|
||||||
uses: actions/setup-go@v5
|
uses: actions/setup-go@v4
|
||||||
with:
|
with:
|
||||||
go-version: ${{ matrix.go-version }}
|
go-version: ${{ matrix.go-version }}
|
||||||
cache: false
|
|
||||||
# You can test your matrix by printing the current Go version
|
# You can test your matrix by printing the current Go version
|
||||||
- name: Display Go version
|
- name: Display Go version
|
||||||
run: go version
|
run: go version
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
|
sudo apt-get install build-essential ffmpeg
|
||||||
sudo apt-get install -y libgmock-dev
|
|
||||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||||
@@ -79,26 +74,8 @@ jobs:
|
|||||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
sudo apt-get update && \
|
sudo apt-get update && \
|
||||||
sudo apt-get install -y conda
|
sudo apt-get install -y conda
|
||||||
# Install UV
|
sudo apt-get install -y ca-certificates cmake curl patch
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||||
sudo apt-get install -y ca-certificates cmake patch python3-pip unzip
|
|
||||||
sudo apt-get install -y libopencv-dev
|
|
||||||
|
|
||||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
|
|
||||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
|
||||||
rm protoc.zip
|
|
||||||
|
|
||||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
|
||||||
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
|
||||||
export CUDACXX=/usr/local/cuda/bin/nvcc
|
|
||||||
|
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
|
||||||
|
|
||||||
# The python3-grpc-tools package in 22.04 is too old
|
|
||||||
pip install --user grpcio-tools
|
|
||||||
|
|
||||||
sudo rm -rfv /usr/bin/conda || true
|
sudo rm -rfv /usr/bin/conda || true
|
||||||
PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
|
PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
|
||||||
@@ -108,103 +85,29 @@ jobs:
|
|||||||
GO_TAGS="tts" make -C sources/go-piper piper.o && \
|
GO_TAGS="tts" make -C sources/go-piper piper.o && \
|
||||||
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
|
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
|
||||||
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
|
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
|
||||||
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||||
env:
|
|
||||||
CUDA_VERSION: 12-4
|
|
||||||
- name: Cache grpc
|
- name: Cache grpc
|
||||||
id: cache-grpc
|
id: cache-grpc
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v3
|
||||||
with:
|
with:
|
||||||
path: grpc
|
path: grpc
|
||||||
key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
|
key: ${{ runner.os }}-grpc
|
||||||
- name: Build grpc
|
- name: Build grpc
|
||||||
if: steps.cache-grpc.outputs.cache-hit != 'true'
|
if: steps.cache-grpc.outputs.cache-hit != 'true'
|
||||||
run: |
|
run: |
|
||||||
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --jobs 5 --shallow-submodules https://github.com/grpc/grpc && \
|
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||||
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && cd cmake/build && \
|
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||||
cmake -DgRPC_INSTALL=ON \
|
|
||||||
-DgRPC_BUILD_TESTS=OFF \
|
-DgRPC_BUILD_TESTS=OFF \
|
||||||
../.. && sudo make --jobs 5
|
../.. && sudo make -j12
|
||||||
- name: Install gRPC
|
- name: Install gRPC
|
||||||
run: |
|
run: |
|
||||||
cd grpc && cd cmake/build && sudo make --jobs 5 install
|
cd grpc && cd cmake/build && sudo make -j12 install
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
|
GO_TAGS="stablediffusion tts" make test
|
||||||
- name: Setup tmate session if tests fail
|
|
||||||
if: ${{ failure() }}
|
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
|
||||||
with:
|
|
||||||
detached: true
|
|
||||||
connect-timeout-seconds: 180
|
|
||||||
limit-access-to-actor: true
|
|
||||||
|
|
||||||
tests-aio-container:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Release space from worker
|
|
||||||
run: |
|
|
||||||
echo "Listing top largest packages"
|
|
||||||
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
|
||||||
head -n 30 <<< "${pkgs}"
|
|
||||||
echo
|
|
||||||
df -h
|
|
||||||
echo
|
|
||||||
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
|
|
||||||
sudo apt-get remove --auto-remove android-sdk-platform-tools || true
|
|
||||||
sudo apt-get purge --auto-remove android-sdk-platform-tools || true
|
|
||||||
sudo rm -rf /usr/local/lib/android
|
|
||||||
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
|
|
||||||
sudo rm -rf /usr/share/dotnet
|
|
||||||
sudo apt-get remove -y '^mono-.*' || true
|
|
||||||
sudo apt-get remove -y '^ghc-.*' || true
|
|
||||||
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
|
|
||||||
sudo apt-get remove -y 'php.*' || true
|
|
||||||
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
|
|
||||||
sudo apt-get remove -y '^google-.*' || true
|
|
||||||
sudo apt-get remove -y azure-cli || true
|
|
||||||
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
|
|
||||||
sudo apt-get remove -y '^gfortran-.*' || true
|
|
||||||
sudo apt-get autoremove -y
|
|
||||||
sudo apt-get clean
|
|
||||||
echo
|
|
||||||
echo "Listing top largest packages"
|
|
||||||
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
|
||||||
head -n 30 <<< "${pkgs}"
|
|
||||||
echo
|
|
||||||
sudo rm -rfv build || true
|
|
||||||
df -h
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
# Install protoc
|
|
||||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
|
|
||||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
|
||||||
rm protoc.zip
|
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
|
||||||
PATH="$PATH:$HOME/go/bin" make protogen-go
|
|
||||||
- name: Build images
|
|
||||||
run: |
|
|
||||||
docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
|
|
||||||
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
|
|
||||||
- name: Test
|
|
||||||
run: |
|
|
||||||
PATH="$PATH:$HOME/go/bin" LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
|
|
||||||
make run-e2e-aio
|
|
||||||
- name: Setup tmate session if tests fail
|
|
||||||
if: ${{ failure() }}
|
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
|
||||||
with:
|
|
||||||
detached: true
|
|
||||||
connect-timeout-seconds: 180
|
|
||||||
limit-access-to-actor: true
|
|
||||||
|
|
||||||
tests-apple:
|
tests-apple:
|
||||||
runs-on: macOS-14
|
runs-on: macOS-latest
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
go-version: ['1.21.x']
|
go-version: ['1.21.x']
|
||||||
@@ -214,29 +117,17 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go ${{ matrix.go-version }}
|
- name: Setup Go ${{ matrix.go-version }}
|
||||||
uses: actions/setup-go@v5
|
uses: actions/setup-go@v4
|
||||||
with:
|
with:
|
||||||
go-version: ${{ matrix.go-version }}
|
go-version: ${{ matrix.go-version }}
|
||||||
cache: false
|
|
||||||
# You can test your matrix by printing the current Go version
|
# You can test your matrix by printing the current Go version
|
||||||
- name: Display Go version
|
- name: Display Go version
|
||||||
run: go version
|
run: go version
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
|
brew install protobuf grpc
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
export C_INCLUDE_PATH=/usr/local/include
|
export C_INCLUDE_PATH=/usr/local/include
|
||||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||||
export CC=/opt/homebrew/opt/llvm/bin/clang
|
CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test
|
||||||
# Used to run the newer GNUMake version from brew that supports --output-sync
|
|
||||||
export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
|
|
||||||
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
|
|
||||||
- name: Setup tmate session if tests fail
|
|
||||||
if: ${{ failure() }}
|
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
|
||||||
with:
|
|
||||||
detached: true
|
|
||||||
connect-timeout-seconds: 180
|
|
||||||
limit-access-to-actor: true
|
|
||||||
37
.github/workflows/update_swagger.yaml
vendored
37
.github/workflows/update_swagger.yaml
vendored
@@ -1,37 +0,0 @@
|
|||||||
name: Update swagger
|
|
||||||
on:
|
|
||||||
schedule:
|
|
||||||
- cron: 0 20 * * *
|
|
||||||
workflow_dispatch:
|
|
||||||
jobs:
|
|
||||||
swagger:
|
|
||||||
strategy:
|
|
||||||
fail-fast: false
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
- uses: actions/setup-go@v5
|
|
||||||
with:
|
|
||||||
go-version: 'stable'
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install protobuf-compiler
|
|
||||||
- run: |
|
|
||||||
go install github.com/swaggo/swag/cmd/swag@latest
|
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
|
||||||
- name: Bump swagger 🔧
|
|
||||||
run: |
|
|
||||||
make protogen-go swagger
|
|
||||||
- name: Create Pull Request
|
|
||||||
uses: peter-evans/create-pull-request@v7
|
|
||||||
with:
|
|
||||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
|
||||||
push-to-fork: ci-forks/LocalAI
|
|
||||||
commit-message: 'feat(swagger): update swagger'
|
|
||||||
title: 'feat(swagger): update swagger'
|
|
||||||
branch: "update/swagger"
|
|
||||||
body: Update swagger
|
|
||||||
signoff: true
|
|
||||||
|
|
||||||
18
.github/workflows/yaml-check.yml
vendored
18
.github/workflows/yaml-check.yml
vendored
@@ -1,18 +0,0 @@
|
|||||||
name: 'Yamllint GitHub Actions'
|
|
||||||
on:
|
|
||||||
- pull_request
|
|
||||||
jobs:
|
|
||||||
yamllint:
|
|
||||||
name: 'Yamllint'
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: 'Checkout'
|
|
||||||
uses: actions/checkout@master
|
|
||||||
- name: 'Yamllint'
|
|
||||||
uses: karancode/yamllint-github-action@master
|
|
||||||
with:
|
|
||||||
yamllint_file_or_dir: 'gallery'
|
|
||||||
yamllint_strict: false
|
|
||||||
yamllint_comment: true
|
|
||||||
env:
|
|
||||||
GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
18
.gitignore
vendored
18
.gitignore
vendored
@@ -6,9 +6,6 @@ get-sources
|
|||||||
prepare-sources
|
prepare-sources
|
||||||
/backend/cpp/llama/grpc-server
|
/backend/cpp/llama/grpc-server
|
||||||
/backend/cpp/llama/llama.cpp
|
/backend/cpp/llama/llama.cpp
|
||||||
/backend/cpp/llama-*
|
|
||||||
|
|
||||||
*.log
|
|
||||||
|
|
||||||
go-ggml-transformers
|
go-ggml-transformers
|
||||||
go-gpt2
|
go-gpt2
|
||||||
@@ -42,18 +39,3 @@ backend-assets/*
|
|||||||
!backend-assets/.keep
|
!backend-assets/.keep
|
||||||
prepare
|
prepare
|
||||||
/ggml-metal.metal
|
/ggml-metal.metal
|
||||||
docs/static/gallery.html
|
|
||||||
|
|
||||||
# Protobuf generated files
|
|
||||||
*.pb.go
|
|
||||||
*pb2.py
|
|
||||||
*pb2_grpc.py
|
|
||||||
|
|
||||||
# SonarQube
|
|
||||||
.scannerwork
|
|
||||||
|
|
||||||
# backend virtual environments
|
|
||||||
**/venv
|
|
||||||
|
|
||||||
# per-developer customization files for the development container
|
|
||||||
.devcontainer/customization/*
|
|
||||||
5
.vscode/extensions.json
vendored
5
.vscode/extensions.json
vendored
@@ -1,5 +0,0 @@
|
|||||||
{
|
|
||||||
"recommendations": [
|
|
||||||
"golang.go"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
21
.vscode/launch.json
vendored
21
.vscode/launch.json
vendored
@@ -3,12 +3,12 @@
|
|||||||
"configurations": [
|
"configurations": [
|
||||||
{
|
{
|
||||||
"name": "Python: Current File",
|
"name": "Python: Current File",
|
||||||
"type": "debugpy",
|
"type": "python",
|
||||||
"request": "launch",
|
"request": "launch",
|
||||||
"program": "${file}",
|
"program": "${file}",
|
||||||
"console": "integratedTerminal",
|
"console": "integratedTerminal",
|
||||||
"justMyCode": false,
|
"justMyCode": false,
|
||||||
"cwd": "${fileDirname}",
|
"cwd": "${workspaceFolder}/examples/langchain-chroma",
|
||||||
"env": {
|
"env": {
|
||||||
"OPENAI_API_BASE": "http://localhost:8080/v1",
|
"OPENAI_API_BASE": "http://localhost:8080/v1",
|
||||||
"OPENAI_API_KEY": "abc"
|
"OPENAI_API_KEY": "abc"
|
||||||
@@ -19,16 +19,15 @@
|
|||||||
"type": "go",
|
"type": "go",
|
||||||
"request": "launch",
|
"request": "launch",
|
||||||
"mode": "debug",
|
"mode": "debug",
|
||||||
"program": "${workspaceRoot}",
|
"program": "${workspaceFolder}/main.go",
|
||||||
"args": [],
|
"args": [
|
||||||
|
"api"
|
||||||
|
],
|
||||||
"env": {
|
"env": {
|
||||||
"LOCALAI_LOG_LEVEL": "debug",
|
"C_INCLUDE_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
|
||||||
"LOCALAI_P2P": "true",
|
"LIBRARY_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
|
||||||
"LOCALAI_FEDERATED": "true"
|
"DEBUG": "true"
|
||||||
},
|
}
|
||||||
"buildFlags": ["-tags", "stablediffusion p2p tts", "-v"],
|
|
||||||
"envFile": "${workspaceFolder}/.env",
|
|
||||||
"cwd": "${workspaceRoot}"
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
# Contributing to LocalAI
|
# Contributing to localAI
|
||||||
|
|
||||||
Thank you for your interest in contributing to LocalAI! We appreciate your time and effort in helping to improve our project. Before you get started, please take a moment to review these guidelines.
|
Thank you for your interest in contributing to LocalAI! We appreciate your time and effort in helping to improve our project. Before you get started, please take a moment to review these guidelines.
|
||||||
|
|
||||||
@@ -15,6 +15,8 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time
|
|||||||
- [Documentation](#documentation)
|
- [Documentation](#documentation)
|
||||||
- [Community and Communication](#community-and-communication)
|
- [Community and Communication](#community-and-communication)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Getting Started
|
## Getting Started
|
||||||
|
|
||||||
### Prerequisites
|
### Prerequisites
|
||||||
@@ -27,9 +29,8 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time
|
|||||||
|
|
||||||
1. Clone the repository: `git clone https://github.com/go-skynet/LocalAI.git`
|
1. Clone the repository: `git clone https://github.com/go-skynet/LocalAI.git`
|
||||||
2. Navigate to the project directory: `cd LocalAI`
|
2. Navigate to the project directory: `cd LocalAI`
|
||||||
3. Install the required dependencies ( see https://localai.io/basics/build/#build-localai-locally )
|
3. Install the required dependencies: `make prepare`
|
||||||
4. Build LocalAI: `make build`
|
4. Run LocalAI: `make run`
|
||||||
5. Run LocalAI: `./local-ai`
|
|
||||||
|
|
||||||
## Contributing
|
## Contributing
|
||||||
|
|
||||||
@@ -52,33 +53,20 @@ If you find a bug, have a feature request, or encounter any issues, please check
|
|||||||
|
|
||||||
## Coding Guidelines
|
## Coding Guidelines
|
||||||
|
|
||||||
- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like [`golangci-lint`](https://golangci-lint.run) can help you here.
|
- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like []`golangci-lint`](https://golangci-lint.run) can help you here.
|
||||||
|
|
||||||
## Testing
|
## Testing
|
||||||
|
|
||||||
`make test` cannot handle all the model now. Please be sure to add a test case for the new features or the part was changed.
|
`make test` cannot handle all the model now. Please be sure to add a test case for the new features or the part was changed.
|
||||||
|
|
||||||
### Running AIO tests
|
|
||||||
|
|
||||||
All-In-One images has a set of tests that automatically verifies that most of the endpoints works correctly, a flow can be :
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Build the LocalAI docker image
|
|
||||||
make DOCKER_IMAGE=local-ai docker
|
|
||||||
|
|
||||||
# Build the corresponding AIO image
|
|
||||||
BASE_IMAGE=local-ai DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
|
|
||||||
|
|
||||||
# Run the AIO e2e tests
|
|
||||||
LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio make run-e2e-aio
|
|
||||||
```
|
|
||||||
|
|
||||||
## Documentation
|
## Documentation
|
||||||
|
|
||||||
We are welcome the contribution of the documents, please open new PR or create a new issue. The documentation is available under `docs/` https://github.com/mudler/LocalAI/tree/master/docs
|
- We are welcome the contribution of the documents, please open new PR in the official document repo [localai-website](https://github.com/go-skynet/localai-website)
|
||||||
|
|
||||||
## Community and Communication
|
## Community and Communication
|
||||||
|
|
||||||
- You can reach out via the Github issue tracker.
|
- You can reach out via the Github issue tracker.
|
||||||
- Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
|
- Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
|
||||||
- Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
|
- Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
|
||||||
|
|
||||||
|
---
|
||||||
485
Dockerfile
485
Dockerfile
@@ -1,338 +1,129 @@
|
|||||||
ARG IMAGE_TYPE=extras
|
ARG IMAGE_TYPE=extras
|
||||||
ARG BASE_IMAGE=ubuntu:22.04
|
ARG BASE_IMAGE=ubuntu:22.04
|
||||||
ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
|
|
||||||
ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
|
|
||||||
|
|
||||||
# The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it.
|
# extras or core
|
||||||
FROM ${BASE_IMAGE} AS requirements-core
|
FROM ${BASE_IMAGE} as requirements-core
|
||||||
|
|
||||||
USER root
|
USER root
|
||||||
|
|
||||||
ARG GO_VERSION=1.22.6
|
ARG GO_VERSION=1.21.7
|
||||||
ARG CMAKE_VERSION=3.26.4
|
ARG BUILD_TYPE
|
||||||
ARG CMAKE_FROM_SOURCE=false
|
ARG CUDA_MAJOR_VERSION=11
|
||||||
|
ARG CUDA_MINOR_VERSION=7
|
||||||
ARG TARGETARCH
|
ARG TARGETARCH
|
||||||
ARG TARGETVARIANT
|
ARG TARGETVARIANT
|
||||||
|
|
||||||
|
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
|
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh"
|
||||||
|
|
||||||
|
ARG GO_TAGS="stablediffusion tinydream tts"
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y ca-certificates curl patch pip cmake git && apt-get clean
|
||||||
build-essential \
|
|
||||||
ccache \
|
|
||||||
ca-certificates \
|
|
||||||
curl libssl-dev \
|
|
||||||
git \
|
|
||||||
unzip upx-ucl && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Install CMake (the version in 22.04 is too old)
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
|
|
||||||
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
|
|
||||||
else
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
cmake && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# Install Go
|
# Install Go
|
||||||
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
|
RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -C /usr/local -xz
|
||||||
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
|
ENV PATH $PATH:/usr/local/go/bin
|
||||||
|
|
||||||
# Install grpc compilers
|
|
||||||
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
|
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
|
||||||
|
|
||||||
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
|
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
|
||||||
RUN update-ca-certificates
|
RUN update-ca-certificates
|
||||||
|
|
||||||
RUN test -n "$TARGETARCH" \
|
|
||||||
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
|
|
||||||
|
|
||||||
# Use the variables in subsequent instructions
|
# Use the variables in subsequent instructions
|
||||||
RUN echo "Target Architecture: $TARGETARCH"
|
RUN echo "Target Architecture: $TARGETARCH"
|
||||||
RUN echo "Target Variant: $TARGETVARIANT"
|
RUN echo "Target Variant: $TARGETVARIANT"
|
||||||
|
|
||||||
|
# CuBLAS requirements
|
||||||
|
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
|
||||||
|
apt-get install -y software-properties-common && \
|
||||||
|
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
|
||||||
|
dpkg -i cuda-keyring_1.1-1_all.deb && \
|
||||||
|
rm -f cuda-keyring_1.1-1_all.deb && \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
|
||||||
|
; fi
|
||||||
|
|
||||||
# Cuda
|
# Cuda
|
||||||
ENV PATH=/usr/local/cuda/bin:${PATH}
|
ENV PATH /usr/local/cuda/bin:${PATH}
|
||||||
|
|
||||||
# HipBLAS requirements
|
# HipBLAS requirements
|
||||||
ENV PATH=/opt/rocm/bin:${PATH}
|
ENV PATH /opt/rocm/bin:${PATH}
|
||||||
|
|
||||||
# OpenBLAS requirements and stable diffusion
|
# OpenBLAS requirements and stable diffusion
|
||||||
RUN apt-get update && \
|
RUN apt-get install -y \
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
libopenblas-dev \
|
libopenblas-dev \
|
||||||
libopencv-dev && \
|
libopencv-dev \
|
||||||
apt-get clean && \
|
&& apt-get clean
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Set up OpenCV
|
# Set up OpenCV
|
||||||
RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||||
|
|
||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
|
|
||||||
###################################
|
RUN test -n "$TARGETARCH" \
|
||||||
###################################
|
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
|
||||||
|
|
||||||
# The requirements-extras target is for any builds with IMAGE_TYPE=extras. It should not be placed in this target unless every IMAGE_TYPE=extras build will use it
|
# Extras requirements
|
||||||
FROM requirements-core AS requirements-extras
|
FROM requirements-core as requirements-extras
|
||||||
|
|
||||||
|
RUN curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||||
|
install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||||
|
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||||
|
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \
|
||||||
|
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list && \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y conda && apt-get clean
|
||||||
|
|
||||||
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
||||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||||
|
RUN apt-get install -y python3-pip && apt-get clean
|
||||||
|
RUN pip install --upgrade pip
|
||||||
|
|
||||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||||
RUN apt-get update && \
|
RUN apt-get install -y espeak-ng espeak && apt-get clean
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
espeak-ng \
|
|
||||||
espeak \
|
|
||||||
python3-pip \
|
|
||||||
python-is-python3 \
|
|
||||||
python3-dev llvm \
|
|
||||||
python3-venv && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/* && \
|
|
||||||
pip install --upgrade pip
|
|
||||||
|
|
||||||
# Install grpcio-tools (the version in 22.04 is too old)
|
RUN if [ ! -e /usr/bin/python ]; then \
|
||||||
RUN pip install --user grpcio-tools
|
ln -s /usr/bin/python3 /usr/bin/python \
|
||||||
|
|
||||||
###################################
|
|
||||||
###################################
|
|
||||||
|
|
||||||
# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
|
|
||||||
# This target will be built on top of requirements-core or requirements-extras as retermined by the IMAGE_TYPE build-arg
|
|
||||||
FROM requirements-${IMAGE_TYPE} AS requirements-drivers
|
|
||||||
|
|
||||||
ARG BUILD_TYPE
|
|
||||||
ARG CUDA_MAJOR_VERSION=12
|
|
||||||
ARG CUDA_MINOR_VERSION=0
|
|
||||||
|
|
||||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
|
||||||
|
|
||||||
# Vulkan requirements
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "${BUILD_TYPE}" = "vulkan" ]; then
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
software-properties-common pciutils wget gpg-agent && \
|
|
||||||
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
|
||||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
vulkan-sdk && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# CuBLAS requirements
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "${BUILD_TYPE}" = "cublas" ]; then
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
software-properties-common pciutils
|
|
||||||
if [ "amd64" = "$TARGETARCH" ]; then
|
|
||||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
|
||||||
fi
|
|
||||||
if [ "arm64" = "$TARGETARCH" ]; then
|
|
||||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
|
|
||||||
fi
|
|
||||||
dpkg -i cuda-keyring_1.1-1_all.deb && \
|
|
||||||
rm -f cuda-keyring_1.1-1_all.deb && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
|
||||||
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# If we are building with clblas support, we need the libraries for the builds
|
|
||||||
RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
libclblast-dev && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/* \
|
|
||||||
; fi
|
|
||||||
|
|
||||||
RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
hipblas-dev \
|
|
||||||
rocblas-dev && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/* && \
|
|
||||||
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
|
|
||||||
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
|
|
||||||
ldconfig \
|
|
||||||
; fi
|
; fi
|
||||||
|
|
||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
# Temporary workaround for Intel's repository to work correctly
|
FROM requirements-${IMAGE_TYPE} as builder
|
||||||
# https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/APT-Repository-not-working-signatures-invalid/m-p/1599436/highlight/true#M36143
|
|
||||||
# This is a temporary workaround until Intel fixes their repository
|
|
||||||
FROM ${INTEL_BASE_IMAGE} AS intel
|
|
||||||
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
|
|
||||||
gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
|
|
||||||
RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list
|
|
||||||
|
|
||||||
###################################
|
ARG GO_TAGS="stablediffusion tts"
|
||||||
###################################
|
|
||||||
|
|
||||||
# The grpc target does one thing, it builds and installs GRPC. This is in it's own layer so that it can be effectively cached by CI.
|
|
||||||
# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
|
|
||||||
FROM ${GRPC_BASE_IMAGE} AS grpc
|
|
||||||
|
|
||||||
# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
|
|
||||||
ARG GRPC_MAKEFLAGS="-j4 -Otarget"
|
|
||||||
ARG GRPC_VERSION=v1.65.0
|
|
||||||
ARG CMAKE_FROM_SOURCE=false
|
|
||||||
ARG CMAKE_VERSION=3.26.4
|
|
||||||
|
|
||||||
ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
|
|
||||||
|
|
||||||
WORKDIR /build
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
ca-certificates \
|
|
||||||
build-essential curl libssl-dev \
|
|
||||||
git && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Install CMake (the version in 22.04 is too old)
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
|
|
||||||
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
|
|
||||||
else
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
cmake && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
|
|
||||||
# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
|
|
||||||
# and running make install in the target container
|
|
||||||
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
|
||||||
mkdir -p /build/grpc/cmake/build && \
|
|
||||||
cd /build/grpc/cmake/build && \
|
|
||||||
sed -i "216i\ TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt" && \
|
|
||||||
cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
|
|
||||||
make && \
|
|
||||||
make install && \
|
|
||||||
rm -rf /build
|
|
||||||
|
|
||||||
###################################
|
|
||||||
###################################
|
|
||||||
|
|
||||||
# The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer
|
|
||||||
|
|
||||||
FROM requirements-drivers AS builder-base
|
|
||||||
|
|
||||||
ARG GO_TAGS="stablediffusion tts p2p"
|
|
||||||
ARG GRPC_BACKENDS
|
ARG GRPC_BACKENDS
|
||||||
ARG MAKEFLAGS
|
ARG BUILD_GRPC=true
|
||||||
ARG LD_FLAGS="-s -w"
|
|
||||||
|
|
||||||
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
|
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
|
||||||
ENV GO_TAGS=${GO_TAGS}
|
ENV GO_TAGS=${GO_TAGS}
|
||||||
ENV MAKEFLAGS=${MAKEFLAGS}
|
|
||||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||||
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
||||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||||
ENV LD_FLAGS=${LD_FLAGS}
|
|
||||||
|
|
||||||
RUN echo "GO_TAGS: $GO_TAGS" && echo "TARGETARCH: $TARGETARCH"
|
|
||||||
|
|
||||||
WORKDIR /build
|
|
||||||
|
|
||||||
|
|
||||||
# We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below
|
|
||||||
# but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
|
|
||||||
# here so that we can generate the grpc code for the stablediffusion build
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "amd64" = "$TARGETARCH" ]; then
|
|
||||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
|
|
||||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
|
||||||
rm protoc.zip
|
|
||||||
fi
|
|
||||||
if [ "arm64" = "$TARGETARCH" ]; then
|
|
||||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-aarch_64.zip -o protoc.zip && \
|
|
||||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
|
||||||
rm protoc.zip
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
|
|
||||||
###################################
|
|
||||||
###################################
|
|
||||||
|
|
||||||
# This first portion of builder holds the layers specifically used to build backend-assets/grpc/stablediffusion
|
|
||||||
# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
|
|
||||||
FROM builder-base AS builder-sd
|
|
||||||
|
|
||||||
# stablediffusion does not tolerate a newer version of abseil, copy only over enough elements to build it
|
|
||||||
COPY Makefile .
|
|
||||||
COPY go.mod .
|
|
||||||
COPY go.sum .
|
|
||||||
COPY backend/backend.proto ./backend/backend.proto
|
|
||||||
COPY backend/go/image/stablediffusion ./backend/go/image/stablediffusion
|
|
||||||
COPY pkg/grpc ./pkg/grpc
|
|
||||||
COPY pkg/stablediffusion ./pkg/stablediffusion
|
|
||||||
RUN git init
|
|
||||||
RUN make sources/go-stable-diffusion
|
|
||||||
RUN touch prepare-sources
|
|
||||||
|
|
||||||
# Actually build the backend
|
|
||||||
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make backend-assets/grpc/stablediffusion
|
|
||||||
|
|
||||||
###################################
|
|
||||||
###################################
|
|
||||||
|
|
||||||
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
|
|
||||||
# Adjustments to the build process should likely be made here.
|
|
||||||
FROM builder-sd AS builder
|
|
||||||
|
|
||||||
# Install the pre-built GRPC
|
|
||||||
COPY --from=grpc /opt/grpc /usr/local
|
|
||||||
|
|
||||||
# Rebuild with defaults backends
|
|
||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
COPY .git .
|
COPY .git .
|
||||||
|
|
||||||
RUN make prepare
|
RUN make prepare
|
||||||
|
|
||||||
## Build the binary
|
# If we are building with clblas support, we need the libraries for the builds
|
||||||
## If it's CUDA or hipblas, we want to skip some of the llama-compat backends to save space
|
RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
|
||||||
## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
|
apt-get update && \
|
||||||
## (both will use CUDA or hipblas for the actual computation)
|
apt-get install -y libclblast-dev && \
|
||||||
RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
apt-get clean \
|
||||||
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
|
; fi
|
||||||
else \
|
|
||||||
make build; \
|
# stablediffusion does not tolerate a newer version of abseil, build it first
|
||||||
fi
|
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||||
|
|
||||||
|
RUN if [ "${BUILD_GRPC}" = "true" ]; then \
|
||||||
|
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||||
|
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||||
|
-DgRPC_BUILD_TESTS=OFF \
|
||||||
|
../.. && make -j12 install \
|
||||||
|
; fi
|
||||||
|
|
||||||
|
# Rebuild with defaults backends
|
||||||
|
RUN make build
|
||||||
|
|
||||||
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
||||||
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
|
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
|
||||||
@@ -342,68 +133,33 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
|||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
# The devcontainer target is not used on CI. It is a target for developers to use locally -
|
FROM requirements-${IMAGE_TYPE}
|
||||||
# rather than copying files it mounts them locally and leaves building to the developer
|
|
||||||
|
|
||||||
FROM builder-base AS devcontainer
|
|
||||||
|
|
||||||
ARG FFMPEG
|
|
||||||
|
|
||||||
COPY --from=grpc /opt/grpc /usr/local
|
|
||||||
|
|
||||||
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion /build/backend-assets/grpc/stablediffusion
|
|
||||||
|
|
||||||
COPY .devcontainer-scripts /.devcontainer-scripts
|
|
||||||
|
|
||||||
# Add FFmpeg
|
|
||||||
RUN if [ "${FFMPEG}" = "true" ]; then \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
ffmpeg && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/* \
|
|
||||||
; fi
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends \
|
|
||||||
ssh less wget
|
|
||||||
# For the devcontainer, leave apt functional in case additional devtools are needed at runtime.
|
|
||||||
|
|
||||||
RUN go install github.com/go-delve/delve/cmd/dlv@latest
|
|
||||||
|
|
||||||
RUN go install github.com/mikefarah/yq/v4@latest
|
|
||||||
|
|
||||||
###################################
|
|
||||||
###################################
|
|
||||||
|
|
||||||
# This is the final target. The result of this target will be the image uploaded to the registry.
|
|
||||||
# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
|
|
||||||
FROM requirements-drivers
|
|
||||||
|
|
||||||
ARG FFMPEG
|
ARG FFMPEG
|
||||||
ARG BUILD_TYPE
|
ARG BUILD_TYPE
|
||||||
ARG TARGETARCH
|
ARG TARGETARCH
|
||||||
ARG IMAGE_TYPE=extras
|
ARG IMAGE_TYPE=extras
|
||||||
ARG EXTRA_BACKENDS
|
|
||||||
ARG MAKEFLAGS
|
|
||||||
|
|
||||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||||
ENV REBUILD=false
|
ENV REBUILD=false
|
||||||
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
|
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
|
||||||
ENV MAKEFLAGS=${MAKEFLAGS}
|
|
||||||
|
|
||||||
ARG CUDA_MAJOR_VERSION=12
|
ARG CUDA_MAJOR_VERSION=11
|
||||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||||
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
||||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||||
|
ENV PIP_CACHE_PURGE=true
|
||||||
|
|
||||||
# Add FFmpeg
|
# Add FFmpeg
|
||||||
RUN if [ "${FFMPEG}" = "true" ]; then \
|
RUN if [ "${FFMPEG}" = "true" ]; then \
|
||||||
|
apt-get install -y ffmpeg && apt-get clean \
|
||||||
|
; fi
|
||||||
|
|
||||||
|
# Add OpenCL
|
||||||
|
RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y libclblast1 && \
|
||||||
ffmpeg && \
|
apt-get clean \
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/* \
|
|
||||||
; fi
|
; fi
|
||||||
|
|
||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
@@ -415,9 +171,9 @@ WORKDIR /build
|
|||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
COPY --from=builder /build/sources ./sources/
|
COPY --from=builder /build/sources ./sources/
|
||||||
COPY --from=grpc /opt/grpc /usr/local
|
COPY --from=builder /build/grpc ./grpc/
|
||||||
|
|
||||||
RUN make prepare-sources
|
RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf grpc
|
||||||
|
|
||||||
# Copy the binary
|
# Copy the binary
|
||||||
COPY --from=builder /build/local-ai ./
|
COPY --from=builder /build/local-ai ./
|
||||||
@@ -426,57 +182,47 @@ COPY --from=builder /build/local-ai ./
|
|||||||
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
|
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
|
||||||
|
|
||||||
# do not let stablediffusion rebuild (requires an older version of absl)
|
# do not let stablediffusion rebuild (requires an older version of absl)
|
||||||
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
|
COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
|
||||||
|
|
||||||
# Change the shell to bash so we can use [[ tests below
|
## Duplicated from Makefile to avoid having a big layer that's hard to push
|
||||||
SHELL ["/bin/bash", "-c"]
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
# We try to strike a balance between individual layer size (as that affects total push time) and total image size
|
make -C backend/python/autogptq \
|
||||||
# Splitting the backends into more groups with fewer items results in a larger image, but a smaller size for the largest layer
|
|
||||||
# Splitting the backends into fewer groups with more items results in a smaller image, but a larger size for the largest layer
|
|
||||||
|
|
||||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
|
||||||
make -C backend/python/coqui \
|
|
||||||
; fi && \
|
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "parler-tts" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
|
||||||
make -C backend/python/parler-tts \
|
|
||||||
; fi && \
|
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
|
||||||
make -C backend/python/diffusers \
|
|
||||||
; fi && \
|
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
|
||||||
make -C backend/python/transformers-musicgen \
|
|
||||||
; fi
|
; fi
|
||||||
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
make -C backend/python/bark \
|
||||||
make -C backend/python/vall-e-x \
|
; fi
|
||||||
; fi && \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
make -C backend/python/diffusers \
|
||||||
make -C backend/python/openvoice \
|
; fi
|
||||||
; fi && \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
make -C backend/python/vllm \
|
||||||
|
; fi
|
||||||
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
|
make -C backend/python/mamba \
|
||||||
|
; fi
|
||||||
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
make -C backend/python/sentencetransformers \
|
make -C backend/python/sentencetransformers \
|
||||||
; fi && \
|
; fi
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
make -C backend/python/exllama2 \
|
|
||||||
; fi && \
|
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
|
||||||
make -C backend/python/transformers \
|
make -C backend/python/transformers \
|
||||||
; fi
|
; fi
|
||||||
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
make -C backend/python/vall-e-x \
|
||||||
make -C backend/python/vllm \
|
; fi
|
||||||
; fi && \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "autogptq" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
make -C backend/python/exllama \
|
||||||
make -C backend/python/autogptq \
|
; fi
|
||||||
; fi && \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "bark" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
make -C backend/python/exllama2 \
|
||||||
make -C backend/python/bark \
|
; fi
|
||||||
; fi && \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
make -C backend/python/petals \
|
||||||
make -C backend/python/rerankers \
|
; fi
|
||||||
; fi && \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "mamba" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
make -C backend/python/transformers-musicgen \
|
||||||
make -C backend/python/mamba \
|
; fi
|
||||||
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
|
make -C backend/python/coqui \
|
||||||
; fi
|
; fi
|
||||||
|
|
||||||
# Make sure the models directory exists
|
# Make sure the models directory exists
|
||||||
@@ -484,8 +230,7 @@ RUN mkdir -p /build/models
|
|||||||
|
|
||||||
# Define the health check command
|
# Define the health check command
|
||||||
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
|
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
|
||||||
CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1
|
CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
|
||||||
|
|
||||||
VOLUME /build/models
|
|
||||||
EXPOSE 8080
|
EXPOSE 8080
|
||||||
ENTRYPOINT [ "/build/entrypoint.sh" ]
|
ENTRYPOINT [ "/build/entrypoint.sh" ]
|
||||||
|
|||||||
@@ -1,8 +0,0 @@
|
|||||||
ARG BASE_IMAGE=ubuntu:22.04
|
|
||||||
|
|
||||||
FROM ${BASE_IMAGE}
|
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y pciutils && apt-get clean
|
|
||||||
|
|
||||||
COPY aio/ /aio
|
|
||||||
ENTRYPOINT [ "/aio/entrypoint.sh" ]
|
|
||||||
143
README.md
143
README.md
@@ -20,14 +20,14 @@
|
|||||||
</a>
|
</a>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<p align="center">
|
[<img src="https://img.shields.io/badge/dockerhub-images-important.svg?logo=Docker">](https://hub.docker.com/r/localai/localai)
|
||||||
<a href="https://hub.docker.com/r/localai/localai" target="blank">
|
[<img src="https://img.shields.io/badge/quay.io-images-important.svg?">](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest)
|
||||||
<img src="https://img.shields.io/badge/dockerhub-images-important.svg?logo=Docker" alt="LocalAI Docker hub"/>
|
|
||||||
</a>
|
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
||||||
<a href="https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest" target="blank">
|
>
|
||||||
<img src="https://img.shields.io/badge/quay.io-images-important.svg?" alt="LocalAI Quay.io"/>
|
> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||||
</a>
|
|
||||||
</p>
|
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
||||||
|
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<a href="https://twitter.com/LocalAI_API" target="blank">
|
<a href="https://twitter.com/LocalAI_API" target="blank">
|
||||||
@@ -36,97 +36,53 @@
|
|||||||
<a href="https://discord.gg/uJAeKSAGDy" target="blank">
|
<a href="https://discord.gg/uJAeKSAGDy" target="blank">
|
||||||
<img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
|
<img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
|
||||||
</a>
|
</a>
|
||||||
</p>
|
|
||||||
|
|
||||||
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU.
|
||||||
>
|
|
||||||
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/go-skynet/LocalAI/tree/master/examples/)
|
|
||||||
|
|
||||||
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
## 🔥🔥 Hot topics / Roadmap
|
||||||
|
|
||||||
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
|
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||||
|
|
||||||

|
- Parallel function calling: https://github.com/mudler/LocalAI/pull/1726
|
||||||
|
- Upload file API: https://github.com/mudler/LocalAI/pull/1703
|
||||||
|
- Tools API support: https://github.com/mudler/LocalAI/pull/1715
|
||||||
|
- LLaVa 1.6: https://github.com/mudler/LocalAI/pull/1714
|
||||||
|
- ROCm container images: https://github.com/mudler/LocalAI/pull/1595
|
||||||
|
- Intel GPU support (sycl, transformers, diffusers): https://github.com/mudler/LocalAI/issues/1653
|
||||||
|
- Deprecation of old backends: https://github.com/mudler/LocalAI/issues/1651
|
||||||
|
- Mamba support: https://github.com/mudler/LocalAI/pull/1589
|
||||||
|
- Start and share models with config file: https://github.com/mudler/LocalAI/pull/1522
|
||||||
|
- 🐸 Coqui: https://github.com/mudler/LocalAI/pull/1489
|
||||||
|
- Img2vid https://github.com/mudler/LocalAI/pull/1442
|
||||||
|
|
||||||
Run the installer script:
|
Hot topics (looking for contributors):
|
||||||
|
|
||||||
```bash
|
|
||||||
curl https://localai.io/install.sh | sh
|
|
||||||
```
|
|
||||||
|
|
||||||
Or run with docker:
|
|
||||||
```bash
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
|
||||||
# Alternative images:
|
|
||||||
# - if you have an Nvidia GPU:
|
|
||||||
# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
|
|
||||||
# - without preconfigured models
|
|
||||||
# docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
|
|
||||||
# - without preconfigured models for Nvidia GPUs
|
|
||||||
# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
|
|
||||||
```
|
|
||||||
|
|
||||||
To load models:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# From the model gallery (see available models with `local-ai models list`, in the WebUI from the model tab, or visiting https://models.localai.io)
|
|
||||||
local-ai run llama-3.2-1b-instruct:q4_k_m
|
|
||||||
# Start LocalAI with the phi-2 model directly from huggingface
|
|
||||||
local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
|
|
||||||
# Install and run a model from the Ollama OCI registry
|
|
||||||
local-ai run ollama://gemma:2b
|
|
||||||
# Run a model from a configuration file
|
|
||||||
local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
|
|
||||||
# Install and run a model from a standard OCI registry (e.g., Docker Hub)
|
|
||||||
local-ai run oci://localai/phi-2:latest
|
|
||||||
```
|
|
||||||
|
|
||||||
[💻 Getting started](https://localai.io/basics/getting_started/index.html)
|
|
||||||
|
|
||||||
## 📰 Latest project news
|
|
||||||
|
|
||||||
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
|
|
||||||
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
|
|
||||||
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
|
|
||||||
- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628
|
|
||||||
- May 2024: 🔥🔥 Decentralized P2P llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
|
|
||||||
- May 2024: 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
|
|
||||||
- May 2024: 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
|
|
||||||
- May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
|
|
||||||
- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
|
|
||||||
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
|
|
||||||
|
|
||||||
Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
|
||||||
|
|
||||||
## 🔥🔥 Hot topics (looking for help):
|
|
||||||
|
|
||||||
- Multimodal with vLLM and Video understanding: https://github.com/mudler/LocalAI/pull/3729
|
|
||||||
- Realtime API https://github.com/mudler/LocalAI/issues/3714
|
|
||||||
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
|
|
||||||
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
|
|
||||||
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
||||||
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
|
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
|
||||||
- Assistant API: https://github.com/mudler/LocalAI/issues/1273
|
- Assistant API: https://github.com/mudler/LocalAI/issues/1273
|
||||||
- Moderation endpoint: https://github.com/mudler/LocalAI/issues/999
|
- Moderation endpoint: https://github.com/mudler/LocalAI/issues/999
|
||||||
- Vulkan: https://github.com/mudler/LocalAI/issues/1647
|
- Vulkan: https://github.com/mudler/LocalAI/issues/1647
|
||||||
- Anthropic API: https://github.com/mudler/LocalAI/issues/1808
|
|
||||||
|
|
||||||
If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22
|
If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22
|
||||||
|
|
||||||
|
## 💻 [Getting started](https://localai.io/basics/getting_started/index.html)
|
||||||
|
|
||||||
|
For a detailed step-by-step introduction, refer to the [Getting Started](https://localai.io/basics/getting_started/index.html) guide. For those in a hurry, here's a straightforward one-liner to launch a LocalAI instance with [phi-2](https://huggingface.co/microsoft/phi-2) using `docker`:
|
||||||
|
|
||||||
|
```
|
||||||
|
docker run -ti -p 8080:8080 localai/localai:v2.9.0-ffmpeg-core phi-2
|
||||||
|
```
|
||||||
|
|
||||||
## 🚀 [Features](https://localai.io/features/)
|
## 🚀 [Features](https://localai.io/features/)
|
||||||
|
|
||||||
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
|
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
|
||||||
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
|
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
|
||||||
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
|
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
|
||||||
- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
|
- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
|
||||||
- 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/)
|
- 🔥 [OpenAI functions](https://localai.io/features/openai-functions/) 🆕
|
||||||
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
|
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
|
||||||
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
|
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
|
||||||
- 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
|
- 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
|
||||||
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
|
- 🆕 [Vision API](https://localai.io/features/gpt-vision/)
|
||||||
- 📈 [Reranker API](https://localai.io/features/reranker/)
|
|
||||||
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
|
|
||||||
- 🌍 Integrated WebUI!
|
|
||||||
|
|
||||||
## 💻 Usage
|
## 💻 Usage
|
||||||
|
|
||||||
@@ -140,7 +96,6 @@ Build and deploy custom containers:
|
|||||||
WebUIs:
|
WebUIs:
|
||||||
- https://github.com/Jirubizu/localai-admin
|
- https://github.com/Jirubizu/localai-admin
|
||||||
- https://github.com/go-skynet/LocalAI-frontend
|
- https://github.com/go-skynet/LocalAI-frontend
|
||||||
- QA-Pilot(An interactive chat project that leverages LocalAI LLMs for rapid understanding and navigation of GitHub code repository) https://github.com/reid41/QA-Pilot
|
|
||||||
|
|
||||||
Model galleries
|
Model galleries
|
||||||
- https://github.com/go-skynet/model-gallery
|
- https://github.com/go-skynet/model-gallery
|
||||||
@@ -148,20 +103,17 @@ Model galleries
|
|||||||
Other:
|
Other:
|
||||||
- Helm chart https://github.com/go-skynet/helm-charts
|
- Helm chart https://github.com/go-skynet/helm-charts
|
||||||
- VSCode extension https://github.com/badgooooor/localai-vscode-plugin
|
- VSCode extension https://github.com/badgooooor/localai-vscode-plugin
|
||||||
- Terminal utility https://github.com/djcopley/ShellOracle
|
|
||||||
- Local Smart assistant https://github.com/mudler/LocalAGI
|
- Local Smart assistant https://github.com/mudler/LocalAGI
|
||||||
- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation / https://github.com/valentinfrlch/ha-gpt4vision
|
- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation
|
||||||
- Discord bot https://github.com/mudler/LocalAGI/tree/main/examples/discord
|
- Discord bot https://github.com/mudler/LocalAGI/tree/main/examples/discord
|
||||||
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
|
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
|
||||||
- Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
|
|
||||||
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
|
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
|
||||||
- Github Actions: https://github.com/marketplace/actions/start-localai
|
|
||||||
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/
|
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/
|
||||||
|
|
||||||
|
|
||||||
### 🔗 Resources
|
### 🔗 Resources
|
||||||
|
|
||||||
- [LLM finetuning guide](https://localai.io/docs/advanced/fine-tuning/)
|
- 🆕 New! [LLM finetuning guide](https://localai.io/docs/advanced/fine-tuning/)
|
||||||
- [How to build locally](https://localai.io/basics/build/index.html)
|
- [How to build locally](https://localai.io/basics/build/index.html)
|
||||||
- [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes)
|
- [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes)
|
||||||
- [Projects integrating LocalAI](https://localai.io/docs/integrations/)
|
- [Projects integrating LocalAI](https://localai.io/docs/integrations/)
|
||||||
@@ -169,9 +121,7 @@ Other:
|
|||||||
|
|
||||||
## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
|
## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
|
||||||
|
|
||||||
- [Run Visual studio code with LocalAI (SUSE)](https://www.suse.com/c/running-ai-locally/)
|
- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/ai/answers/tiZMDoZzZV6TLxgDXNBnFE/deploying-helm-charts-on-aws-eks)
|
||||||
- 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/)
|
|
||||||
- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
|
|
||||||
- [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)
|
- [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)
|
||||||
- [Create a slackbot for teams and OSS projects that answer to documentation](https://mudler.pm/posts/smart-slackbot-for-teams/)
|
- [Create a slackbot for teams and OSS projects that answer to documentation](https://mudler.pm/posts/smart-slackbot-for-teams/)
|
||||||
- [LocalAI meets k8sgpt](https://www.youtube.com/watch?v=PKrDNuJ_dfE)
|
- [LocalAI meets k8sgpt](https://www.youtube.com/watch?v=PKrDNuJ_dfE)
|
||||||
@@ -198,16 +148,17 @@ If you utilize this repository, data in a downstream project, please consider ci
|
|||||||
|
|
||||||
Support the project by becoming [a backer or sponsor](https://github.com/sponsors/mudler). Your logo will show up here with a link to your website.
|
Support the project by becoming [a backer or sponsor](https://github.com/sponsors/mudler). Your logo will show up here with a link to your website.
|
||||||
|
|
||||||
A huge thank you to our generous sponsors who support this project covering CI expenses, and our [Sponsor list](https://github.com/sponsors/mudler):
|
A huge thank you to our generous sponsors who support this project:
|
||||||
|
|
||||||
<p align="center">
|
|  |
|
||||||
<a href="https://www.spectrocloud.com/" target="blank">
|
|:-----------------------------------------------:|
|
||||||
<img height="200" src="https://github.com/go-skynet/LocalAI/assets/2420543/68a6f3cb-8a65-4a4d-99b5-6417a8905512">
|
| [Spectro Cloud](https://www.spectrocloud.com/) |
|
||||||
</a>
|
| Spectro Cloud kindly supports LocalAI by providing GPU and computing resources to run tests on lamdalabs! |
|
||||||
<a href="https://www.premai.io/" target="blank">
|
|
||||||
<img height="200" src="https://github.com/mudler/LocalAI/assets/2420543/42e4ca83-661e-4f79-8e46-ae43689683d6"> <br>
|
And a huge shout-out to individuals sponsoring the project by donating hardware or backing the project.
|
||||||
</a>
|
|
||||||
</p>
|
- [Sponsor list](https://github.com/sponsors/mudler)
|
||||||
|
- JDAM00 (donating HW for the CI)
|
||||||
|
|
||||||
## 🌟 Star history
|
## 🌟 Star history
|
||||||
|
|
||||||
@@ -217,7 +168,7 @@ A huge thank you to our generous sponsors who support this project covering CI e
|
|||||||
|
|
||||||
LocalAI is a community-driven project created by [Ettore Di Giacinto](https://github.com/mudler/).
|
LocalAI is a community-driven project created by [Ettore Di Giacinto](https://github.com/mudler/).
|
||||||
|
|
||||||
MIT - Author Ettore Di Giacinto <mudler@localai.io>
|
MIT - Author Ettore Di Giacinto
|
||||||
|
|
||||||
## 🙇 Acknowledgements
|
## 🙇 Acknowledgements
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +0,0 @@
|
|||||||
## AIO CPU size
|
|
||||||
|
|
||||||
Use this image with CPU-only.
|
|
||||||
|
|
||||||
Please keep using only C++ backends so the base image is as small as possible (without CUDA, cuDNN, python, etc).
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
name: text-embedding-ada-002
|
|
||||||
backend: bert-embeddings
|
|
||||||
parameters:
|
|
||||||
model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
|
|
||||||
|
|
||||||
usage: |
|
|
||||||
You can test this model with curl like this:
|
|
||||||
|
|
||||||
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
|
|
||||||
"input": "Your text string goes here",
|
|
||||||
"model": "text-embedding-ada-002"
|
|
||||||
}'
|
|
||||||
@@ -1,62 +0,0 @@
|
|||||||
name: stablediffusion
|
|
||||||
backend: stablediffusion
|
|
||||||
parameters:
|
|
||||||
model: stablediffusion_assets
|
|
||||||
|
|
||||||
license: "BSD-3"
|
|
||||||
urls:
|
|
||||||
- https://github.com/EdVince/Stable-Diffusion-NCNN
|
|
||||||
- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE
|
|
||||||
|
|
||||||
description: |
|
|
||||||
Stable Diffusion in NCNN with c++, supported txt2img and img2img
|
|
||||||
|
|
||||||
download_files:
|
|
||||||
- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
|
|
||||||
sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
|
|
||||||
- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
|
|
||||||
sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
|
|
||||||
- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
|
|
||||||
sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
|
|
||||||
- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
|
|
||||||
sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
|
|
||||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin"
|
|
||||||
- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin"
|
|
||||||
sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
|
|
||||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin"
|
|
||||||
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
|
|
||||||
sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
|
|
||||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin"
|
|
||||||
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
|
|
||||||
sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
|
|
||||||
- filename: "stablediffusion_assets/log_sigmas.bin"
|
|
||||||
sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
|
|
||||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
|
|
||||||
- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
|
|
||||||
sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
|
|
||||||
- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
|
|
||||||
sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
|
|
||||||
- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
|
|
||||||
sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
|
|
||||||
- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
|
|
||||||
sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
|
|
||||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
|
|
||||||
- filename: "stablediffusion_assets/vocab.txt"
|
|
||||||
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
|
|
||||||
|
|
||||||
usage: |
|
|
||||||
curl http://localhost:8080/v1/images/generations \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{
|
|
||||||
"prompt": "<positive prompt>|<negative prompt>",
|
|
||||||
"step": 25,
|
|
||||||
"size": "512x512"
|
|
||||||
}'
|
|
||||||
@@ -1,27 +0,0 @@
|
|||||||
name: jina-reranker-v1-base-en
|
|
||||||
backend: rerankers
|
|
||||||
parameters:
|
|
||||||
model: cross-encoder
|
|
||||||
|
|
||||||
usage: |
|
|
||||||
You can test this model with curl like this:
|
|
||||||
|
|
||||||
curl http://localhost:8080/v1/rerank \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{
|
|
||||||
"model": "jina-reranker-v1-base-en",
|
|
||||||
"query": "Organic skincare products for sensitive skin",
|
|
||||||
"documents": [
|
|
||||||
"Eco-friendly kitchenware for modern homes",
|
|
||||||
"Biodegradable cleaning supplies for eco-conscious consumers",
|
|
||||||
"Organic cotton baby clothes for sensitive skin",
|
|
||||||
"Natural organic skincare range for sensitive skin",
|
|
||||||
"Tech gadgets for smart homes: 2024 edition",
|
|
||||||
"Sustainable gardening tools and compost solutions",
|
|
||||||
"Sensitive skin-friendly facial cleansers and toners",
|
|
||||||
"Organic food wraps and storage solutions",
|
|
||||||
"All-natural pet food for dogs with allergies",
|
|
||||||
"Yoga mats made from recycled materials"
|
|
||||||
],
|
|
||||||
"top_n": 3
|
|
||||||
}'
|
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
name: whisper-1
|
|
||||||
backend: whisper
|
|
||||||
parameters:
|
|
||||||
model: ggml-whisper-base.bin
|
|
||||||
|
|
||||||
usage: |
|
|
||||||
## example audio file
|
|
||||||
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
|
|
||||||
|
|
||||||
## Send the example audio file to the transcriptions endpoint
|
|
||||||
curl http://localhost:8080/v1/audio/transcriptions \
|
|
||||||
-H "Content-Type: multipart/form-data" \
|
|
||||||
-F file="@$PWD/gb1.ogg" -F model="whisper-1"
|
|
||||||
|
|
||||||
download_files:
|
|
||||||
- filename: "ggml-whisper-base.bin"
|
|
||||||
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
|
|
||||||
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
name: tts-1
|
|
||||||
download_files:
|
|
||||||
- filename: voice-en-us-amy-low.tar.gz
|
|
||||||
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
|
|
||||||
|
|
||||||
parameters:
|
|
||||||
model: en-us-amy-low.onnx
|
|
||||||
|
|
||||||
usage: |
|
|
||||||
To test if this model works as expected, you can use the following curl command:
|
|
||||||
|
|
||||||
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
|
|
||||||
"model":"voice-en-us-amy-low",
|
|
||||||
"input": "Hi, this is a test."
|
|
||||||
}'
|
|
||||||
@@ -1,101 +0,0 @@
|
|||||||
name: gpt-4
|
|
||||||
mmap: true
|
|
||||||
parameters:
|
|
||||||
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
|
|
||||||
context_size: 8192
|
|
||||||
|
|
||||||
stopwords:
|
|
||||||
- "<|im_end|>"
|
|
||||||
- "<dummy32000>"
|
|
||||||
- "</tool_call>"
|
|
||||||
- "<|eot_id|>"
|
|
||||||
- "<|end_of_text|>"
|
|
||||||
|
|
||||||
function:
|
|
||||||
# disable injecting the "answer" tool
|
|
||||||
disable_no_action: true
|
|
||||||
|
|
||||||
grammar:
|
|
||||||
# This allows the grammar to also return messages
|
|
||||||
mixed_mode: true
|
|
||||||
# Suffix to add to the grammar
|
|
||||||
#prefix: '<tool_call>\n'
|
|
||||||
# Force parallel calls in the grammar
|
|
||||||
# parallel_calls: true
|
|
||||||
|
|
||||||
return_name_in_function_response: true
|
|
||||||
# Without grammar uncomment the lines below
|
|
||||||
# Warning: this is relying only on the capability of the
|
|
||||||
# LLM model to generate the correct function call.
|
|
||||||
json_regex_match:
|
|
||||||
- "(?s)<tool_call>(.*?)</tool_call>"
|
|
||||||
- "(?s)<tool_call>(.*?)"
|
|
||||||
replace_llm_results:
|
|
||||||
# Drop the scratchpad content from responses
|
|
||||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
|
||||||
value: ""
|
|
||||||
replace_function_results:
|
|
||||||
# Replace everything that is not JSON array or object
|
|
||||||
#
|
|
||||||
- key: '(?s)^[^{\[]*'
|
|
||||||
value: ""
|
|
||||||
- key: '(?s)[^}\]]*$'
|
|
||||||
value: ""
|
|
||||||
- key: "'([^']*?)'"
|
|
||||||
value: "_DQUOTE_${1}_DQUOTE_"
|
|
||||||
- key: '\\"'
|
|
||||||
value: "__TEMP_QUOTE__"
|
|
||||||
- key: "\'"
|
|
||||||
value: "'"
|
|
||||||
- key: "_DQUOTE_"
|
|
||||||
value: '"'
|
|
||||||
- key: "__TEMP_QUOTE__"
|
|
||||||
value: '"'
|
|
||||||
# Drop the scratchpad content from responses
|
|
||||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
|
||||||
value: ""
|
|
||||||
|
|
||||||
template:
|
|
||||||
chat: |
|
|
||||||
{{.Input -}}
|
|
||||||
<|im_start|>assistant
|
|
||||||
chat_message: |
|
|
||||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
|
||||||
{{- if .FunctionCall }}
|
|
||||||
<tool_call>
|
|
||||||
{{- else if eq .RoleName "tool" }}
|
|
||||||
<tool_response>
|
|
||||||
{{- end }}
|
|
||||||
{{- if .Content}}
|
|
||||||
{{.Content }}
|
|
||||||
{{- end }}
|
|
||||||
{{- if .FunctionCall}}
|
|
||||||
{{toJson .FunctionCall}}
|
|
||||||
{{- end }}
|
|
||||||
{{- if .FunctionCall }}
|
|
||||||
</tool_call>
|
|
||||||
{{- else if eq .RoleName "tool" }}
|
|
||||||
</tool_response>
|
|
||||||
{{- end }}<|im_end|>
|
|
||||||
completion: |
|
|
||||||
{{.Input}}
|
|
||||||
function: |-
|
|
||||||
<|im_start|>system
|
|
||||||
You are a function calling AI model.
|
|
||||||
Here are the available tools:
|
|
||||||
<tools>
|
|
||||||
{{range .Functions}}
|
|
||||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
|
||||||
{{end}}
|
|
||||||
</tools>
|
|
||||||
You should call the tools provided to you sequentially
|
|
||||||
Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
|
|
||||||
<scratchpad>
|
|
||||||
{step-by-step reasoning and plan in bullet points}
|
|
||||||
</scratchpad>
|
|
||||||
For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
|
|
||||||
<tool_call>
|
|
||||||
{"arguments": <args-dict>, "name": <function-name>}
|
|
||||||
</tool_call><|im_end|>
|
|
||||||
{{.Input -}}
|
|
||||||
<|im_start|>assistant
|
|
||||||
@@ -1,31 +0,0 @@
|
|||||||
backend: llama-cpp
|
|
||||||
context_size: 4096
|
|
||||||
f16: true
|
|
||||||
mmap: true
|
|
||||||
name: gpt-4o
|
|
||||||
|
|
||||||
roles:
|
|
||||||
user: "USER:"
|
|
||||||
assistant: "ASSISTANT:"
|
|
||||||
system: "SYSTEM:"
|
|
||||||
|
|
||||||
mmproj: bakllava-mmproj.gguf
|
|
||||||
parameters:
|
|
||||||
model: bakllava.gguf
|
|
||||||
|
|
||||||
template:
|
|
||||||
chat: |
|
|
||||||
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
|
||||||
{{.Input}}
|
|
||||||
ASSISTANT:
|
|
||||||
|
|
||||||
download_files:
|
|
||||||
- filename: bakllava.gguf
|
|
||||||
uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
|
|
||||||
- filename: bakllava-mmproj.gguf
|
|
||||||
uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
|
|
||||||
|
|
||||||
usage: |
|
|
||||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
|
||||||
"model": "gpt-4-vision-preview",
|
|
||||||
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
|
|
||||||
@@ -1,138 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
echo "===> LocalAI All-in-One (AIO) container starting..."
|
|
||||||
|
|
||||||
GPU_ACCELERATION=false
|
|
||||||
GPU_VENDOR=""
|
|
||||||
|
|
||||||
function check_intel() {
|
|
||||||
if lspci | grep -E 'VGA|3D' | grep -iq intel; then
|
|
||||||
echo "Intel GPU detected"
|
|
||||||
if [ -d /opt/intel ]; then
|
|
||||||
GPU_ACCELERATION=true
|
|
||||||
GPU_VENDOR=intel
|
|
||||||
else
|
|
||||||
echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
function check_nvidia_wsl() {
|
|
||||||
if lspci | grep -E 'VGA|3D' | grep -iq "Microsoft Corporation Device 008e"; then
|
|
||||||
# We make the assumption this WSL2 cars is NVIDIA, then check for nvidia-smi
|
|
||||||
# Make sure the container was run with `--gpus all` as the only required parameter
|
|
||||||
echo "NVIDIA GPU detected via WSL2"
|
|
||||||
# nvidia-smi should be installed in the container
|
|
||||||
if nvidia-smi; then
|
|
||||||
GPU_ACCELERATION=true
|
|
||||||
GPU_VENDOR=nvidia
|
|
||||||
else
|
|
||||||
echo "NVIDIA GPU detected via WSL2, but nvidia-smi is not installed. GPU acceleration will not be available."
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
function check_amd() {
|
|
||||||
if lspci | grep -E 'VGA|3D' | grep -iq amd; then
|
|
||||||
echo "AMD GPU detected"
|
|
||||||
# Check if ROCm is installed
|
|
||||||
if [ -d /opt/rocm ]; then
|
|
||||||
GPU_ACCELERATION=true
|
|
||||||
GPU_VENDOR=amd
|
|
||||||
else
|
|
||||||
echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available."
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
function check_nvidia() {
|
|
||||||
if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then
|
|
||||||
echo "NVIDIA GPU detected"
|
|
||||||
# nvidia-smi should be installed in the container
|
|
||||||
if nvidia-smi; then
|
|
||||||
GPU_ACCELERATION=true
|
|
||||||
GPU_VENDOR=nvidia
|
|
||||||
else
|
|
||||||
echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available."
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
function check_metal() {
|
|
||||||
if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then
|
|
||||||
echo "Apple Metal supported GPU detected"
|
|
||||||
GPU_ACCELERATION=true
|
|
||||||
GPU_VENDOR=apple
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
function detect_gpu() {
|
|
||||||
case "$(uname -s)" in
|
|
||||||
Linux)
|
|
||||||
check_nvidia
|
|
||||||
check_amd
|
|
||||||
check_intel
|
|
||||||
check_nvidia_wsl
|
|
||||||
;;
|
|
||||||
Darwin)
|
|
||||||
check_metal
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
}
|
|
||||||
|
|
||||||
function detect_gpu_size() {
|
|
||||||
# Attempting to find GPU memory size for NVIDIA GPUs
|
|
||||||
if [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "nvidia" ]; then
|
|
||||||
echo "NVIDIA GPU detected. Attempting to find memory size..."
|
|
||||||
# Using head -n 1 to get the total memory of the 1st NVIDIA GPU detected.
|
|
||||||
# If handling multiple GPUs is required in the future, this is the place to do it
|
|
||||||
nvidia_sm=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits | head -n 1)
|
|
||||||
if [ ! -z "$nvidia_sm" ]; then
|
|
||||||
echo "Total GPU Memory: $nvidia_sm MiB"
|
|
||||||
# if bigger than 8GB, use 16GB
|
|
||||||
#if [ "$nvidia_sm" -gt 8192 ]; then
|
|
||||||
# GPU_SIZE=gpu-16g
|
|
||||||
#else
|
|
||||||
GPU_SIZE=gpu-8g
|
|
||||||
#fi
|
|
||||||
else
|
|
||||||
echo "Unable to determine NVIDIA GPU memory size. Falling back to CPU."
|
|
||||||
GPU_SIZE=gpu-8g
|
|
||||||
fi
|
|
||||||
elif [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "intel" ]; then
|
|
||||||
GPU_SIZE=intel
|
|
||||||
# Default to a generic GPU size until we implement GPU size detection for non NVIDIA GPUs
|
|
||||||
elif [ "$GPU_ACCELERATION" = true ]; then
|
|
||||||
echo "Non-NVIDIA GPU detected. Specific GPU memory size detection is not implemented."
|
|
||||||
GPU_SIZE=gpu-8g
|
|
||||||
|
|
||||||
# default to cpu if GPU_SIZE is not set
|
|
||||||
else
|
|
||||||
echo "GPU acceleration is not enabled or supported. Defaulting to CPU."
|
|
||||||
GPU_SIZE=cpu
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
function check_vars() {
|
|
||||||
if [ -z "$MODELS" ]; then
|
|
||||||
echo "MODELS environment variable is not set. Please set it to a comma-separated list of model YAML files to load."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ -z "$PROFILE" ]; then
|
|
||||||
echo "PROFILE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
detect_gpu
|
|
||||||
detect_gpu_size
|
|
||||||
|
|
||||||
PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
|
|
||||||
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
|
|
||||||
|
|
||||||
check_vars
|
|
||||||
|
|
||||||
echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS"
|
|
||||||
|
|
||||||
exec /build/entrypoint.sh "$@"
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
name: text-embedding-ada-002
|
|
||||||
backend: sentencetransformers
|
|
||||||
parameters:
|
|
||||||
model: all-MiniLM-L6-v2
|
|
||||||
|
|
||||||
usage: |
|
|
||||||
You can test this model with curl like this:
|
|
||||||
|
|
||||||
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
|
|
||||||
"input": "Your text string goes here",
|
|
||||||
"model": "text-embedding-ada-002"
|
|
||||||
}'
|
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
name: stablediffusion
|
|
||||||
parameters:
|
|
||||||
model: DreamShaper_8_pruned.safetensors
|
|
||||||
backend: diffusers
|
|
||||||
step: 25
|
|
||||||
f16: true
|
|
||||||
|
|
||||||
diffusers:
|
|
||||||
pipeline_type: StableDiffusionPipeline
|
|
||||||
cuda: true
|
|
||||||
enable_parameters: "negative_prompt,num_inference_steps"
|
|
||||||
scheduler_type: "k_dpmpp_2m"
|
|
||||||
|
|
||||||
download_files:
|
|
||||||
- filename: DreamShaper_8_pruned.safetensors
|
|
||||||
uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
|
|
||||||
|
|
||||||
usage: |
|
|
||||||
curl http://localhost:8080/v1/images/generations \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{
|
|
||||||
"prompt": "<positive prompt>|<negative prompt>",
|
|
||||||
"step": 25,
|
|
||||||
"size": "512x512"
|
|
||||||
}'
|
|
||||||
@@ -1,27 +0,0 @@
|
|||||||
name: jina-reranker-v1-base-en
|
|
||||||
backend: rerankers
|
|
||||||
parameters:
|
|
||||||
model: cross-encoder
|
|
||||||
|
|
||||||
usage: |
|
|
||||||
You can test this model with curl like this:
|
|
||||||
|
|
||||||
curl http://localhost:8080/v1/rerank \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{
|
|
||||||
"model": "jina-reranker-v1-base-en",
|
|
||||||
"query": "Organic skincare products for sensitive skin",
|
|
||||||
"documents": [
|
|
||||||
"Eco-friendly kitchenware for modern homes",
|
|
||||||
"Biodegradable cleaning supplies for eco-conscious consumers",
|
|
||||||
"Organic cotton baby clothes for sensitive skin",
|
|
||||||
"Natural organic skincare range for sensitive skin",
|
|
||||||
"Tech gadgets for smart homes: 2024 edition",
|
|
||||||
"Sustainable gardening tools and compost solutions",
|
|
||||||
"Sensitive skin-friendly facial cleansers and toners",
|
|
||||||
"Organic food wraps and storage solutions",
|
|
||||||
"All-natural pet food for dogs with allergies",
|
|
||||||
"Yoga mats made from recycled materials"
|
|
||||||
],
|
|
||||||
"top_n": 3
|
|
||||||
}'
|
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
name: whisper-1
|
|
||||||
backend: whisper
|
|
||||||
parameters:
|
|
||||||
model: ggml-whisper-base.bin
|
|
||||||
|
|
||||||
usage: |
|
|
||||||
## example audio file
|
|
||||||
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
|
|
||||||
|
|
||||||
## Send the example audio file to the transcriptions endpoint
|
|
||||||
curl http://localhost:8080/v1/audio/transcriptions \
|
|
||||||
-H "Content-Type: multipart/form-data" \
|
|
||||||
-F file="@$PWD/gb1.ogg" -F model="whisper-1"
|
|
||||||
|
|
||||||
download_files:
|
|
||||||
- filename: "ggml-whisper-base.bin"
|
|
||||||
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
|
|
||||||
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
name: tts-1
|
|
||||||
download_files:
|
|
||||||
- filename: voice-en-us-amy-low.tar.gz
|
|
||||||
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
|
|
||||||
|
|
||||||
parameters:
|
|
||||||
model: en-us-amy-low.onnx
|
|
||||||
|
|
||||||
usage: |
|
|
||||||
To test if this model works as expected, you can use the following curl command:
|
|
||||||
|
|
||||||
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
|
|
||||||
"model":"tts-1",
|
|
||||||
"input": "Hi, this is a test."
|
|
||||||
}'
|
|
||||||
@@ -1,101 +0,0 @@
|
|||||||
name: gpt-4
|
|
||||||
mmap: true
|
|
||||||
parameters:
|
|
||||||
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
|
|
||||||
context_size: 8192
|
|
||||||
|
|
||||||
stopwords:
|
|
||||||
- "<|im_end|>"
|
|
||||||
- "<dummy32000>"
|
|
||||||
- "</tool_call>"
|
|
||||||
- "<|eot_id|>"
|
|
||||||
- "<|end_of_text|>"
|
|
||||||
|
|
||||||
function:
|
|
||||||
# disable injecting the "answer" tool
|
|
||||||
disable_no_action: true
|
|
||||||
|
|
||||||
grammar:
|
|
||||||
# This allows the grammar to also return messages
|
|
||||||
mixed_mode: true
|
|
||||||
# Suffix to add to the grammar
|
|
||||||
#prefix: '<tool_call>\n'
|
|
||||||
# Force parallel calls in the grammar
|
|
||||||
# parallel_calls: true
|
|
||||||
|
|
||||||
return_name_in_function_response: true
|
|
||||||
# Without grammar uncomment the lines below
|
|
||||||
# Warning: this is relying only on the capability of the
|
|
||||||
# LLM model to generate the correct function call.
|
|
||||||
json_regex_match:
|
|
||||||
- "(?s)<tool_call>(.*?)</tool_call>"
|
|
||||||
- "(?s)<tool_call>(.*?)"
|
|
||||||
replace_llm_results:
|
|
||||||
# Drop the scratchpad content from responses
|
|
||||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
|
||||||
value: ""
|
|
||||||
replace_function_results:
|
|
||||||
# Replace everything that is not JSON array or object
|
|
||||||
#
|
|
||||||
- key: '(?s)^[^{\[]*'
|
|
||||||
value: ""
|
|
||||||
- key: '(?s)[^}\]]*$'
|
|
||||||
value: ""
|
|
||||||
- key: "'([^']*?)'"
|
|
||||||
value: "_DQUOTE_${1}_DQUOTE_"
|
|
||||||
- key: '\\"'
|
|
||||||
value: "__TEMP_QUOTE__"
|
|
||||||
- key: "\'"
|
|
||||||
value: "'"
|
|
||||||
- key: "_DQUOTE_"
|
|
||||||
value: '"'
|
|
||||||
- key: "__TEMP_QUOTE__"
|
|
||||||
value: '"'
|
|
||||||
# Drop the scratchpad content from responses
|
|
||||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
|
||||||
value: ""
|
|
||||||
|
|
||||||
template:
|
|
||||||
chat: |
|
|
||||||
{{.Input -}}
|
|
||||||
<|im_start|>assistant
|
|
||||||
chat_message: |
|
|
||||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
|
||||||
{{- if .FunctionCall }}
|
|
||||||
<tool_call>
|
|
||||||
{{- else if eq .RoleName "tool" }}
|
|
||||||
<tool_response>
|
|
||||||
{{- end }}
|
|
||||||
{{- if .Content}}
|
|
||||||
{{.Content }}
|
|
||||||
{{- end }}
|
|
||||||
{{- if .FunctionCall}}
|
|
||||||
{{toJson .FunctionCall}}
|
|
||||||
{{- end }}
|
|
||||||
{{- if .FunctionCall }}
|
|
||||||
</tool_call>
|
|
||||||
{{- else if eq .RoleName "tool" }}
|
|
||||||
</tool_response>
|
|
||||||
{{- end }}<|im_end|>
|
|
||||||
completion: |
|
|
||||||
{{.Input}}
|
|
||||||
function: |-
|
|
||||||
<|im_start|>system
|
|
||||||
You are a function calling AI model.
|
|
||||||
Here are the available tools:
|
|
||||||
<tools>
|
|
||||||
{{range .Functions}}
|
|
||||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
|
||||||
{{end}}
|
|
||||||
</tools>
|
|
||||||
You should call the tools provided to you sequentially
|
|
||||||
Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
|
|
||||||
<scratchpad>
|
|
||||||
{step-by-step reasoning and plan in bullet points}
|
|
||||||
</scratchpad>
|
|
||||||
For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
|
|
||||||
<tool_call>
|
|
||||||
{"arguments": <args-dict>, "name": <function-name>}
|
|
||||||
</tool_call><|im_end|>
|
|
||||||
{{.Input -}}
|
|
||||||
<|im_start|>assistant
|
|
||||||
@@ -1,35 +0,0 @@
|
|||||||
backend: llama-cpp
|
|
||||||
context_size: 4096
|
|
||||||
f16: true
|
|
||||||
mmap: true
|
|
||||||
name: gpt-4o
|
|
||||||
|
|
||||||
roles:
|
|
||||||
user: "USER:"
|
|
||||||
assistant: "ASSISTANT:"
|
|
||||||
system: "SYSTEM:"
|
|
||||||
|
|
||||||
mmproj: llava-v1.6-7b-mmproj-f16.gguf
|
|
||||||
parameters:
|
|
||||||
model: llava-v1.6-mistral-7b.Q5_K_M.gguf
|
|
||||||
temperature: 0.2
|
|
||||||
top_k: 40
|
|
||||||
top_p: 0.95
|
|
||||||
seed: -1
|
|
||||||
|
|
||||||
template:
|
|
||||||
chat: |
|
|
||||||
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
|
||||||
{{.Input}}
|
|
||||||
ASSISTANT:
|
|
||||||
|
|
||||||
download_files:
|
|
||||||
- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf
|
|
||||||
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf
|
|
||||||
- filename: llava-v1.6-7b-mmproj-f16.gguf
|
|
||||||
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
|
|
||||||
|
|
||||||
usage: |
|
|
||||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
|
||||||
"model": "gpt-4-vision-preview",
|
|
||||||
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
name: text-embedding-ada-002
|
|
||||||
backend: sentencetransformers
|
|
||||||
parameters:
|
|
||||||
model: all-MiniLM-L6-v2
|
|
||||||
|
|
||||||
usage: |
|
|
||||||
You can test this model with curl like this:
|
|
||||||
|
|
||||||
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
|
|
||||||
"input": "Your text string goes here",
|
|
||||||
"model": "text-embedding-ada-002"
|
|
||||||
}'
|
|
||||||
@@ -1,20 +0,0 @@
|
|||||||
name: stablediffusion
|
|
||||||
parameters:
|
|
||||||
model: Lykon/dreamshaper-8
|
|
||||||
backend: diffusers
|
|
||||||
step: 25
|
|
||||||
f16: true
|
|
||||||
diffusers:
|
|
||||||
pipeline_type: StableDiffusionPipeline
|
|
||||||
cuda: true
|
|
||||||
enable_parameters: "negative_prompt,num_inference_steps"
|
|
||||||
scheduler_type: "k_dpmpp_2m"
|
|
||||||
|
|
||||||
usage: |
|
|
||||||
curl http://localhost:8080/v1/images/generations \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{
|
|
||||||
"prompt": "<positive prompt>|<negative prompt>",
|
|
||||||
"step": 25,
|
|
||||||
"size": "512x512"
|
|
||||||
}'
|
|
||||||
@@ -1,27 +0,0 @@
|
|||||||
name: jina-reranker-v1-base-en
|
|
||||||
backend: rerankers
|
|
||||||
parameters:
|
|
||||||
model: cross-encoder
|
|
||||||
|
|
||||||
usage: |
|
|
||||||
You can test this model with curl like this:
|
|
||||||
|
|
||||||
curl http://localhost:8080/v1/rerank \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{
|
|
||||||
"model": "jina-reranker-v1-base-en",
|
|
||||||
"query": "Organic skincare products for sensitive skin",
|
|
||||||
"documents": [
|
|
||||||
"Eco-friendly kitchenware for modern homes",
|
|
||||||
"Biodegradable cleaning supplies for eco-conscious consumers",
|
|
||||||
"Organic cotton baby clothes for sensitive skin",
|
|
||||||
"Natural organic skincare range for sensitive skin",
|
|
||||||
"Tech gadgets for smart homes: 2024 edition",
|
|
||||||
"Sustainable gardening tools and compost solutions",
|
|
||||||
"Sensitive skin-friendly facial cleansers and toners",
|
|
||||||
"Organic food wraps and storage solutions",
|
|
||||||
"All-natural pet food for dogs with allergies",
|
|
||||||
"Yoga mats made from recycled materials"
|
|
||||||
],
|
|
||||||
"top_n": 3
|
|
||||||
}'
|
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
name: whisper-1
|
|
||||||
backend: whisper
|
|
||||||
parameters:
|
|
||||||
model: ggml-whisper-base.bin
|
|
||||||
|
|
||||||
usage: |
|
|
||||||
## example audio file
|
|
||||||
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
|
|
||||||
|
|
||||||
## Send the example audio file to the transcriptions endpoint
|
|
||||||
curl http://localhost:8080/v1/audio/transcriptions \
|
|
||||||
-H "Content-Type: multipart/form-data" \
|
|
||||||
-F file="@$PWD/gb1.ogg" -F model="whisper-1"
|
|
||||||
|
|
||||||
download_files:
|
|
||||||
- filename: "ggml-whisper-base.bin"
|
|
||||||
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
|
|
||||||
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
name: tts-1
|
|
||||||
download_files:
|
|
||||||
- filename: voice-en-us-amy-low.tar.gz
|
|
||||||
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
|
|
||||||
|
|
||||||
parameters:
|
|
||||||
model: en-us-amy-low.onnx
|
|
||||||
|
|
||||||
usage: |
|
|
||||||
To test if this model works as expected, you can use the following curl command:
|
|
||||||
|
|
||||||
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
|
|
||||||
"model":"tts-1",
|
|
||||||
"input": "Hi, this is a test."
|
|
||||||
}'
|
|
||||||
@@ -1,103 +0,0 @@
|
|||||||
name: gpt-4
|
|
||||||
mmap: false
|
|
||||||
context_size: 8192
|
|
||||||
|
|
||||||
f16: false
|
|
||||||
parameters:
|
|
||||||
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
|
|
||||||
|
|
||||||
stopwords:
|
|
||||||
- "<|im_end|>"
|
|
||||||
- "<dummy32000>"
|
|
||||||
- "</tool_call>"
|
|
||||||
- "<|eot_id|>"
|
|
||||||
- "<|end_of_text|>"
|
|
||||||
|
|
||||||
function:
|
|
||||||
# disable injecting the "answer" tool
|
|
||||||
disable_no_action: true
|
|
||||||
|
|
||||||
grammar:
|
|
||||||
# This allows the grammar to also return messages
|
|
||||||
mixed_mode: true
|
|
||||||
# Suffix to add to the grammar
|
|
||||||
#prefix: '<tool_call>\n'
|
|
||||||
# Force parallel calls in the grammar
|
|
||||||
# parallel_calls: true
|
|
||||||
|
|
||||||
return_name_in_function_response: true
|
|
||||||
# Without grammar uncomment the lines below
|
|
||||||
# Warning: this is relying only on the capability of the
|
|
||||||
# LLM model to generate the correct function call.
|
|
||||||
json_regex_match:
|
|
||||||
- "(?s)<tool_call>(.*?)</tool_call>"
|
|
||||||
- "(?s)<tool_call>(.*?)"
|
|
||||||
replace_llm_results:
|
|
||||||
# Drop the scratchpad content from responses
|
|
||||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
|
||||||
value: ""
|
|
||||||
replace_function_results:
|
|
||||||
# Replace everything that is not JSON array or object
|
|
||||||
#
|
|
||||||
- key: '(?s)^[^{\[]*'
|
|
||||||
value: ""
|
|
||||||
- key: '(?s)[^}\]]*$'
|
|
||||||
value: ""
|
|
||||||
- key: "'([^']*?)'"
|
|
||||||
value: "_DQUOTE_${1}_DQUOTE_"
|
|
||||||
- key: '\\"'
|
|
||||||
value: "__TEMP_QUOTE__"
|
|
||||||
- key: "\'"
|
|
||||||
value: "'"
|
|
||||||
- key: "_DQUOTE_"
|
|
||||||
value: '"'
|
|
||||||
- key: "__TEMP_QUOTE__"
|
|
||||||
value: '"'
|
|
||||||
# Drop the scratchpad content from responses
|
|
||||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
|
||||||
value: ""
|
|
||||||
|
|
||||||
template:
|
|
||||||
chat: |
|
|
||||||
{{.Input -}}
|
|
||||||
<|im_start|>assistant
|
|
||||||
chat_message: |
|
|
||||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
|
||||||
{{- if .FunctionCall }}
|
|
||||||
<tool_call>
|
|
||||||
{{- else if eq .RoleName "tool" }}
|
|
||||||
<tool_response>
|
|
||||||
{{- end }}
|
|
||||||
{{- if .Content}}
|
|
||||||
{{.Content }}
|
|
||||||
{{- end }}
|
|
||||||
{{- if .FunctionCall}}
|
|
||||||
{{toJson .FunctionCall}}
|
|
||||||
{{- end }}
|
|
||||||
{{- if .FunctionCall }}
|
|
||||||
</tool_call>
|
|
||||||
{{- else if eq .RoleName "tool" }}
|
|
||||||
</tool_response>
|
|
||||||
{{- end }}<|im_end|>
|
|
||||||
completion: |
|
|
||||||
{{.Input}}
|
|
||||||
function: |-
|
|
||||||
<|im_start|>system
|
|
||||||
You are a function calling AI model.
|
|
||||||
Here are the available tools:
|
|
||||||
<tools>
|
|
||||||
{{range .Functions}}
|
|
||||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
|
||||||
{{end}}
|
|
||||||
</tools>
|
|
||||||
You should call the tools provided to you sequentially
|
|
||||||
Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
|
|
||||||
<scratchpad>
|
|
||||||
{step-by-step reasoning and plan in bullet points}
|
|
||||||
</scratchpad>
|
|
||||||
For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
|
|
||||||
<tool_call>
|
|
||||||
{"arguments": <args-dict>, "name": <function-name>}
|
|
||||||
</tool_call><|im_end|>
|
|
||||||
{{.Input -}}
|
|
||||||
<|im_start|>assistant
|
|
||||||
@@ -1,35 +0,0 @@
|
|||||||
backend: llama-cpp
|
|
||||||
context_size: 4096
|
|
||||||
mmap: false
|
|
||||||
f16: false
|
|
||||||
name: gpt-4o
|
|
||||||
|
|
||||||
roles:
|
|
||||||
user: "USER:"
|
|
||||||
assistant: "ASSISTANT:"
|
|
||||||
system: "SYSTEM:"
|
|
||||||
|
|
||||||
mmproj: llava-v1.6-7b-mmproj-f16.gguf
|
|
||||||
parameters:
|
|
||||||
model: llava-v1.6-mistral-7b.Q5_K_M.gguf
|
|
||||||
temperature: 0.2
|
|
||||||
top_k: 40
|
|
||||||
top_p: 0.95
|
|
||||||
seed: -1
|
|
||||||
|
|
||||||
template:
|
|
||||||
chat: |
|
|
||||||
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
|
||||||
{{.Input}}
|
|
||||||
ASSISTANT:
|
|
||||||
|
|
||||||
download_files:
|
|
||||||
- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf
|
|
||||||
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf
|
|
||||||
- filename: llava-v1.6-7b-mmproj-f16.gguf
|
|
||||||
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
|
|
||||||
|
|
||||||
usage: |
|
|
||||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
|
||||||
"model": "gpt-4-vision-preview",
|
|
||||||
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
|
|
||||||
@@ -16,88 +16,8 @@ service Backend {
|
|||||||
rpc GenerateImage(GenerateImageRequest) returns (Result) {}
|
rpc GenerateImage(GenerateImageRequest) returns (Result) {}
|
||||||
rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
|
rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
|
||||||
rpc TTS(TTSRequest) returns (Result) {}
|
rpc TTS(TTSRequest) returns (Result) {}
|
||||||
rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
|
|
||||||
rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
|
rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
|
||||||
rpc Status(HealthMessage) returns (StatusResponse) {}
|
rpc Status(HealthMessage) returns (StatusResponse) {}
|
||||||
|
|
||||||
rpc StoresSet(StoresSetOptions) returns (Result) {}
|
|
||||||
rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
|
|
||||||
rpc StoresGet(StoresGetOptions) returns (StoresGetResult) {}
|
|
||||||
rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
|
|
||||||
|
|
||||||
rpc Rerank(RerankRequest) returns (RerankResult) {}
|
|
||||||
|
|
||||||
rpc GetMetrics(MetricsRequest) returns (MetricsResponse);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Define the empty request
|
|
||||||
message MetricsRequest {}
|
|
||||||
|
|
||||||
message MetricsResponse {
|
|
||||||
int32 slot_id = 1;
|
|
||||||
string prompt_json_for_slot = 2; // Stores the prompt as a JSON string.
|
|
||||||
float tokens_per_second = 3;
|
|
||||||
int32 tokens_generated = 4;
|
|
||||||
int32 prompt_tokens_processed = 5;
|
|
||||||
}
|
|
||||||
|
|
||||||
message RerankRequest {
|
|
||||||
string query = 1;
|
|
||||||
repeated string documents = 2;
|
|
||||||
int32 top_n = 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
message RerankResult {
|
|
||||||
Usage usage = 1;
|
|
||||||
repeated DocumentResult results = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
message Usage {
|
|
||||||
int32 total_tokens = 1;
|
|
||||||
int32 prompt_tokens = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
message DocumentResult {
|
|
||||||
int32 index = 1;
|
|
||||||
string text = 2;
|
|
||||||
float relevance_score = 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
message StoresKey {
|
|
||||||
repeated float Floats = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message StoresValue {
|
|
||||||
bytes Bytes = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message StoresSetOptions {
|
|
||||||
repeated StoresKey Keys = 1;
|
|
||||||
repeated StoresValue Values = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
message StoresDeleteOptions {
|
|
||||||
repeated StoresKey Keys = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message StoresGetOptions {
|
|
||||||
repeated StoresKey Keys = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message StoresGetResult {
|
|
||||||
repeated StoresKey Keys = 1;
|
|
||||||
repeated StoresValue Values = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
message StoresFindOptions {
|
|
||||||
StoresKey Key = 1;
|
|
||||||
int32 TopK = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
message StoresFindResult {
|
|
||||||
repeated StoresKey Keys = 1;
|
|
||||||
repeated StoresValue Values = 2;
|
|
||||||
repeated float Similarities = 3;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
message HealthMessage {}
|
message HealthMessage {}
|
||||||
@@ -145,18 +65,11 @@ message PredictOptions {
|
|||||||
string NegativePrompt = 40;
|
string NegativePrompt = 40;
|
||||||
int32 NDraft = 41;
|
int32 NDraft = 41;
|
||||||
repeated string Images = 42;
|
repeated string Images = 42;
|
||||||
bool UseTokenizerTemplate = 43;
|
|
||||||
repeated Message Messages = 44;
|
|
||||||
repeated string Videos = 45;
|
|
||||||
repeated string Audios = 46;
|
|
||||||
string CorrelationId = 47;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// The response message containing the result
|
// The response message containing the result
|
||||||
message Reply {
|
message Reply {
|
||||||
bytes message = 1;
|
bytes message = 1;
|
||||||
int32 tokens = 2;
|
|
||||||
int32 prompt_tokens = 3;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
message ModelOptions {
|
message ModelOptions {
|
||||||
@@ -218,8 +131,6 @@ message ModelOptions {
|
|||||||
bool EnforceEager = 52;
|
bool EnforceEager = 52;
|
||||||
int32 SwapSpace = 53;
|
int32 SwapSpace = 53;
|
||||||
int32 MaxModelLen = 54;
|
int32 MaxModelLen = 54;
|
||||||
int32 TensorParallelSize = 55;
|
|
||||||
string LoadFormat = 58;
|
|
||||||
|
|
||||||
string MMProj = 41;
|
string MMProj = 41;
|
||||||
|
|
||||||
@@ -230,9 +141,6 @@ message ModelOptions {
|
|||||||
float YarnBetaSlow = 47;
|
float YarnBetaSlow = 47;
|
||||||
|
|
||||||
string Type = 49;
|
string Type = 49;
|
||||||
|
|
||||||
bool FlashAttention = 56;
|
|
||||||
bool NoKVOffload = 57;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
message Result {
|
message Result {
|
||||||
@@ -248,7 +156,6 @@ message TranscriptRequest {
|
|||||||
string dst = 2;
|
string dst = 2;
|
||||||
string language = 3;
|
string language = 3;
|
||||||
uint32 threads = 4;
|
uint32 threads = 4;
|
||||||
bool translate = 5;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
message TranscriptResult {
|
message TranscriptResult {
|
||||||
@@ -285,18 +192,6 @@ message TTSRequest {
|
|||||||
string model = 2;
|
string model = 2;
|
||||||
string dst = 3;
|
string dst = 3;
|
||||||
string voice = 4;
|
string voice = 4;
|
||||||
optional string language = 5;
|
|
||||||
}
|
|
||||||
|
|
||||||
message SoundGenerationRequest {
|
|
||||||
string text = 1;
|
|
||||||
string model = 2;
|
|
||||||
string dst = 3;
|
|
||||||
optional float duration = 4;
|
|
||||||
optional float temperature = 5;
|
|
||||||
optional bool sample = 6;
|
|
||||||
optional string src = 7;
|
|
||||||
optional int32 src_divisor = 8;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
message TokenizationResponse {
|
message TokenizationResponse {
|
||||||
@@ -319,8 +214,3 @@ message StatusResponse {
|
|||||||
State state = 1;
|
State state = 1;
|
||||||
MemoryUsageData memory = 2;
|
MemoryUsageData memory = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
message Message {
|
|
||||||
string role = 1;
|
|
||||||
string content = 2;
|
|
||||||
}
|
|
||||||
457
backend/backend_grpc.pb.go
Normal file
457
backend/backend_grpc.pb.go
Normal file
@@ -0,0 +1,457 @@
|
|||||||
|
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
|
||||||
|
// versions:
|
||||||
|
// - protoc-gen-go-grpc v1.2.0
|
||||||
|
// - protoc v4.23.4
|
||||||
|
// source: backend/backend.proto
|
||||||
|
|
||||||
|
package proto
|
||||||
|
|
||||||
|
import (
|
||||||
|
context "context"
|
||||||
|
grpc "google.golang.org/grpc"
|
||||||
|
codes "google.golang.org/grpc/codes"
|
||||||
|
status "google.golang.org/grpc/status"
|
||||||
|
)
|
||||||
|
|
||||||
|
// This is a compile-time assertion to ensure that this generated file
|
||||||
|
// is compatible with the grpc package it is being compiled against.
|
||||||
|
// Requires gRPC-Go v1.32.0 or later.
|
||||||
|
const _ = grpc.SupportPackageIsVersion7
|
||||||
|
|
||||||
|
// BackendClient is the client API for Backend service.
|
||||||
|
//
|
||||||
|
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
|
||||||
|
type BackendClient interface {
|
||||||
|
Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error)
|
||||||
|
Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error)
|
||||||
|
LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error)
|
||||||
|
PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error)
|
||||||
|
Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error)
|
||||||
|
GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error)
|
||||||
|
AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error)
|
||||||
|
TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error)
|
||||||
|
TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error)
|
||||||
|
Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
type backendClient struct {
|
||||||
|
cc grpc.ClientConnInterface
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewBackendClient(cc grpc.ClientConnInterface) BackendClient {
|
||||||
|
return &backendClient{cc}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *backendClient) Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) {
|
||||||
|
out := new(Reply)
|
||||||
|
err := c.cc.Invoke(ctx, "/backend.Backend/Health", in, out, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *backendClient) Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) {
|
||||||
|
out := new(Reply)
|
||||||
|
err := c.cc.Invoke(ctx, "/backend.Backend/Predict", in, out, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *backendClient) LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) {
|
||||||
|
out := new(Result)
|
||||||
|
err := c.cc.Invoke(ctx, "/backend.Backend/LoadModel", in, out, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *backendClient) PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) {
|
||||||
|
stream, err := c.cc.NewStream(ctx, &Backend_ServiceDesc.Streams[0], "/backend.Backend/PredictStream", opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
x := &backendPredictStreamClient{stream}
|
||||||
|
if err := x.ClientStream.SendMsg(in); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if err := x.ClientStream.CloseSend(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return x, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type Backend_PredictStreamClient interface {
|
||||||
|
Recv() (*Reply, error)
|
||||||
|
grpc.ClientStream
|
||||||
|
}
|
||||||
|
|
||||||
|
type backendPredictStreamClient struct {
|
||||||
|
grpc.ClientStream
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *backendPredictStreamClient) Recv() (*Reply, error) {
|
||||||
|
m := new(Reply)
|
||||||
|
if err := x.ClientStream.RecvMsg(m); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return m, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *backendClient) Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) {
|
||||||
|
out := new(EmbeddingResult)
|
||||||
|
err := c.cc.Invoke(ctx, "/backend.Backend/Embedding", in, out, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *backendClient) GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) {
|
||||||
|
out := new(Result)
|
||||||
|
err := c.cc.Invoke(ctx, "/backend.Backend/GenerateImage", in, out, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *backendClient) AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) {
|
||||||
|
out := new(TranscriptResult)
|
||||||
|
err := c.cc.Invoke(ctx, "/backend.Backend/AudioTranscription", in, out, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *backendClient) TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) {
|
||||||
|
out := new(Result)
|
||||||
|
err := c.cc.Invoke(ctx, "/backend.Backend/TTS", in, out, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *backendClient) TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) {
|
||||||
|
out := new(TokenizationResponse)
|
||||||
|
err := c.cc.Invoke(ctx, "/backend.Backend/TokenizeString", in, out, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *backendClient) Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) {
|
||||||
|
out := new(StatusResponse)
|
||||||
|
err := c.cc.Invoke(ctx, "/backend.Backend/Status", in, out, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// BackendServer is the server API for Backend service.
|
||||||
|
// All implementations must embed UnimplementedBackendServer
|
||||||
|
// for forward compatibility
|
||||||
|
type BackendServer interface {
|
||||||
|
Health(context.Context, *HealthMessage) (*Reply, error)
|
||||||
|
Predict(context.Context, *PredictOptions) (*Reply, error)
|
||||||
|
LoadModel(context.Context, *ModelOptions) (*Result, error)
|
||||||
|
PredictStream(*PredictOptions, Backend_PredictStreamServer) error
|
||||||
|
Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error)
|
||||||
|
GenerateImage(context.Context, *GenerateImageRequest) (*Result, error)
|
||||||
|
AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error)
|
||||||
|
TTS(context.Context, *TTSRequest) (*Result, error)
|
||||||
|
TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error)
|
||||||
|
Status(context.Context, *HealthMessage) (*StatusResponse, error)
|
||||||
|
mustEmbedUnimplementedBackendServer()
|
||||||
|
}
|
||||||
|
|
||||||
|
// UnimplementedBackendServer must be embedded to have forward compatible implementations.
|
||||||
|
type UnimplementedBackendServer struct {
|
||||||
|
}
|
||||||
|
|
||||||
|
func (UnimplementedBackendServer) Health(context.Context, *HealthMessage) (*Reply, error) {
|
||||||
|
return nil, status.Errorf(codes.Unimplemented, "method Health not implemented")
|
||||||
|
}
|
||||||
|
func (UnimplementedBackendServer) Predict(context.Context, *PredictOptions) (*Reply, error) {
|
||||||
|
return nil, status.Errorf(codes.Unimplemented, "method Predict not implemented")
|
||||||
|
}
|
||||||
|
func (UnimplementedBackendServer) LoadModel(context.Context, *ModelOptions) (*Result, error) {
|
||||||
|
return nil, status.Errorf(codes.Unimplemented, "method LoadModel not implemented")
|
||||||
|
}
|
||||||
|
func (UnimplementedBackendServer) PredictStream(*PredictOptions, Backend_PredictStreamServer) error {
|
||||||
|
return status.Errorf(codes.Unimplemented, "method PredictStream not implemented")
|
||||||
|
}
|
||||||
|
func (UnimplementedBackendServer) Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error) {
|
||||||
|
return nil, status.Errorf(codes.Unimplemented, "method Embedding not implemented")
|
||||||
|
}
|
||||||
|
func (UnimplementedBackendServer) GenerateImage(context.Context, *GenerateImageRequest) (*Result, error) {
|
||||||
|
return nil, status.Errorf(codes.Unimplemented, "method GenerateImage not implemented")
|
||||||
|
}
|
||||||
|
func (UnimplementedBackendServer) AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error) {
|
||||||
|
return nil, status.Errorf(codes.Unimplemented, "method AudioTranscription not implemented")
|
||||||
|
}
|
||||||
|
func (UnimplementedBackendServer) TTS(context.Context, *TTSRequest) (*Result, error) {
|
||||||
|
return nil, status.Errorf(codes.Unimplemented, "method TTS not implemented")
|
||||||
|
}
|
||||||
|
func (UnimplementedBackendServer) TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error) {
|
||||||
|
return nil, status.Errorf(codes.Unimplemented, "method TokenizeString not implemented")
|
||||||
|
}
|
||||||
|
func (UnimplementedBackendServer) Status(context.Context, *HealthMessage) (*StatusResponse, error) {
|
||||||
|
return nil, status.Errorf(codes.Unimplemented, "method Status not implemented")
|
||||||
|
}
|
||||||
|
func (UnimplementedBackendServer) mustEmbedUnimplementedBackendServer() {}
|
||||||
|
|
||||||
|
// UnsafeBackendServer may be embedded to opt out of forward compatibility for this service.
|
||||||
|
// Use of this interface is not recommended, as added methods to BackendServer will
|
||||||
|
// result in compilation errors.
|
||||||
|
type UnsafeBackendServer interface {
|
||||||
|
mustEmbedUnimplementedBackendServer()
|
||||||
|
}
|
||||||
|
|
||||||
|
func RegisterBackendServer(s grpc.ServiceRegistrar, srv BackendServer) {
|
||||||
|
s.RegisterService(&Backend_ServiceDesc, srv)
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Backend_Health_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||||
|
in := new(HealthMessage)
|
||||||
|
if err := dec(in); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if interceptor == nil {
|
||||||
|
return srv.(BackendServer).Health(ctx, in)
|
||||||
|
}
|
||||||
|
info := &grpc.UnaryServerInfo{
|
||||||
|
Server: srv,
|
||||||
|
FullMethod: "/backend.Backend/Health",
|
||||||
|
}
|
||||||
|
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||||
|
return srv.(BackendServer).Health(ctx, req.(*HealthMessage))
|
||||||
|
}
|
||||||
|
return interceptor(ctx, in, info, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Backend_Predict_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||||
|
in := new(PredictOptions)
|
||||||
|
if err := dec(in); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if interceptor == nil {
|
||||||
|
return srv.(BackendServer).Predict(ctx, in)
|
||||||
|
}
|
||||||
|
info := &grpc.UnaryServerInfo{
|
||||||
|
Server: srv,
|
||||||
|
FullMethod: "/backend.Backend/Predict",
|
||||||
|
}
|
||||||
|
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||||
|
return srv.(BackendServer).Predict(ctx, req.(*PredictOptions))
|
||||||
|
}
|
||||||
|
return interceptor(ctx, in, info, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Backend_LoadModel_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||||
|
in := new(ModelOptions)
|
||||||
|
if err := dec(in); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if interceptor == nil {
|
||||||
|
return srv.(BackendServer).LoadModel(ctx, in)
|
||||||
|
}
|
||||||
|
info := &grpc.UnaryServerInfo{
|
||||||
|
Server: srv,
|
||||||
|
FullMethod: "/backend.Backend/LoadModel",
|
||||||
|
}
|
||||||
|
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||||
|
return srv.(BackendServer).LoadModel(ctx, req.(*ModelOptions))
|
||||||
|
}
|
||||||
|
return interceptor(ctx, in, info, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Backend_PredictStream_Handler(srv interface{}, stream grpc.ServerStream) error {
|
||||||
|
m := new(PredictOptions)
|
||||||
|
if err := stream.RecvMsg(m); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return srv.(BackendServer).PredictStream(m, &backendPredictStreamServer{stream})
|
||||||
|
}
|
||||||
|
|
||||||
|
type Backend_PredictStreamServer interface {
|
||||||
|
Send(*Reply) error
|
||||||
|
grpc.ServerStream
|
||||||
|
}
|
||||||
|
|
||||||
|
type backendPredictStreamServer struct {
|
||||||
|
grpc.ServerStream
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *backendPredictStreamServer) Send(m *Reply) error {
|
||||||
|
return x.ServerStream.SendMsg(m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Backend_Embedding_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||||
|
in := new(PredictOptions)
|
||||||
|
if err := dec(in); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if interceptor == nil {
|
||||||
|
return srv.(BackendServer).Embedding(ctx, in)
|
||||||
|
}
|
||||||
|
info := &grpc.UnaryServerInfo{
|
||||||
|
Server: srv,
|
||||||
|
FullMethod: "/backend.Backend/Embedding",
|
||||||
|
}
|
||||||
|
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||||
|
return srv.(BackendServer).Embedding(ctx, req.(*PredictOptions))
|
||||||
|
}
|
||||||
|
return interceptor(ctx, in, info, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Backend_GenerateImage_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||||
|
in := new(GenerateImageRequest)
|
||||||
|
if err := dec(in); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if interceptor == nil {
|
||||||
|
return srv.(BackendServer).GenerateImage(ctx, in)
|
||||||
|
}
|
||||||
|
info := &grpc.UnaryServerInfo{
|
||||||
|
Server: srv,
|
||||||
|
FullMethod: "/backend.Backend/GenerateImage",
|
||||||
|
}
|
||||||
|
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||||
|
return srv.(BackendServer).GenerateImage(ctx, req.(*GenerateImageRequest))
|
||||||
|
}
|
||||||
|
return interceptor(ctx, in, info, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Backend_AudioTranscription_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||||
|
in := new(TranscriptRequest)
|
||||||
|
if err := dec(in); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if interceptor == nil {
|
||||||
|
return srv.(BackendServer).AudioTranscription(ctx, in)
|
||||||
|
}
|
||||||
|
info := &grpc.UnaryServerInfo{
|
||||||
|
Server: srv,
|
||||||
|
FullMethod: "/backend.Backend/AudioTranscription",
|
||||||
|
}
|
||||||
|
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||||
|
return srv.(BackendServer).AudioTranscription(ctx, req.(*TranscriptRequest))
|
||||||
|
}
|
||||||
|
return interceptor(ctx, in, info, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Backend_TTS_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||||
|
in := new(TTSRequest)
|
||||||
|
if err := dec(in); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if interceptor == nil {
|
||||||
|
return srv.(BackendServer).TTS(ctx, in)
|
||||||
|
}
|
||||||
|
info := &grpc.UnaryServerInfo{
|
||||||
|
Server: srv,
|
||||||
|
FullMethod: "/backend.Backend/TTS",
|
||||||
|
}
|
||||||
|
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||||
|
return srv.(BackendServer).TTS(ctx, req.(*TTSRequest))
|
||||||
|
}
|
||||||
|
return interceptor(ctx, in, info, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Backend_TokenizeString_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||||
|
in := new(PredictOptions)
|
||||||
|
if err := dec(in); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if interceptor == nil {
|
||||||
|
return srv.(BackendServer).TokenizeString(ctx, in)
|
||||||
|
}
|
||||||
|
info := &grpc.UnaryServerInfo{
|
||||||
|
Server: srv,
|
||||||
|
FullMethod: "/backend.Backend/TokenizeString",
|
||||||
|
}
|
||||||
|
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||||
|
return srv.(BackendServer).TokenizeString(ctx, req.(*PredictOptions))
|
||||||
|
}
|
||||||
|
return interceptor(ctx, in, info, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Backend_Status_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||||
|
in := new(HealthMessage)
|
||||||
|
if err := dec(in); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if interceptor == nil {
|
||||||
|
return srv.(BackendServer).Status(ctx, in)
|
||||||
|
}
|
||||||
|
info := &grpc.UnaryServerInfo{
|
||||||
|
Server: srv,
|
||||||
|
FullMethod: "/backend.Backend/Status",
|
||||||
|
}
|
||||||
|
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||||
|
return srv.(BackendServer).Status(ctx, req.(*HealthMessage))
|
||||||
|
}
|
||||||
|
return interceptor(ctx, in, info, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Backend_ServiceDesc is the grpc.ServiceDesc for Backend service.
|
||||||
|
// It's only intended for direct use with grpc.RegisterService,
|
||||||
|
// and not to be introspected or modified (even as a copy)
|
||||||
|
var Backend_ServiceDesc = grpc.ServiceDesc{
|
||||||
|
ServiceName: "backend.Backend",
|
||||||
|
HandlerType: (*BackendServer)(nil),
|
||||||
|
Methods: []grpc.MethodDesc{
|
||||||
|
{
|
||||||
|
MethodName: "Health",
|
||||||
|
Handler: _Backend_Health_Handler,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
MethodName: "Predict",
|
||||||
|
Handler: _Backend_Predict_Handler,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
MethodName: "LoadModel",
|
||||||
|
Handler: _Backend_LoadModel_Handler,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
MethodName: "Embedding",
|
||||||
|
Handler: _Backend_Embedding_Handler,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
MethodName: "GenerateImage",
|
||||||
|
Handler: _Backend_GenerateImage_Handler,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
MethodName: "AudioTranscription",
|
||||||
|
Handler: _Backend_AudioTranscription_Handler,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
MethodName: "TTS",
|
||||||
|
Handler: _Backend_TTS_Handler,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
MethodName: "TokenizeString",
|
||||||
|
Handler: _Backend_TokenizeString_Handler,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
MethodName: "Status",
|
||||||
|
Handler: _Backend_Status_Handler,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Streams: []grpc.StreamDesc{
|
||||||
|
{
|
||||||
|
StreamName: "PredictStream",
|
||||||
|
Handler: _Backend_PredictStream_Handler,
|
||||||
|
ServerStreams: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Metadata: "backend/backend.proto",
|
||||||
|
}
|
||||||
@@ -5,6 +5,7 @@ SYSTEM ?= $(HOST_SYSTEM)
|
|||||||
TAG_LIB_GRPC?=v1.59.0
|
TAG_LIB_GRPC?=v1.59.0
|
||||||
GIT_REPO_LIB_GRPC?=https://github.com/grpc/grpc.git
|
GIT_REPO_LIB_GRPC?=https://github.com/grpc/grpc.git
|
||||||
GIT_CLONE_DEPTH?=1
|
GIT_CLONE_DEPTH?=1
|
||||||
|
NUM_BUILD_THREADS?=$(shell nproc --ignore=1)
|
||||||
|
|
||||||
INSTALLED_PACKAGES=installed_packages
|
INSTALLED_PACKAGES=installed_packages
|
||||||
GRPC_REPO=grpc_repo
|
GRPC_REPO=grpc_repo
|
||||||
@@ -46,17 +47,12 @@ endif
|
|||||||
$(INSTALLED_PACKAGES): grpc_build
|
$(INSTALLED_PACKAGES): grpc_build
|
||||||
|
|
||||||
$(GRPC_REPO):
|
$(GRPC_REPO):
|
||||||
mkdir -p $(GRPC_REPO)/grpc
|
git clone --depth $(GIT_CLONE_DEPTH) -b $(TAG_LIB_GRPC) $(GIT_REPO_LIB_GRPC) $(GRPC_REPO)/grpc
|
||||||
cd $(GRPC_REPO)/grpc && \
|
cd $(GRPC_REPO)/grpc && git submodule update --init --recursive --depth $(GIT_CLONE_DEPTH)
|
||||||
git init && \
|
|
||||||
git remote add origin $(GIT_REPO_LIB_GRPC) && \
|
|
||||||
git fetch origin && \
|
|
||||||
git checkout $(TAG_LIB_GRPC) && \
|
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
|
||||||
|
|
||||||
$(GRPC_BUILD): $(GRPC_REPO)
|
$(GRPC_BUILD): $(GRPC_REPO)
|
||||||
mkdir -p $(GRPC_BUILD)
|
mkdir -p $(GRPC_BUILD)
|
||||||
cd $(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . && cmake --build . --target install
|
cd $(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . -- -j ${NUM_BUILD_THREADS} && cmake --build . --target install -- -j ${NUM_BUILD_THREADS}
|
||||||
|
|
||||||
build: $(INSTALLED_PACKAGES)
|
build: $(INSTALLED_PACKAGES)
|
||||||
|
|
||||||
|
|||||||
@@ -1,82 +1,71 @@
|
|||||||
|
|
||||||
LLAMA_VERSION?=
|
LLAMA_VERSION?=
|
||||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
|
||||||
|
|
||||||
CMAKE_ARGS?=
|
CMAKE_ARGS?=
|
||||||
BUILD_TYPE?=
|
BUILD_TYPE?=
|
||||||
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
||||||
TARGET?=--target grpc-server
|
|
||||||
|
|
||||||
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
# If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
|
||||||
|
|
||||||
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
|
|
||||||
ifeq ($(BUILD_TYPE),cublas)
|
ifeq ($(BUILD_TYPE),cublas)
|
||||||
CMAKE_ARGS+=-DGGML_CUDA=ON
|
CMAKE_ARGS+=-DLLAMA_CUBLAS=ON
|
||||||
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
# If build type is openblas then we set -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
|
||||||
# to CMAKE_ARGS automatically
|
# to CMAKE_ARGS automatically
|
||||||
else ifeq ($(BUILD_TYPE),openblas)
|
else ifeq ($(BUILD_TYPE),openblas)
|
||||||
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
CMAKE_ARGS+=-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
|
||||||
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
# If build type is clblas (openCL) we set -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||||
else ifeq ($(BUILD_TYPE),clblas)
|
else ifeq ($(BUILD_TYPE),clblas)
|
||||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
CMAKE_ARGS+=-DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||||
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
||||||
else ifeq ($(BUILD_TYPE),hipblas)
|
else ifeq ($(BUILD_TYPE),hipblas)
|
||||||
CMAKE_ARGS+=-DGGML_HIPBLAS=ON
|
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
|
||||||
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
|
|
||||||
# But if it's OSX without metal, disable it here
|
|
||||||
else ifeq ($(OS),Darwin)
|
|
||||||
ifneq ($(BUILD_TYPE),metal)
|
|
||||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
|
||||||
else
|
|
||||||
CMAKE_ARGS+=-DGGML_METAL=ON
|
|
||||||
# Until this is tested properly, we disable embedded metal file
|
|
||||||
# as we already embed it as part of the LocalAI assets
|
|
||||||
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=OFF
|
|
||||||
TARGET+=--target ggml-metal
|
|
||||||
endif
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),sycl_f16)
|
ifeq ($(BUILD_TYPE),sycl_f16)
|
||||||
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
|
CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),sycl_f32)
|
ifeq ($(BUILD_TYPE),sycl_f32)
|
||||||
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||||
endif
|
endif
|
||||||
|
|
||||||
llama.cpp:
|
llama.cpp:
|
||||||
mkdir -p llama.cpp
|
git clone --recurse-submodules https://github.com/ggerganov/llama.cpp llama.cpp
|
||||||
cd llama.cpp && \
|
if [ -z "$(LLAMA_VERSION)" ]; then \
|
||||||
git init && \
|
exit 1; \
|
||||||
git remote add origin $(LLAMA_REPO) && \
|
fi
|
||||||
git fetch origin && \
|
cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1
|
||||||
git checkout -b build $(LLAMA_VERSION) && \
|
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
|
||||||
|
|
||||||
llama.cpp/examples/grpc-server: llama.cpp
|
llama.cpp/examples/grpc-server:
|
||||||
mkdir -p llama.cpp/examples/grpc-server
|
mkdir -p llama.cpp/examples/grpc-server
|
||||||
bash prepare.sh
|
cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
|
||||||
|
cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
|
||||||
|
cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
|
||||||
|
cp -rfv $(abspath ./)/utils.hpp llama.cpp/examples/grpc-server/
|
||||||
|
echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
|
||||||
|
## XXX: In some versions of CMake clip wasn't being built before llama.
|
||||||
|
## This is an hack for now, but it should be fixed in the future.
|
||||||
|
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
|
||||||
|
cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
|
||||||
|
echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
|
||||||
|
cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
|
||||||
|
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
|
||||||
|
|
||||||
rebuild:
|
rebuild:
|
||||||
bash prepare.sh
|
cp -rfv $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
|
||||||
|
cp -rfv $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
|
||||||
|
cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
|
||||||
rm -rf grpc-server
|
rm -rf grpc-server
|
||||||
$(MAKE) grpc-server
|
$(MAKE) grpc-server
|
||||||
|
|
||||||
purge:
|
clean:
|
||||||
rm -rf llama.cpp/build
|
rm -rf llama.cpp
|
||||||
rm -rf llama.cpp/examples/grpc-server
|
|
||||||
rm -rf grpc-server
|
rm -rf grpc-server
|
||||||
|
|
||||||
clean: purge
|
|
||||||
rm -rf llama.cpp
|
|
||||||
|
|
||||||
grpc-server: llama.cpp llama.cpp/examples/grpc-server
|
grpc-server: llama.cpp llama.cpp/examples/grpc-server
|
||||||
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
|
|
||||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||||
+bash -c "source $(ONEAPI_VARS); \
|
bash -c "source $(ONEAPI_VARS); \
|
||||||
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
|
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release"
|
||||||
else
|
else
|
||||||
+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
|
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release
|
||||||
endif
|
endif
|
||||||
cp llama.cpp/build/bin/grpc-server .
|
cp llama.cpp/build/bin/grpc-server .
|
||||||
@@ -13,15 +13,15 @@
|
|||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
#include "clip.h"
|
#include "clip.h"
|
||||||
#include "llava.h"
|
#include "llava.h"
|
||||||
#include "log.h"
|
|
||||||
#include "stb_image.h"
|
#include "stb_image.h"
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "json.hpp"
|
#include "json.hpp"
|
||||||
#include "llama.h"
|
#include "llama.h"
|
||||||
|
#include "grammar-parser.h"
|
||||||
#include "backend.pb.h"
|
#include "backend.pb.h"
|
||||||
#include "backend.grpc.pb.h"
|
#include "backend.grpc.pb.h"
|
||||||
#include "utils.hpp"
|
#include "utils.hpp"
|
||||||
#include "sampling.h"
|
|
||||||
// include std::regex
|
// include std::regex
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
@@ -113,7 +113,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
|
|||||||
std::string ret;
|
std::string ret;
|
||||||
for (; begin != end; ++begin)
|
for (; begin != end; ++begin)
|
||||||
{
|
{
|
||||||
ret += common_token_to_piece(ctx, *begin);
|
ret += llama_token_to_piece(ctx, *begin);
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@@ -121,7 +121,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
|
|||||||
// format incomplete utf-8 multibyte character for output
|
// format incomplete utf-8 multibyte character for output
|
||||||
static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
|
static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
|
||||||
{
|
{
|
||||||
std::string out = token == -1 ? "" : common_token_to_piece(ctx, token);
|
std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
|
||||||
// if the size is 1 and first bit is 1, meaning it's a partial character
|
// if the size is 1 and first bit is 1, meaning it's a partial character
|
||||||
// (size > 1 meaning it's already a known token)
|
// (size > 1 meaning it's already a known token)
|
||||||
if (out.size() == 1 && (out[0] & 0x80) == 0x80)
|
if (out.size() == 1 && (out[0] & 0x80) == 0x80)
|
||||||
@@ -203,8 +203,8 @@ struct llama_client_slot
|
|||||||
std::string stopping_word;
|
std::string stopping_word;
|
||||||
|
|
||||||
// sampling
|
// sampling
|
||||||
struct common_sampler_params sparams;
|
struct llama_sampling_params sparams;
|
||||||
common_sampler *ctx_sampling = nullptr;
|
llama_sampling_context *ctx_sampling = nullptr;
|
||||||
|
|
||||||
int32_t ga_i = 0; // group-attention state
|
int32_t ga_i = 0; // group-attention state
|
||||||
int32_t ga_n = 1; // group-attention factor
|
int32_t ga_n = 1; // group-attention factor
|
||||||
@@ -257,7 +257,7 @@ struct llama_client_slot
|
|||||||
images.clear();
|
images.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool has_budget(common_params &global_params) {
|
bool has_budget(gpt_params &global_params) {
|
||||||
if (params.n_predict == -1 && global_params.n_predict == -1)
|
if (params.n_predict == -1 && global_params.n_predict == -1)
|
||||||
{
|
{
|
||||||
return true; // limitless
|
return true; // limitless
|
||||||
@@ -391,39 +391,6 @@ struct llama_metrics {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct llava_embd_batch {
|
|
||||||
std::vector<llama_pos> pos;
|
|
||||||
std::vector<int32_t> n_seq_id;
|
|
||||||
std::vector<llama_seq_id> seq_id_0;
|
|
||||||
std::vector<llama_seq_id *> seq_ids;
|
|
||||||
std::vector<int8_t> logits;
|
|
||||||
llama_batch batch;
|
|
||||||
llava_embd_batch(float * embd, int32_t n_tokens, llama_pos pos_0, llama_seq_id seq_id) {
|
|
||||||
pos .resize(n_tokens);
|
|
||||||
n_seq_id.resize(n_tokens);
|
|
||||||
seq_ids .resize(n_tokens + 1);
|
|
||||||
logits .resize(n_tokens);
|
|
||||||
seq_id_0.resize(1);
|
|
||||||
seq_id_0[0] = seq_id;
|
|
||||||
seq_ids [n_tokens] = nullptr;
|
|
||||||
batch = {
|
|
||||||
/*n_tokens =*/ n_tokens,
|
|
||||||
/*tokens =*/ nullptr,
|
|
||||||
/*embd =*/ embd,
|
|
||||||
/*pos =*/ pos.data(),
|
|
||||||
/*n_seq_id =*/ n_seq_id.data(),
|
|
||||||
/*seq_id =*/ seq_ids.data(),
|
|
||||||
/*logits =*/ logits.data(),
|
|
||||||
};
|
|
||||||
for (int i = 0; i < n_tokens; i++) {
|
|
||||||
batch.pos [i] = pos_0 + i;
|
|
||||||
batch.n_seq_id[i] = 1;
|
|
||||||
batch.seq_id [i] = seq_id_0.data();
|
|
||||||
batch.logits [i] = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct llama_server_context
|
struct llama_server_context
|
||||||
{
|
{
|
||||||
llama_model *model = nullptr;
|
llama_model *model = nullptr;
|
||||||
@@ -431,7 +398,7 @@ struct llama_server_context
|
|||||||
|
|
||||||
clip_ctx *clp_ctx = nullptr;
|
clip_ctx *clp_ctx = nullptr;
|
||||||
|
|
||||||
common_params params;
|
gpt_params params;
|
||||||
|
|
||||||
llama_batch batch;
|
llama_batch batch;
|
||||||
|
|
||||||
@@ -474,7 +441,7 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool load_model(const common_params ¶ms_)
|
bool load_model(const gpt_params ¶ms_)
|
||||||
{
|
{
|
||||||
params = params_;
|
params = params_;
|
||||||
if (!params.mmproj.empty()) {
|
if (!params.mmproj.empty()) {
|
||||||
@@ -482,7 +449,7 @@ struct llama_server_context
|
|||||||
LOG_INFO("Multi Modal Mode Enabled", {});
|
LOG_INFO("Multi Modal Mode Enabled", {});
|
||||||
clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1);
|
clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1);
|
||||||
if(clp_ctx == nullptr) {
|
if(clp_ctx == nullptr) {
|
||||||
LOG_ERR("unable to load clip model: %s", params.mmproj.c_str());
|
LOG_ERROR("unable to load clip model", {{"model", params.mmproj}});
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -491,12 +458,10 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
common_init_result common_init = common_init_from_params(params);
|
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
||||||
model = common_init.model;
|
|
||||||
ctx = common_init.context;
|
|
||||||
if (model == nullptr)
|
if (model == nullptr)
|
||||||
{
|
{
|
||||||
LOG_ERR("unable to load model: %s", params.model.c_str());
|
LOG_ERROR("unable to load model", {{"model", params.model}});
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -504,7 +469,7 @@ struct llama_server_context
|
|||||||
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
|
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
|
||||||
const int n_embd_llm = llama_n_embd(model);
|
const int n_embd_llm = llama_n_embd(model);
|
||||||
if (n_embd_clip != n_embd_llm) {
|
if (n_embd_clip != n_embd_llm) {
|
||||||
LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
|
LOG_TEE("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
|
||||||
llama_free(ctx);
|
llama_free(ctx);
|
||||||
llama_free_model(model);
|
llama_free_model(model);
|
||||||
return false;
|
return false;
|
||||||
@@ -513,7 +478,7 @@ struct llama_server_context
|
|||||||
|
|
||||||
n_ctx = llama_n_ctx(ctx);
|
n_ctx = llama_n_ctx(ctx);
|
||||||
|
|
||||||
add_bos_token = llama_add_bos_token(model);
|
add_bos_token = llama_should_add_bos_token(model);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -523,21 +488,11 @@ struct llama_server_context
|
|||||||
std::vector<char> buf(1);
|
std::vector<char> buf(1);
|
||||||
int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
|
int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
|
||||||
if (res < 0) {
|
if (res < 0) {
|
||||||
LOG_ERR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", __func__);
|
LOG_ERROR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", {});
|
||||||
sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template
|
sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
llama_client_slot* get_active_slot() {
|
|
||||||
for (llama_client_slot& slot : slots) {
|
|
||||||
// Check if the slot is currently processing
|
|
||||||
if (slot.is_processing()) {
|
|
||||||
return &slot; // Return the active slot
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nullptr; // No active slot found
|
|
||||||
}
|
|
||||||
|
|
||||||
void initialize() {
|
void initialize() {
|
||||||
// create slots
|
// create slots
|
||||||
all_slots_are_idle = true;
|
all_slots_are_idle = true;
|
||||||
@@ -611,12 +566,12 @@ struct llama_server_context
|
|||||||
std::vector<llama_token> p;
|
std::vector<llama_token> p;
|
||||||
if (first)
|
if (first)
|
||||||
{
|
{
|
||||||
p = common_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
|
p = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
|
||||||
first = false;
|
first = false;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
p = common_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
|
p = ::llama_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
|
||||||
}
|
}
|
||||||
prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end());
|
prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end());
|
||||||
}
|
}
|
||||||
@@ -633,7 +588,7 @@ struct llama_server_context
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
auto s = json_prompt.template get<std::string>();
|
auto s = json_prompt.template get<std::string>();
|
||||||
prompt_tokens = common_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
|
prompt_tokens = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
return prompt_tokens;
|
return prompt_tokens;
|
||||||
@@ -662,7 +617,7 @@ struct llama_server_context
|
|||||||
|
|
||||||
bool launch_slot_with_data(llama_client_slot* &slot, json data) {
|
bool launch_slot_with_data(llama_client_slot* &slot, json data) {
|
||||||
slot_params default_params;
|
slot_params default_params;
|
||||||
common_sampler_params default_sparams;
|
llama_sampling_params default_sparams;
|
||||||
|
|
||||||
slot->params.stream = json_value(data, "stream", false);
|
slot->params.stream = json_value(data, "stream", false);
|
||||||
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
|
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
|
||||||
@@ -671,7 +626,7 @@ struct llama_server_context
|
|||||||
slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
|
slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
|
||||||
slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
|
slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
|
||||||
slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
|
slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
|
||||||
slot->sparams.typ_p = json_value(data, "typical_p", default_sparams.typ_p);
|
slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p);
|
||||||
slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
|
slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
|
||||||
slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
|
slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
|
||||||
slot->sparams.dynatemp_exponent = json_value(data, "dynatemp_exponent", default_sparams.dynatemp_exponent);
|
slot->sparams.dynatemp_exponent = json_value(data, "dynatemp_exponent", default_sparams.dynatemp_exponent);
|
||||||
@@ -684,7 +639,7 @@ struct llama_server_context
|
|||||||
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
|
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
|
||||||
slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
|
slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
|
||||||
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
|
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
|
||||||
slot->sparams.seed = json_value(data, "seed", default_sparams.seed);
|
slot->params.seed = json_value(data, "seed", default_params.seed);
|
||||||
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
||||||
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
||||||
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
||||||
@@ -708,7 +663,6 @@ struct llama_server_context
|
|||||||
slot->params.input_prefix = "";
|
slot->params.input_prefix = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (data.count("input_suffix") != 0)
|
if (data.count("input_suffix") != 0)
|
||||||
{
|
{
|
||||||
slot->params.input_suffix = data["input_suffix"];
|
slot->params.input_suffix = data["input_suffix"];
|
||||||
@@ -727,10 +681,6 @@ struct llama_server_context
|
|||||||
slot->prompt = "";
|
slot->prompt = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (json_value(data, "ignore_eos", false)) {
|
|
||||||
slot->sparams.logit_bias.push_back({llama_token_eos(model), -INFINITY});
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
slot->sparams.penalty_prompt_tokens.clear();
|
slot->sparams.penalty_prompt_tokens.clear();
|
||||||
slot->sparams.use_penalty_prompt_tokens = false;
|
slot->sparams.use_penalty_prompt_tokens = false;
|
||||||
const auto &penalty_prompt = data.find("penalty_prompt");
|
const auto &penalty_prompt = data.find("penalty_prompt");
|
||||||
@@ -766,10 +716,14 @@ struct llama_server_context
|
|||||||
slot->sparams.use_penalty_prompt_tokens = true;
|
slot->sparams.use_penalty_prompt_tokens = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
|
||||||
slot->sparams.logit_bias.clear();
|
slot->sparams.logit_bias.clear();
|
||||||
|
|
||||||
|
if (json_value(data, "ignore_eos", false))
|
||||||
|
{
|
||||||
|
slot->sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
|
||||||
|
}
|
||||||
|
|
||||||
const auto &logit_bias = data.find("logit_bias");
|
const auto &logit_bias = data.find("logit_bias");
|
||||||
if (logit_bias != data.end() && logit_bias->is_array())
|
if (logit_bias != data.end() && logit_bias->is_array())
|
||||||
{
|
{
|
||||||
@@ -797,15 +751,15 @@ struct llama_server_context
|
|||||||
llama_token tok = el[0].get<llama_token>();
|
llama_token tok = el[0].get<llama_token>();
|
||||||
if (tok >= 0 && tok < n_vocab)
|
if (tok >= 0 && tok < n_vocab)
|
||||||
{
|
{
|
||||||
slot->sparams.logit_bias.push_back({tok, bias});
|
slot->sparams.logit_bias[tok] = bias;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (el[0].is_string())
|
else if (el[0].is_string())
|
||||||
{
|
{
|
||||||
auto toks = common_tokenize(model, el[0].get<std::string>(), false);
|
auto toks = llama_tokenize(model, el[0].get<std::string>(), false);
|
||||||
for (auto tok : toks)
|
for (auto tok : toks)
|
||||||
{
|
{
|
||||||
slot->sparams.logit_bias.push_back({tok, bias});
|
slot->sparams.logit_bias[tok] = bias;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -826,22 +780,24 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto & samplers = data.find("samplers");
|
const auto &samplers_sequence = data.find("samplers");
|
||||||
if (samplers != data.end() && samplers->is_array()) {
|
if (samplers_sequence != data.end() && samplers_sequence->is_array())
|
||||||
|
{
|
||||||
std::vector<std::string> sampler_names;
|
std::vector<std::string> sampler_names;
|
||||||
for (const auto & name : *samplers) {
|
for (const auto &sampler_name : *samplers_sequence)
|
||||||
if (name.is_string()) {
|
{
|
||||||
sampler_names.emplace_back(name);
|
if (sampler_name.is_string())
|
||||||
|
{
|
||||||
|
sampler_names.emplace_back(sampler_name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
slot->sparams.samplers = common_sampler_types_from_names(sampler_names, false);
|
slot->sparams.samplers_sequence = sampler_types_from_names(sampler_names, false);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
slot->sparams.samplers = default_sparams.samplers;
|
slot->sparams.samplers_sequence = default_sparams.samplers_sequence;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (multimodal)
|
if (multimodal)
|
||||||
{
|
{
|
||||||
const auto &images_data = data.find("image_data");
|
const auto &images_data = data.find("image_data");
|
||||||
@@ -856,11 +812,10 @@ struct llama_server_context
|
|||||||
img_sl.img_data = clip_image_u8_init();
|
img_sl.img_data = clip_image_u8_init();
|
||||||
if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
|
if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
|
||||||
{
|
{
|
||||||
LOG_ERR("%s: failed to load image, slot_id: %d, img_sl_id: %d",
|
LOG_ERROR("failed to load image", {
|
||||||
__func__,
|
{"slot_id", slot->id},
|
||||||
slot->id,
|
{"img_sl_id", img_sl.id}
|
||||||
img_sl.id
|
});
|
||||||
);
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
LOG_VERBOSE("image loaded", {
|
LOG_VERBOSE("image loaded", {
|
||||||
@@ -898,12 +853,12 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!found) {
|
if (!found) {
|
||||||
LOG("ERROR: Image with id: %i, not found.\n", img_id);
|
LOG_TEE("ERROR: Image with id: %i, not found.\n", img_id);
|
||||||
slot->images.clear();
|
slot->images.clear();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} catch (const std::invalid_argument& e) {
|
} catch (const std::invalid_argument& e) {
|
||||||
LOG("Invalid image number id in prompt\n");
|
LOG_TEE("Invalid image number id in prompt\n");
|
||||||
slot->images.clear();
|
slot->images.clear();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -918,10 +873,10 @@ struct llama_server_context
|
|||||||
|
|
||||||
if (slot->ctx_sampling != nullptr)
|
if (slot->ctx_sampling != nullptr)
|
||||||
{
|
{
|
||||||
common_sampler_free(slot->ctx_sampling);
|
llama_sampling_free(slot->ctx_sampling);
|
||||||
}
|
}
|
||||||
slot->ctx_sampling = common_sampler_init(model, slot->sparams);
|
slot->ctx_sampling = llama_sampling_init(slot->sparams);
|
||||||
//llama_set_rng_seed(ctx, slot->params.seed);
|
llama_set_rng_seed(ctx, slot->params.seed);
|
||||||
slot->command = LOAD_PROMPT;
|
slot->command = LOAD_PROMPT;
|
||||||
|
|
||||||
all_slots_are_idle = false;
|
all_slots_are_idle = false;
|
||||||
@@ -931,8 +886,6 @@ struct llama_server_context
|
|||||||
{"task_id", slot->task_id},
|
{"task_id", slot->task_id},
|
||||||
});
|
});
|
||||||
|
|
||||||
// LOG("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -947,13 +900,13 @@ struct llama_server_context
|
|||||||
system_tokens.clear();
|
system_tokens.clear();
|
||||||
|
|
||||||
if (!system_prompt.empty()) {
|
if (!system_prompt.empty()) {
|
||||||
system_tokens = common_tokenize(ctx, system_prompt, add_bos_token);
|
system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token);
|
||||||
|
|
||||||
common_batch_clear(batch);
|
llama_batch_clear(batch);
|
||||||
|
|
||||||
for (int i = 0; i < (int)system_tokens.size(); ++i)
|
for (int i = 0; i < (int)system_tokens.size(); ++i)
|
||||||
{
|
{
|
||||||
common_batch_add(batch, system_tokens[i], i, { 0 }, false);
|
llama_batch_add(batch, system_tokens[i], i, { 0 }, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += params.n_batch)
|
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += params.n_batch)
|
||||||
@@ -967,10 +920,11 @@ struct llama_server_context
|
|||||||
batch.n_seq_id + i,
|
batch.n_seq_id + i,
|
||||||
batch.seq_id + i,
|
batch.seq_id + i,
|
||||||
batch.logits + i,
|
batch.logits + i,
|
||||||
|
0, 0, 0, // unused
|
||||||
};
|
};
|
||||||
if (llama_decode(ctx, batch_view) != 0)
|
if (llama_decode(ctx, batch_view) != 0)
|
||||||
{
|
{
|
||||||
LOG("%s: llama_decode() failed\n", __func__);
|
LOG_TEE("%s: llama_decode() failed\n", __func__);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -982,7 +936,7 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG("system prompt updated\n");
|
LOG_TEE("system prompt updated\n");
|
||||||
system_need_update = false;
|
system_need_update = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1041,20 +995,18 @@ struct llama_server_context
|
|||||||
|
|
||||||
bool process_token(completion_token_output &result, llama_client_slot &slot) {
|
bool process_token(completion_token_output &result, llama_client_slot &slot) {
|
||||||
// remember which tokens were sampled - used for repetition penalties during sampling
|
// remember which tokens were sampled - used for repetition penalties during sampling
|
||||||
const std::string token_str = common_token_to_piece(ctx, result.tok);
|
const std::string token_str = llama_token_to_piece(ctx, result.tok);
|
||||||
slot.sampled = result.tok;
|
slot.sampled = result.tok;
|
||||||
|
|
||||||
// search stop word and delete it
|
// search stop word and delete it
|
||||||
slot.generated_text += token_str;
|
slot.generated_text += token_str;
|
||||||
slot.has_next_token = true;
|
slot.has_next_token = true;
|
||||||
|
|
||||||
/*
|
|
||||||
if (slot.ctx_sampling->params.use_penalty_prompt_tokens && result.tok != -1)
|
if (slot.ctx_sampling->params.use_penalty_prompt_tokens && result.tok != -1)
|
||||||
{
|
{
|
||||||
// we can change penalty_prompt_tokens because it is always created from scratch each request
|
// we can change penalty_prompt_tokens because it is always created from scratch each request
|
||||||
slot.ctx_sampling->params.penalty_prompt_tokens.push_back(result.tok);
|
slot.ctx_sampling->params.penalty_prompt_tokens.push_back(result.tok);
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
|
||||||
// check if there is incomplete UTF-8 character at the end
|
// check if there is incomplete UTF-8 character at the end
|
||||||
bool incomplete = false;
|
bool incomplete = false;
|
||||||
@@ -1132,7 +1084,7 @@ struct llama_server_context
|
|||||||
slot.has_next_token = false;
|
slot.has_next_token = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result.tok == llama_token_eos(model))
|
if (!slot.cache_tokens.empty() && result.tok == llama_token_eos(model))
|
||||||
{
|
{
|
||||||
slot.stopped_eos = true;
|
slot.stopped_eos = true;
|
||||||
slot.has_next_token = false;
|
slot.has_next_token = false;
|
||||||
@@ -1163,8 +1115,8 @@ struct llama_server_context
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.cpuparams.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
|
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
|
||||||
LOG("Error processing the given image");
|
LOG_TEE("Error processing the given image");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1176,7 +1128,7 @@ struct llama_server_context
|
|||||||
|
|
||||||
void send_error(task_server& task, const std::string &error)
|
void send_error(task_server& task, const std::string &error)
|
||||||
{
|
{
|
||||||
LOG("task %i - error: %s\n", task.id, error.c_str());
|
LOG_TEE("task %i - error: %s\n", task.id, error.c_str());
|
||||||
task_result res;
|
task_result res;
|
||||||
res.id = task.id;
|
res.id = task.id;
|
||||||
res.multitask_id = task.multitask_id;
|
res.multitask_id = task.multitask_id;
|
||||||
@@ -1188,11 +1140,13 @@ struct llama_server_context
|
|||||||
|
|
||||||
json get_formated_generation(llama_client_slot &slot)
|
json get_formated_generation(llama_client_slot &slot)
|
||||||
{
|
{
|
||||||
std::vector<std::string> samplers;
|
const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
|
||||||
samplers.reserve(slot.sparams.samplers.size());
|
const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
|
||||||
for (const auto & sampler : slot.sparams.samplers)
|
eos_bias->second < 0.0f && std::isinf(eos_bias->second);
|
||||||
|
std::vector<std::string> samplers_sequence;
|
||||||
|
for (const auto &sampler_type : slot.sparams.samplers_sequence)
|
||||||
{
|
{
|
||||||
samplers.emplace_back(common_sampler_type_to_str(sampler));
|
samplers_sequence.emplace_back(sampler_type_to_name_string(sampler_type));
|
||||||
}
|
}
|
||||||
|
|
||||||
return json {
|
return json {
|
||||||
@@ -1207,11 +1161,13 @@ struct llama_server_context
|
|||||||
{"top_p", slot.sparams.top_p},
|
{"top_p", slot.sparams.top_p},
|
||||||
{"min_p", slot.sparams.min_p},
|
{"min_p", slot.sparams.min_p},
|
||||||
{"tfs_z", slot.sparams.tfs_z},
|
{"tfs_z", slot.sparams.tfs_z},
|
||||||
{"typical_p", slot.sparams.typ_p},
|
{"typical_p", slot.sparams.typical_p},
|
||||||
{"repeat_last_n", slot.sparams.penalty_last_n},
|
{"repeat_last_n", slot.sparams.penalty_last_n},
|
||||||
{"repeat_penalty", slot.sparams.penalty_repeat},
|
{"repeat_penalty", slot.sparams.penalty_repeat},
|
||||||
{"presence_penalty", slot.sparams.penalty_present},
|
{"presence_penalty", slot.sparams.penalty_present},
|
||||||
{"frequency_penalty", slot.sparams.penalty_freq},
|
{"frequency_penalty", slot.sparams.penalty_freq},
|
||||||
|
{"penalty_prompt_tokens", slot.sparams.penalty_prompt_tokens},
|
||||||
|
{"use_penalty_prompt_tokens", slot.sparams.use_penalty_prompt_tokens},
|
||||||
{"mirostat", slot.sparams.mirostat},
|
{"mirostat", slot.sparams.mirostat},
|
||||||
{"mirostat_tau", slot.sparams.mirostat_tau},
|
{"mirostat_tau", slot.sparams.mirostat_tau},
|
||||||
{"mirostat_eta", slot.sparams.mirostat_eta},
|
{"mirostat_eta", slot.sparams.mirostat_eta},
|
||||||
@@ -1219,13 +1175,13 @@ struct llama_server_context
|
|||||||
{"stop", slot.params.antiprompt},
|
{"stop", slot.params.antiprompt},
|
||||||
{"n_predict", slot.params.n_predict},
|
{"n_predict", slot.params.n_predict},
|
||||||
{"n_keep", params.n_keep},
|
{"n_keep", params.n_keep},
|
||||||
{"ignore_eos", slot.sparams.ignore_eos},
|
{"ignore_eos", ignore_eos},
|
||||||
{"stream", slot.params.stream},
|
{"stream", slot.params.stream},
|
||||||
// {"logit_bias", slot.sparams.logit_bias},
|
{"logit_bias", slot.sparams.logit_bias},
|
||||||
{"n_probs", slot.sparams.n_probs},
|
{"n_probs", slot.sparams.n_probs},
|
||||||
{"min_keep", slot.sparams.min_keep},
|
{"min_keep", slot.sparams.min_keep},
|
||||||
{"grammar", slot.sparams.grammar},
|
{"grammar", slot.sparams.grammar},
|
||||||
{"samplers", samplers}
|
{"samplers", samplers_sequence}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1248,7 +1204,7 @@ struct llama_server_context
|
|||||||
if (slot.sparams.n_probs > 0)
|
if (slot.sparams.n_probs > 0)
|
||||||
{
|
{
|
||||||
std::vector<completion_token_output> probs_output = {};
|
std::vector<completion_token_output> probs_output = {};
|
||||||
const std::vector<llama_token> to_send_toks = common_tokenize(ctx, tkn.text_to_send, false);
|
const std::vector<llama_token> to_send_toks = llama_tokenize(ctx, tkn.text_to_send, false);
|
||||||
size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size());
|
size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size());
|
||||||
size_t probs_stop_pos = std::min(slot.sent_token_probs_index + to_send_toks.size(), slot.generated_token_probs.size());
|
size_t probs_stop_pos = std::min(slot.sent_token_probs_index + to_send_toks.size(), slot.generated_token_probs.size());
|
||||||
if (probs_pos < probs_stop_pos)
|
if (probs_pos < probs_stop_pos)
|
||||||
@@ -1300,7 +1256,7 @@ struct llama_server_context
|
|||||||
std::vector<completion_token_output> probs = {};
|
std::vector<completion_token_output> probs = {};
|
||||||
if (!slot.params.stream && slot.stopped_word)
|
if (!slot.params.stream && slot.stopped_word)
|
||||||
{
|
{
|
||||||
const std::vector<llama_token> stop_word_toks = common_tokenize(ctx, slot.stopping_word, false);
|
const std::vector<llama_token> stop_word_toks = llama_tokenize(ctx, slot.stopping_word, false);
|
||||||
probs = std::vector<completion_token_output>(slot.generated_token_probs.begin(), slot.generated_token_probs.end() - stop_word_toks.size());
|
probs = std::vector<completion_token_output>(slot.generated_token_probs.begin(), slot.generated_token_probs.end() - stop_word_toks.size());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -1411,10 +1367,11 @@ struct llama_server_context
|
|||||||
batch.n_seq_id + i,
|
batch.n_seq_id + i,
|
||||||
batch.seq_id + i,
|
batch.seq_id + i,
|
||||||
batch.logits + i,
|
batch.logits + i,
|
||||||
|
0, 0, 0, // unused
|
||||||
};
|
};
|
||||||
if (llama_decode(ctx, batch_view))
|
if (llama_decode(ctx, batch_view))
|
||||||
{
|
{
|
||||||
LOG("%s : failed to eval\n", __func__);
|
LOG_TEE("%s : failed to eval\n", __func__);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1429,18 +1386,17 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
|
|
||||||
const int n_embd = llama_n_embd(model);
|
const int n_embd = llama_n_embd(model);
|
||||||
float * embd = img.image_embedding + i * n_embd;
|
llama_batch batch_img = { n_eval, nullptr, (img.image_embedding + i * n_embd), nullptr, nullptr, nullptr, nullptr, slot.n_past, 1, 0, };
|
||||||
llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0);
|
if (llama_decode(ctx, batch_img))
|
||||||
if (llama_decode(ctx, llava_batch.batch))
|
|
||||||
{
|
{
|
||||||
LOG("%s : failed to eval image\n", __func__);
|
LOG_TEE("%s : failed to eval image\n", __func__);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
slot.n_past += n_eval;
|
slot.n_past += n_eval;
|
||||||
}
|
}
|
||||||
image_idx++;
|
image_idx++;
|
||||||
|
|
||||||
common_batch_clear(batch);
|
llama_batch_clear(batch);
|
||||||
|
|
||||||
// append prefix of next image
|
// append prefix of next image
|
||||||
const auto json_prompt = (image_idx >= (int) slot.images.size()) ?
|
const auto json_prompt = (image_idx >= (int) slot.images.size()) ?
|
||||||
@@ -1450,7 +1406,7 @@ struct llama_server_context
|
|||||||
std::vector<llama_token> append_tokens = tokenize(json_prompt, false); // has next image
|
std::vector<llama_token> append_tokens = tokenize(json_prompt, false); // has next image
|
||||||
for (int i = 0; i < (int) append_tokens.size(); ++i)
|
for (int i = 0; i < (int) append_tokens.size(); ++i)
|
||||||
{
|
{
|
||||||
common_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true);
|
llama_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true);
|
||||||
slot.n_past += 1;
|
slot.n_past += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1582,7 +1538,7 @@ struct llama_server_context
|
|||||||
update_system_prompt();
|
update_system_prompt();
|
||||||
}
|
}
|
||||||
|
|
||||||
common_batch_clear(batch);
|
llama_batch_clear(batch);
|
||||||
|
|
||||||
if (all_slots_are_idle)
|
if (all_slots_are_idle)
|
||||||
{
|
{
|
||||||
@@ -1616,7 +1572,7 @@ struct llama_server_context
|
|||||||
slot.n_past = 0;
|
slot.n_past = 0;
|
||||||
slot.truncated = false;
|
slot.truncated = false;
|
||||||
slot.has_next_token = true;
|
slot.has_next_token = true;
|
||||||
LOG("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
|
LOG_TEE("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
// END LOCALAI changes
|
// END LOCALAI changes
|
||||||
@@ -1660,7 +1616,7 @@ struct llama_server_context
|
|||||||
|
|
||||||
// TODO: we always have to take into account the "system_tokens"
|
// TODO: we always have to take into account the "system_tokens"
|
||||||
// this is not great and needs to be improved somehow
|
// this is not great and needs to be improved somehow
|
||||||
common_batch_add(batch, slot.sampled, system_tokens.size() + slot_npast, { slot.id }, true);
|
llama_batch_add(batch, slot.sampled, system_tokens.size() + slot_npast, { slot.id }, true);
|
||||||
slot.n_past += 1;
|
slot.n_past += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1754,7 +1710,7 @@ struct llama_server_context
|
|||||||
|
|
||||||
if (!slot.params.cache_prompt)
|
if (!slot.params.cache_prompt)
|
||||||
{
|
{
|
||||||
common_sampler_reset(slot.ctx_sampling);
|
llama_sampling_reset(slot.ctx_sampling);
|
||||||
|
|
||||||
slot.n_past = 0;
|
slot.n_past = 0;
|
||||||
slot.n_past_se = 0;
|
slot.n_past_se = 0;
|
||||||
@@ -1766,7 +1722,7 @@ struct llama_server_context
|
|||||||
// push the prompt into the sampling context (do not apply grammar)
|
// push the prompt into the sampling context (do not apply grammar)
|
||||||
for (auto &token : prompt_tokens)
|
for (auto &token : prompt_tokens)
|
||||||
{
|
{
|
||||||
common_sampler_accept(slot.ctx_sampling, token, false);
|
llama_sampling_accept(slot.ctx_sampling, ctx, token, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
slot.n_past = common_part(slot.cache_tokens, prompt_tokens);
|
slot.n_past = common_part(slot.cache_tokens, prompt_tokens);
|
||||||
@@ -1858,17 +1814,16 @@ struct llama_server_context
|
|||||||
ga_i += ga_w/ga_n;
|
ga_i += ga_w/ga_n;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
common_batch_add(batch, prefix_tokens[slot.n_past], system_tokens.size() + slot_npast, {slot.id }, false);
|
llama_batch_add(batch, prefix_tokens[slot.n_past], system_tokens.size() + slot_npast, {slot.id }, false);
|
||||||
slot_npast++;
|
slot_npast++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (has_images && !ingest_images(slot, n_batch))
|
if (has_images && !ingest_images(slot, n_batch))
|
||||||
{
|
{
|
||||||
LOG_ERR("%s: failed processing images Slot id : %d, Task id: %d",
|
LOG_ERROR("failed processing images", {
|
||||||
__func__,
|
"slot_id", slot.id,
|
||||||
slot.id,
|
"task_id", slot.task_id,
|
||||||
slot.task_id
|
});
|
||||||
);
|
|
||||||
// FIXME @phymbert: to be properly tested
|
// FIXME @phymbert: to be properly tested
|
||||||
// early returning without changing the slot state will block the slot for ever
|
// early returning without changing the slot state will block the slot for ever
|
||||||
// no one at the moment is checking the return value
|
// no one at the moment is checking the return value
|
||||||
@@ -1908,10 +1863,10 @@ struct llama_server_context
|
|||||||
const int bd = (slot.ga_w / slot.ga_n) * (slot.ga_n - 1);
|
const int bd = (slot.ga_w / slot.ga_n) * (slot.ga_n - 1);
|
||||||
const int dd = (slot.ga_w / slot.ga_n) - ib * bd - slot.ga_w;
|
const int dd = (slot.ga_w / slot.ga_n) - ib * bd - slot.ga_w;
|
||||||
|
|
||||||
LOG("\n");
|
LOG_TEE("\n");
|
||||||
LOG("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd);
|
LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd);
|
||||||
LOG("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n);
|
LOG_TEE("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n);
|
||||||
LOG("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd);
|
LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd);
|
||||||
|
|
||||||
llama_kv_cache_seq_add(ctx, slot.id, slot.ga_i, slot.n_past_se, ib * bd);
|
llama_kv_cache_seq_add(ctx, slot.id, slot.ga_i, slot.n_past_se, ib * bd);
|
||||||
llama_kv_cache_seq_div(ctx, slot.id, slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w,slot.ga_n);
|
llama_kv_cache_seq_div(ctx, slot.id, slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w,slot.ga_n);
|
||||||
@@ -1921,7 +1876,7 @@ struct llama_server_context
|
|||||||
|
|
||||||
slot.ga_i += slot.ga_w / slot.ga_n;
|
slot.ga_i += slot.ga_w / slot.ga_n;
|
||||||
|
|
||||||
LOG("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i);
|
LOG_TEE("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i);
|
||||||
}
|
}
|
||||||
slot.n_past_se += n_tokens;
|
slot.n_past_se += n_tokens;
|
||||||
}
|
}
|
||||||
@@ -1936,6 +1891,7 @@ struct llama_server_context
|
|||||||
batch.n_seq_id + i,
|
batch.n_seq_id + i,
|
||||||
batch.seq_id + i,
|
batch.seq_id + i,
|
||||||
batch.logits + i,
|
batch.logits + i,
|
||||||
|
0, 0, 0, // unused
|
||||||
};
|
};
|
||||||
|
|
||||||
const int ret = llama_decode(ctx, batch_view);
|
const int ret = llama_decode(ctx, batch_view);
|
||||||
@@ -1945,11 +1901,11 @@ struct llama_server_context
|
|||||||
if (n_batch == 1 || ret < 0)
|
if (n_batch == 1 || ret < 0)
|
||||||
{
|
{
|
||||||
// if you get here, it means the KV cache is full - try increasing it via the context size
|
// if you get here, it means the KV cache is full - try increasing it via the context size
|
||||||
LOG("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
|
LOG_TEE("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
|
LOG_TEE("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
|
||||||
|
|
||||||
// retry with half the batch size to try to find a free slot in the KV cache
|
// retry with half the batch size to try to find a free slot in the KV cache
|
||||||
n_batch /= 2;
|
n_batch /= 2;
|
||||||
@@ -1974,9 +1930,9 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
|
|
||||||
completion_token_output result;
|
completion_token_output result;
|
||||||
const llama_token id = common_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i);
|
const llama_token id = llama_sampling_sample(slot.ctx_sampling, ctx, NULL, slot.i_batch - i);
|
||||||
|
|
||||||
common_sampler_accept(slot.ctx_sampling, id, true);
|
llama_sampling_accept(slot.ctx_sampling, ctx, id, true);
|
||||||
|
|
||||||
slot.n_decoded += 1;
|
slot.n_decoded += 1;
|
||||||
if (slot.n_decoded == 1)
|
if (slot.n_decoded == 1)
|
||||||
@@ -1986,14 +1942,19 @@ struct llama_server_context
|
|||||||
metrics.on_prompt_eval(slot);
|
metrics.on_prompt_eval(slot);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
llama_token_data_array cur_p = { slot.ctx_sampling->cur.data(), slot.ctx_sampling->cur.size(), false };
|
||||||
result.tok = id;
|
result.tok = id;
|
||||||
const auto * cur_p = common_sampler_get_candidates(slot.ctx_sampling);
|
|
||||||
|
|
||||||
for (size_t i = 0; i < (size_t) slot.sparams.n_probs; ++i) {
|
const int32_t n_probs = slot.sparams.n_probs;
|
||||||
result.probs.push_back({
|
if (slot.sparams.temp <= 0 && n_probs > 0)
|
||||||
cur_p->data[i].id,
|
{
|
||||||
i >= cur_p->size ? 0.0f : cur_p->data[i].p,
|
// for llama_sample_token_greedy we need to sort candidates
|
||||||
});
|
llama_sample_softmax(ctx, &cur_p);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < std::min(cur_p.size, (size_t)n_probs); ++i)
|
||||||
|
{
|
||||||
|
result.probs.push_back({cur_p.data[i].id, cur_p.data[i].p});
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!process_token(result, slot))
|
if (!process_token(result, slot))
|
||||||
@@ -2040,7 +2001,7 @@ static json format_partial_response(
|
|||||||
struct token_translator
|
struct token_translator
|
||||||
{
|
{
|
||||||
llama_context * ctx;
|
llama_context * ctx;
|
||||||
std::string operator()(llama_token tok) const { return common_token_to_piece(ctx, tok); }
|
std::string operator()(llama_token tok) const { return llama_token_to_piece(ctx, tok); }
|
||||||
std::string operator()(const completion_token_output &cto) const { return (*this)(cto.tok); }
|
std::string operator()(const completion_token_output &cto) const { return (*this)(cto.tok); }
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -2145,10 +2106,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
|||||||
data["grammar"] = predict->grammar();
|
data["grammar"] = predict->grammar();
|
||||||
data["prompt"] = predict->prompt();
|
data["prompt"] = predict->prompt();
|
||||||
data["ignore_eos"] = predict->ignoreeos();
|
data["ignore_eos"] = predict->ignoreeos();
|
||||||
data["embeddings"] = predict->embeddings();
|
|
||||||
|
|
||||||
// Add the correlationid to json data
|
|
||||||
data["correlation_id"] = predict->correlationid();
|
|
||||||
|
|
||||||
// for each image in the request, add the image data
|
// for each image in the request, add the image data
|
||||||
//
|
//
|
||||||
@@ -2234,7 +2191,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
|||||||
// }
|
// }
|
||||||
|
|
||||||
static void params_parse(const backend::ModelOptions* request,
|
static void params_parse(const backend::ModelOptions* request,
|
||||||
common_params & params) {
|
gpt_params & params) {
|
||||||
|
|
||||||
// this is comparable to: https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L1809
|
// this is comparable to: https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L1809
|
||||||
|
|
||||||
@@ -2248,7 +2205,7 @@ static void params_parse(const backend::ModelOptions* request,
|
|||||||
params.model_alias = request->modelfile();
|
params.model_alias = request->modelfile();
|
||||||
params.n_ctx = request->contextsize();
|
params.n_ctx = request->contextsize();
|
||||||
//params.memory_f16 = request->f16memory();
|
//params.memory_f16 = request->f16memory();
|
||||||
params.cpuparams.n_threads = request->threads();
|
params.n_threads = request->threads();
|
||||||
params.n_gpu_layers = request->ngpulayers();
|
params.n_gpu_layers = request->ngpulayers();
|
||||||
params.n_batch = request->nbatch();
|
params.n_batch = request->nbatch();
|
||||||
// Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1
|
// Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1
|
||||||
@@ -2260,12 +2217,6 @@ static void params_parse(const backend::ModelOptions* request,
|
|||||||
} else {
|
} else {
|
||||||
params.n_parallel = 1;
|
params.n_parallel = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *llama_grpc_servers = std::getenv("LLAMACPP_GRPC_SERVERS");
|
|
||||||
if (llama_grpc_servers != NULL) {
|
|
||||||
params.rpc_servers = std::string(llama_grpc_servers);
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Add yarn
|
// TODO: Add yarn
|
||||||
|
|
||||||
if (!request->tensorsplit().empty()) {
|
if (!request->tensorsplit().empty()) {
|
||||||
@@ -2298,13 +2249,11 @@ static void params_parse(const backend::ModelOptions* request,
|
|||||||
}
|
}
|
||||||
// get the directory of modelfile
|
// get the directory of modelfile
|
||||||
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
|
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
|
||||||
params.lora_adapters.push_back({ model_dir + "/"+request->loraadapter(), scale_factor });
|
params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor));
|
||||||
|
params.lora_base = model_dir + "/"+request->lorabase();
|
||||||
}
|
}
|
||||||
params.use_mlock = request->mlock();
|
params.use_mlock = request->mlock();
|
||||||
params.use_mmap = request->mmap();
|
params.use_mmap = request->mmap();
|
||||||
params.flash_attn = request->flashattention();
|
|
||||||
params.no_kv_offload = request->nokvoffload();
|
|
||||||
|
|
||||||
params.embedding = request->embeddings();
|
params.embedding = request->embeddings();
|
||||||
|
|
||||||
if (request->ropescaling() == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; }
|
if (request->ropescaling() == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; }
|
||||||
@@ -2342,7 +2291,7 @@ public:
|
|||||||
|
|
||||||
grpc::Status LoadModel(ServerContext* context, const backend::ModelOptions* request, backend::Result* result) {
|
grpc::Status LoadModel(ServerContext* context, const backend::ModelOptions* request, backend::Result* result) {
|
||||||
// Implement LoadModel RPC
|
// Implement LoadModel RPC
|
||||||
common_params params;
|
gpt_params params;
|
||||||
params_parse(request, params);
|
params_parse(request, params);
|
||||||
|
|
||||||
llama_backend_init();
|
llama_backend_init();
|
||||||
@@ -2383,15 +2332,6 @@ public:
|
|||||||
std::string completion_text = result.result_json.value("content", "");
|
std::string completion_text = result.result_json.value("content", "");
|
||||||
|
|
||||||
reply.set_message(completion_text);
|
reply.set_message(completion_text);
|
||||||
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
|
|
||||||
reply.set_tokens(tokens_predicted);
|
|
||||||
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
|
||||||
reply.set_prompt_tokens(tokens_evaluated);
|
|
||||||
|
|
||||||
// Log Request Correlation Id
|
|
||||||
LOG_VERBOSE("correlation:", {
|
|
||||||
{ "id", data["correlation_id"] }
|
|
||||||
});
|
|
||||||
|
|
||||||
// Send the reply
|
// Send the reply
|
||||||
writer->Write(reply);
|
writer->Write(reply);
|
||||||
@@ -2416,17 +2356,7 @@ public:
|
|||||||
std::string completion_text;
|
std::string completion_text;
|
||||||
task_result result = llama.queue_results.recv(task_id);
|
task_result result = llama.queue_results.recv(task_id);
|
||||||
if (!result.error && result.stop) {
|
if (!result.error && result.stop) {
|
||||||
|
|
||||||
// Log Request Correlation Id
|
|
||||||
LOG_VERBOSE("correlation:", {
|
|
||||||
{ "id", data["correlation_id"] }
|
|
||||||
});
|
|
||||||
|
|
||||||
completion_text = result.result_json.value("content", "");
|
completion_text = result.result_json.value("content", "");
|
||||||
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
|
|
||||||
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
|
||||||
reply->set_prompt_tokens(tokens_evaluated);
|
|
||||||
reply->set_tokens(tokens_predicted);
|
|
||||||
reply->set_message(completion_text);
|
reply->set_message(completion_text);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -2436,56 +2366,6 @@ public:
|
|||||||
|
|
||||||
return grpc::Status::OK;
|
return grpc::Status::OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// https://github.com/ggerganov/llama.cpp/blob/aa2341298924ac89778252015efcb792f2df1e20/examples/server/server.cpp#L2969
|
|
||||||
grpc::Status Embedding(ServerContext* context, const backend::PredictOptions* request, backend::EmbeddingResult* embeddingResult) {
|
|
||||||
json data = parse_options(false, request, llama);
|
|
||||||
const int task_id = llama.queue_tasks.get_new_id();
|
|
||||||
llama.queue_results.add_waiting_task_id(task_id);
|
|
||||||
llama.request_completion(task_id, { {"prompt", data["embeddings"]}, { "n_predict", 0}, {"image_data", ""} }, false, true, -1);
|
|
||||||
// get the result
|
|
||||||
task_result result = llama.queue_results.recv(task_id);
|
|
||||||
//std::cout << "Embedding result JSON" << result.result_json.dump() << std::endl;
|
|
||||||
llama.queue_results.remove_waiting_task_id(task_id);
|
|
||||||
if (!result.error && result.stop) {
|
|
||||||
std::vector<float> embeddings = result.result_json.value("embedding", std::vector<float>());
|
|
||||||
// loop the vector and set the embeddings results
|
|
||||||
for (int i = 0; i < embeddings.size(); i++) {
|
|
||||||
embeddingResult->add_embeddings(embeddings[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return grpc::Status::OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
return grpc::Status::OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
|
|
||||||
llama_client_slot* active_slot = llama.get_active_slot();
|
|
||||||
|
|
||||||
if (active_slot != nullptr) {
|
|
||||||
// Calculate the tokens per second using existing logic
|
|
||||||
double tokens_per_second = 1e3 / active_slot->t_token_generation * active_slot->n_decoded;
|
|
||||||
|
|
||||||
// Populate the response with metrics
|
|
||||||
response->set_slot_id(active_slot->id);
|
|
||||||
response->set_prompt_json_for_slot(active_slot->prompt.dump());
|
|
||||||
response->set_tokens_per_second(tokens_per_second);
|
|
||||||
response->set_tokens_generated(active_slot->n_decoded);
|
|
||||||
response->set_prompt_tokens_processed(active_slot->num_prompt_tokens_processed);
|
|
||||||
} else {
|
|
||||||
// Handle case when no active slot exists
|
|
||||||
response->set_slot_id(0);
|
|
||||||
response->set_prompt_json_for_slot("");
|
|
||||||
response->set_tokens_per_second(0);
|
|
||||||
response->set_tokens_generated(0);
|
|
||||||
response->set_prompt_tokens_processed(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
return grpc::Status::OK;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
void RunServer(const std::string& server_address) {
|
void RunServer(const std::string& server_address) {
|
||||||
|
|||||||
@@ -1,13 +0,0 @@
|
|||||||
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
|
|
||||||
index 342042ff..224db9b5 100644
|
|
||||||
--- a/examples/llava/clip.cpp
|
|
||||||
+++ b/examples/llava/clip.cpp
|
|
||||||
@@ -2419,7 +2419,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
|
||||||
struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
|
|
||||||
int* patches_data = (int*)malloc(ggml_nbytes(patches));
|
|
||||||
for (int i = 0; i < num_patches; i++) {
|
|
||||||
- patches_data[i] = i + 1;
|
|
||||||
+ patches_data[i] = i;
|
|
||||||
}
|
|
||||||
ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
|
|
||||||
free(patches_data);
|
|
||||||
@@ -1,27 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
## Patches
|
|
||||||
## Apply patches from the `patches` directory
|
|
||||||
for patch in $(ls patches); do
|
|
||||||
echo "Applying patch $patch"
|
|
||||||
patch -d llama.cpp/ -p1 < patches/$patch
|
|
||||||
done
|
|
||||||
|
|
||||||
cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
|
|
||||||
cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
|
|
||||||
cp -rfv json.hpp llama.cpp/examples/grpc-server/
|
|
||||||
cp -rfv utils.hpp llama.cpp/examples/grpc-server/
|
|
||||||
|
|
||||||
if grep -q "grpc-server" llama.cpp/examples/CMakeLists.txt; then
|
|
||||||
echo "grpc-server already added"
|
|
||||||
else
|
|
||||||
echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
|
|
||||||
fi
|
|
||||||
|
|
||||||
## XXX: In some versions of CMake clip wasn't being built before llama.
|
|
||||||
## This is an hack for now, but it should be fixed in the future.
|
|
||||||
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
|
|
||||||
cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
|
|
||||||
echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
|
|
||||||
cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
|
|
||||||
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
|
|
||||||
@@ -481,3 +481,30 @@ static inline std::vector<uint8_t> base64_decode(const std::string & encoded_str
|
|||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// random string / id
|
||||||
|
//
|
||||||
|
|
||||||
|
static std::string random_string()
|
||||||
|
{
|
||||||
|
static const std::string str("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
|
||||||
|
|
||||||
|
std::random_device rd;
|
||||||
|
std::mt19937 generator(rd());
|
||||||
|
|
||||||
|
std::string result(32, ' ');
|
||||||
|
|
||||||
|
for (int i = 0; i < 32; ++i) {
|
||||||
|
result[i] = str[generator() % str.size()];
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::string gen_chatcmplid()
|
||||||
|
{
|
||||||
|
std::stringstream chatcmplid;
|
||||||
|
chatcmplid << "chatcmpl-" << random_string();
|
||||||
|
return chatcmplid.str();
|
||||||
|
}
|
||||||
@@ -5,7 +5,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
|
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|||||||
@@ -3,9 +3,9 @@ package main
|
|||||||
// This is a wrapper to statisfy the GRPC service interface
|
// This is a wrapper to statisfy the GRPC service interface
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||||
import (
|
import (
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||||
"github.com/mudler/LocalAI/pkg/stablediffusion"
|
"github.com/go-skynet/LocalAI/pkg/stablediffusion"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Image struct {
|
type Image struct {
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
|
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|||||||
@@ -3,9 +3,9 @@ package main
|
|||||||
// This is a wrapper to statisfy the GRPC service interface
|
// This is a wrapper to statisfy the GRPC service interface
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||||
import (
|
import (
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||||
"github.com/mudler/LocalAI/pkg/tinydream"
|
"github.com/go-skynet/LocalAI/pkg/tinydream"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Image struct {
|
type Image struct {
|
||||||
|
|||||||
@@ -5,8 +5,8 @@ package main
|
|||||||
import (
|
import (
|
||||||
bert "github.com/go-skynet/go-bert.cpp"
|
bert "github.com/go-skynet/go-bert.cpp"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Embeddings struct {
|
type Embeddings struct {
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
|
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|||||||
62
backend/go/llm/gpt4all/gpt4all.go
Normal file
62
backend/go/llm/gpt4all/gpt4all.go
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// This is a wrapper to statisfy the GRPC service interface
|
||||||
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||||
|
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||||
|
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
|
||||||
|
)
|
||||||
|
|
||||||
|
type LLM struct {
|
||||||
|
base.SingleThread
|
||||||
|
|
||||||
|
gpt4all *gpt4all.Model
|
||||||
|
}
|
||||||
|
|
||||||
|
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||||
|
model, err := gpt4all.New(opts.ModelFile,
|
||||||
|
gpt4all.SetThreads(int(opts.Threads)),
|
||||||
|
gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath))
|
||||||
|
llm.gpt4all = model
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildPredictOptions(opts *pb.PredictOptions) []gpt4all.PredictOption {
|
||||||
|
predictOptions := []gpt4all.PredictOption{
|
||||||
|
gpt4all.SetTemperature(float64(opts.Temperature)),
|
||||||
|
gpt4all.SetTopP(float64(opts.TopP)),
|
||||||
|
gpt4all.SetTopK(int(opts.TopK)),
|
||||||
|
gpt4all.SetTokens(int(opts.Tokens)),
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.Batch != 0 {
|
||||||
|
predictOptions = append(predictOptions, gpt4all.SetBatch(int(opts.Batch)))
|
||||||
|
}
|
||||||
|
return predictOptions
|
||||||
|
}
|
||||||
|
|
||||||
|
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
||||||
|
return llm.gpt4all.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||||
|
predictOptions := buildPredictOptions(opts)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
llm.gpt4all.SetTokenCallback(func(token string) bool {
|
||||||
|
results <- token
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
_, err := llm.gpt4all.Predict(opts.Prompt, predictOptions...)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("err: ", err)
|
||||||
|
}
|
||||||
|
llm.gpt4all.SetTokenCallback(nil)
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
21
backend/go/llm/gpt4all/main.go
Normal file
21
backend/go/llm/gpt4all/main.go
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// Note: this is started internally by LocalAI and a server is allocated for each model
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
|
||||||
|
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -4,11 +4,10 @@ package main
|
|||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||||
"github.com/mudler/LocalAI/pkg/langchain"
|
"github.com/go-skynet/LocalAI/pkg/langchain"
|
||||||
)
|
)
|
||||||
|
|
||||||
type LLM struct {
|
type LLM struct {
|
||||||
@@ -19,14 +18,9 @@ type LLM struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||||
var err error
|
llm.langchain, _ = langchain.NewHuggingFace(opts.Model)
|
||||||
hfToken := os.Getenv("HUGGINGFACEHUB_API_TOKEN")
|
|
||||||
if hfToken == "" {
|
|
||||||
return fmt.Errorf("no huggingface token provided")
|
|
||||||
}
|
|
||||||
llm.langchain, err = langchain.NewHuggingFace(opts.Model, hfToken)
|
|
||||||
llm.model = opts.Model
|
llm.model = opts.Model
|
||||||
return err
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
|
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|||||||
@@ -5,9 +5,9 @@ package main
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||||
|
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||||
"github.com/go-skynet/go-llama.cpp"
|
"github.com/go-skynet/go-llama.cpp"
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type LLM struct {
|
type LLM struct {
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
|
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|||||||
@@ -6,9 +6,9 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
|
||||||
|
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||||
|
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||||
"github.com/go-skynet/go-llama.cpp"
|
"github.com/go-skynet/go-llama.cpp"
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type LLM struct {
|
type LLM struct {
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
|
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
|
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|||||||
@@ -7,8 +7,8 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
|
||||||
"github.com/donomii/go-rwkv.cpp"
|
"github.com/donomii/go-rwkv.cpp"
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||||
)
|
)
|
||||||
|
|
||||||
const tokenizerSuffix = ".tokenizer.json"
|
const tokenizerSuffix = ".tokenizer.json"
|
||||||
@@ -31,7 +31,7 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
|||||||
model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads()))
|
model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads()))
|
||||||
|
|
||||||
if model == nil {
|
if model == nil {
|
||||||
return fmt.Errorf("rwkv could not load model")
|
return fmt.Errorf("could not load model")
|
||||||
}
|
}
|
||||||
llm.rwkv = model
|
llm.rwkv = model
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
@@ -1,14 +0,0 @@
|
|||||||
//go:build debug
|
|
||||||
// +build debug
|
|
||||||
|
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"github.com/rs/zerolog/log"
|
|
||||||
)
|
|
||||||
|
|
||||||
func assert(cond bool, msg string) {
|
|
||||||
if !cond {
|
|
||||||
log.Fatal().Stack().Msg(msg)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// Note: this is started internally by LocalAI and a server is allocated for each store
|
|
||||||
|
|
||||||
import (
|
|
||||||
"flag"
|
|
||||||
"os"
|
|
||||||
|
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
|
||||||
"github.com/rs/zerolog"
|
|
||||||
"github.com/rs/zerolog/log"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
|
|
||||||
|
|
||||||
flag.Parse()
|
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, NewStore()); err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
//go:build !debug
|
|
||||||
// +build !debug
|
|
||||||
|
|
||||||
package main
|
|
||||||
|
|
||||||
func assert(cond bool, msg string) {
|
|
||||||
}
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user