mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-03 11:13:31 -05:00
Compare commits
278 Commits
docker_ima
...
llama31_gr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ff55f311cb | ||
|
|
0802895cd2 | ||
|
|
9fee46207a | ||
|
|
bd900945f7 | ||
|
|
89484efaed | ||
|
|
a9757fb057 | ||
|
|
1c96e0b79e | ||
|
|
c7f0743f48 | ||
|
|
ead69a116a | ||
|
|
0314b37cd8 | ||
|
|
703cd08f01 | ||
|
|
b53947a5bb | ||
|
|
39de3cf21d | ||
|
|
e3cd11cc0a | ||
|
|
5e5037f10d | ||
|
|
9c331239d9 | ||
|
|
36789e9ead | ||
|
|
6ec593c237 | ||
|
|
bbb1dc2ae0 | ||
|
|
385d8dc29b | ||
|
|
fb574434a4 | ||
|
|
7ab3217df0 | ||
|
|
2f9f04b260 | ||
|
|
8385eb2a59 | ||
|
|
99324eeef0 | ||
|
|
ede352256b | ||
|
|
b555b64616 | ||
|
|
824cc816ea | ||
|
|
a1bc2e9771 | ||
|
|
9fc09b32cf | ||
|
|
8ec7a0a407 | ||
|
|
d3166e8571 | ||
|
|
2966979161 | ||
|
|
f4ed47bf95 | ||
|
|
1a75546b27 | ||
|
|
a6b92af875 | ||
|
|
3dc601c470 | ||
|
|
153e977155 | ||
|
|
7d61de63ae | ||
|
|
bcd9e153ba | ||
|
|
19282af059 | ||
|
|
9c0c11e8a0 | ||
|
|
3f7eddb039 | ||
|
|
77ad49333a | ||
|
|
ef5e8326c8 | ||
|
|
86509e6002 | ||
|
|
8667a67695 | ||
|
|
f505d7ab3f | ||
|
|
450dbed820 | ||
|
|
46b86f7e6e | ||
|
|
0ee1f8c1cf | ||
|
|
87bd831aba | ||
|
|
f9f83791d1 | ||
|
|
e75f73bf73 | ||
|
|
bd277162c7 | ||
|
|
f19ee465d2 | ||
|
|
7b85ff7280 | ||
|
|
134cb993c2 | ||
|
|
2cf28f3c01 | ||
|
|
18c0f4718d | ||
|
|
f878b63ee4 | ||
|
|
6eaa01db15 | ||
|
|
1d605073a4 | ||
|
|
fc29c04f82 | ||
|
|
63fc22baab | ||
|
|
6a919b30ac | ||
|
|
3f7ec2e596 | ||
|
|
82d5123c1e | ||
|
|
252961751c | ||
|
|
031627584b | ||
|
|
24a8eebcef | ||
|
|
bf9dd1de7f | ||
|
|
35d55572ac | ||
|
|
c7357a9872 | ||
|
|
27e16a00fa | ||
|
|
919e2e4369 | ||
|
|
96f67efe32 | ||
|
|
607900a4bb | ||
|
|
53c8ab1020 | ||
|
|
81d01e8a5f | ||
|
|
b8b0c7ad0b | ||
|
|
6de12c694a | ||
|
|
25f97910cc | ||
|
|
89bd04c0ac | ||
|
|
195d3b9f03 | ||
|
|
865496f80b | ||
|
|
4a22e54cda | ||
|
|
bd8e2320c3 | ||
|
|
b5661d6302 | ||
|
|
e7813d4ec4 | ||
|
|
d384627fa9 | ||
|
|
1ed6b96dd7 | ||
|
|
53f90218b0 | ||
|
|
112d6a3083 | ||
|
|
1f7cedf5ee | ||
|
|
50cdfe0090 | ||
|
|
c6838d4301 | ||
|
|
4e84764787 | ||
|
|
f521e50fa8 | ||
|
|
09de674b03 | ||
|
|
b1da8aa145 | ||
|
|
58f8f8d381 | ||
|
|
db658adc7a | ||
|
|
01a4f103f5 | ||
|
|
38b3115a15 | ||
|
|
a3eb6e04c1 | ||
|
|
a83fa725e1 | ||
|
|
b591d8c659 | ||
|
|
bc2b8e0063 | ||
|
|
85c7b28364 | ||
|
|
d1a5c343b7 | ||
|
|
7dbf49ab22 | ||
|
|
b375a654e7 | ||
|
|
12a96c520a | ||
|
|
35561edb6e | ||
|
|
6564e7ea01 | ||
|
|
121bce581c | ||
|
|
9d3c6d321e | ||
|
|
23835f8cca | ||
|
|
06c315bcb3 | ||
|
|
e2ac43853f | ||
|
|
b6ddb53ceb | ||
|
|
edea2e7c3a | ||
|
|
2a2ef49b74 | ||
|
|
6585ba2a9c | ||
|
|
fbd0a270b3 | ||
|
|
fc60031ac1 | ||
|
|
85fe197684 | ||
|
|
6489b456dd | ||
|
|
d6ce4b6845 | ||
|
|
57ccd1873d | ||
|
|
b7c0d46170 | ||
|
|
ce035416aa | ||
|
|
4bc92d448c | ||
|
|
9c0df648a6 | ||
|
|
be35dc451c | ||
|
|
5a4dc2b7dc | ||
|
|
9eb27c563c | ||
|
|
405794d4ca | ||
|
|
6ba730d7f8 | ||
|
|
00f257c6f2 | ||
|
|
4f42d865a2 | ||
|
|
4ababe33e4 | ||
|
|
2edb8f8756 | ||
|
|
5305d4dcbf | ||
|
|
2bbbfa849f | ||
|
|
e070134c6a | ||
|
|
c03045c5c7 | ||
|
|
a01274b521 | ||
|
|
bda2222fee | ||
|
|
fbef2aa984 | ||
|
|
b2f9873f84 | ||
|
|
0a60ce9477 | ||
|
|
f5a9381df3 | ||
|
|
84d7dc753a | ||
|
|
f65e9cc22f | ||
|
|
957ae167c5 | ||
|
|
f579954c83 | ||
|
|
1c2e54e5be | ||
|
|
6a60774fa1 | ||
|
|
d3beb2f4e4 | ||
|
|
62aa3bfdb2 | ||
|
|
babd5cc1a0 | ||
|
|
a657aac7dc | ||
|
|
1448ff1309 | ||
|
|
d2c912b2df | ||
|
|
88aff0bc99 | ||
|
|
0b212de447 | ||
|
|
f75213bd81 | ||
|
|
55c2076204 | ||
|
|
705f54257e | ||
|
|
9fa92e14cd | ||
|
|
34f39e8bdf | ||
|
|
87c8f2368b | ||
|
|
20f6f30a31 | ||
|
|
1fe82f70d3 | ||
|
|
7ee93a8b5c | ||
|
|
6b59f79364 | ||
|
|
ffad7890fe | ||
|
|
10491892c4 | ||
|
|
d08a963d1c | ||
|
|
272fbab6f1 | ||
|
|
70aeba7b6a | ||
|
|
3ed03d04df | ||
|
|
b3b8010930 | ||
|
|
30861f49a8 | ||
|
|
5345f30a33 | ||
|
|
de2bf82e09 | ||
|
|
67b20a7147 | ||
|
|
905ed62ee3 | ||
|
|
76bd8083c1 | ||
|
|
d55edf3bfa | ||
|
|
1ad84ec396 | ||
|
|
fc87507012 | ||
|
|
68e15e71be | ||
|
|
bb063ab78a | ||
|
|
6886e4e5ab | ||
|
|
8c08643c65 | ||
|
|
2c2efe2d11 | ||
|
|
db42a93dab | ||
|
|
dcccfc2cce | ||
|
|
96127e9967 | ||
|
|
41bce28d5f | ||
|
|
a00e9a82ae | ||
|
|
95e31fd279 | ||
|
|
fb04347d3b | ||
|
|
f5bcba70da | ||
|
|
d5846c8639 | ||
|
|
664b2e352b | ||
|
|
dcbdc12cc9 | ||
|
|
c87fca3ec1 | ||
|
|
642f6cee75 | ||
|
|
03efa26ff5 | ||
|
|
b6b8ab6c21 | ||
|
|
b60acabb82 | ||
|
|
e7eb81beeb | ||
|
|
e56110543b | ||
|
|
fd0bc21c3e | ||
|
|
3bc5652b27 | ||
|
|
59ef426fbf | ||
|
|
28c6daf916 | ||
|
|
133987b1fb | ||
|
|
cbb93bd8ec | ||
|
|
7223284323 | ||
|
|
8d046de287 | ||
|
|
2845baecd5 | ||
|
|
d5a56f04be | ||
|
|
f120a0c9f9 | ||
|
|
401ee553f4 | ||
|
|
e3c89ac9cd | ||
|
|
b59841cf69 | ||
|
|
cca881ec49 | ||
|
|
dd95ae130f | ||
|
|
185ab93b0d | ||
|
|
bb38f051e6 | ||
|
|
2a05c39adf | ||
|
|
deb5311373 | ||
|
|
bdfebfe0f4 | ||
|
|
3a88299cfe | ||
|
|
748e4cb6b1 | ||
|
|
7c554be4ea | ||
|
|
6011845ee9 | ||
|
|
c184f23621 | ||
|
|
8cec0304ee | ||
|
|
dc51869c61 | ||
|
|
f881d25630 | ||
|
|
683c306f90 | ||
|
|
a985d8c239 | ||
|
|
17608ea6aa | ||
|
|
9280060e05 | ||
|
|
cbcb74e159 | ||
|
|
f5c1518438 | ||
|
|
29e4729c22 | ||
|
|
68f3943e0f | ||
|
|
b59f81abff | ||
|
|
94c5524277 | ||
|
|
5b3211e71c | ||
|
|
5c135d0dec | ||
|
|
a4c96836ac | ||
|
|
ff19b22d72 | ||
|
|
d96d4883ce | ||
|
|
83576d7f57 | ||
|
|
23b926d43e | ||
|
|
9aec1b3a61 | ||
|
|
2d65df38d1 | ||
|
|
6f5b6711ea | ||
|
|
89c888bf55 | ||
|
|
a637ee2278 | ||
|
|
1b270759ef | ||
|
|
b10441a41c | ||
|
|
97de2b6550 | ||
|
|
497a037344 | ||
|
|
cf0af16695 | ||
|
|
62b4030278 | ||
|
|
c047c19145 | ||
|
|
b941732f54 | ||
|
|
e591ff2e74 | ||
|
|
bd2f95c130 |
80
.github/check_and_update.py
vendored
Normal file
80
.github/check_and_update.py
vendored
Normal file
@@ -0,0 +1,80 @@
|
||||
import hashlib
|
||||
from huggingface_hub import hf_hub_download, get_paths_info
|
||||
import requests
|
||||
import sys
|
||||
import os
|
||||
|
||||
uri = sys.argv[1]
|
||||
file_name = uri.split('/')[-1]
|
||||
|
||||
# Function to parse the URI and determine download method
|
||||
def parse_uri(uri):
|
||||
if uri.startswith('huggingface://'):
|
||||
repo_id = uri.split('://')[1]
|
||||
return 'huggingface', repo_id.rsplit('/', 1)[0]
|
||||
elif 'huggingface.co' in uri:
|
||||
parts = uri.split('/resolve/')
|
||||
if len(parts) > 1:
|
||||
repo_path = parts[0].split('https://huggingface.co/')[-1]
|
||||
return 'huggingface', repo_path
|
||||
return 'direct', uri
|
||||
|
||||
def calculate_sha256(file_path):
|
||||
sha256_hash = hashlib.sha256()
|
||||
with open(file_path, 'rb') as f:
|
||||
for byte_block in iter(lambda: f.read(4096), b''):
|
||||
sha256_hash.update(byte_block)
|
||||
return sha256_hash.hexdigest()
|
||||
|
||||
def manual_safety_check_hf(repo_id):
|
||||
scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
|
||||
scan = scanResponse.json()
|
||||
if scan['hasUnsafeFile']:
|
||||
return scan
|
||||
return None
|
||||
|
||||
download_type, repo_id_or_url = parse_uri(uri)
|
||||
|
||||
new_checksum = None
|
||||
file_path = None
|
||||
|
||||
# Decide download method based on URI type
|
||||
if download_type == 'huggingface':
|
||||
# Check if the repo is flagged as dangerous by HF
|
||||
hazard = manual_safety_check_hf(repo_id_or_url)
|
||||
if hazard != None:
|
||||
print(f'Error: HuggingFace has detected security problems for {repo_id_or_url}: {str(hazard)}', filename=file_name)
|
||||
sys.exit(5)
|
||||
# Use HF API to pull sha
|
||||
for file in get_paths_info(repo_id_or_url, [file_name], repo_type='model'):
|
||||
try:
|
||||
new_checksum = file.lfs.sha256
|
||||
break
|
||||
except Exception as e:
|
||||
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
|
||||
sys.exit(2)
|
||||
if new_checksum is None:
|
||||
try:
|
||||
file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
|
||||
except Exception as e:
|
||||
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
|
||||
sys.exit(2)
|
||||
else:
|
||||
response = requests.get(repo_id_or_url)
|
||||
if response.status_code == 200:
|
||||
with open(file_name, 'wb') as f:
|
||||
f.write(response.content)
|
||||
file_path = file_name
|
||||
elif response.status_code == 404:
|
||||
print(f'File not found: {response.status_code}', file=sys.stderr)
|
||||
sys.exit(2)
|
||||
else:
|
||||
print(f'Error downloading file: {response.status_code}', file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
if new_checksum is None:
|
||||
new_checksum = calculate_sha256(file_path)
|
||||
print(new_checksum)
|
||||
os.remove(file_path)
|
||||
else:
|
||||
print(new_checksum)
|
||||
79
.github/checksum_checker.sh
vendored
79
.github/checksum_checker.sh
vendored
@@ -14,77 +14,14 @@ function check_and_update_checksum() {
|
||||
idx="$5"
|
||||
|
||||
# Download the file and calculate new checksum using Python
|
||||
new_checksum=$(python3 -c "
|
||||
import hashlib
|
||||
from huggingface_hub import hf_hub_download, get_paths_info
|
||||
import requests
|
||||
import sys
|
||||
import os
|
||||
new_checksum=$(python3 ./.github/check_and_update.py $uri)
|
||||
result=$?
|
||||
|
||||
uri = '$uri'
|
||||
file_name = uri.split('/')[-1]
|
||||
|
||||
# Function to parse the URI and determine download method
|
||||
# Function to parse the URI and determine download method
|
||||
def parse_uri(uri):
|
||||
if uri.startswith('huggingface://'):
|
||||
repo_id = uri.split('://')[1]
|
||||
return 'huggingface', repo_id.rsplit('/', 1)[0]
|
||||
elif 'huggingface.co' in uri:
|
||||
parts = uri.split('/resolve/')
|
||||
if len(parts) > 1:
|
||||
repo_path = parts[0].split('https://huggingface.co/')[-1]
|
||||
return 'huggingface', repo_path
|
||||
return 'direct', uri
|
||||
|
||||
def calculate_sha256(file_path):
|
||||
sha256_hash = hashlib.sha256()
|
||||
with open(file_path, 'rb') as f:
|
||||
for byte_block in iter(lambda: f.read(4096), b''):
|
||||
sha256_hash.update(byte_block)
|
||||
return sha256_hash.hexdigest()
|
||||
|
||||
download_type, repo_id_or_url = parse_uri(uri)
|
||||
|
||||
new_checksum = None
|
||||
|
||||
# Decide download method based on URI type
|
||||
if download_type == 'huggingface':
|
||||
# Use HF API to pull sha
|
||||
for file in get_paths_info(repo_id_or_url, [file_name], repo_type='model'):
|
||||
try:
|
||||
new_checksum = file.lfs.sha256
|
||||
break
|
||||
except Exception as e:
|
||||
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
|
||||
sys.exit(2)
|
||||
if new_checksum is None:
|
||||
try:
|
||||
file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
|
||||
except Exception as e:
|
||||
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
|
||||
sys.exit(2)
|
||||
else:
|
||||
response = requests.get(repo_id_or_url)
|
||||
if response.status_code == 200:
|
||||
with open(file_name, 'wb') as f:
|
||||
f.write(response.content)
|
||||
file_path = file_name
|
||||
elif response.status_code == 404:
|
||||
print(f'File not found: {response.status_code}', file=sys.stderr)
|
||||
sys.exit(2)
|
||||
else:
|
||||
print(f'Error downloading file: {response.status_code}', file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
if new_checksum is None:
|
||||
new_checksum = calculate_sha256(file_path)
|
||||
print(new_checksum)
|
||||
os.remove(file_path)
|
||||
else:
|
||||
print(new_checksum)
|
||||
|
||||
")
|
||||
if [[ $result -eq 5 ]]; then
|
||||
echo "Contaminated entry detected, deleting entry for $model_name..."
|
||||
yq eval -i "del([$idx])" "$input_yaml"
|
||||
return
|
||||
fi
|
||||
|
||||
if [[ "$new_checksum" == "" ]]; then
|
||||
echo "Error calculating checksum for $file_name. Skipping..."
|
||||
@@ -94,7 +31,7 @@ else:
|
||||
echo "Checksum for $file_name: $new_checksum"
|
||||
|
||||
# Compare and update the YAML file if checksums do not match
|
||||
result=$?
|
||||
|
||||
if [[ $result -eq 2 ]]; then
|
||||
echo "File not found, deleting entry for $file_name..."
|
||||
# yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\"))" "$input_yaml"
|
||||
|
||||
112
.github/dependabot.yml
vendored
112
.github/dependabot.yml
vendored
@@ -1,6 +1,10 @@
|
||||
# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "gitsubmodule"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "gomod"
|
||||
directory: "/"
|
||||
schedule:
|
||||
@@ -23,3 +27,111 @@ updates:
|
||||
schedule:
|
||||
# Check for updates to GitHub Actions every weekday
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/autogptq"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/bark"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/common/template"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/coqui"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/diffusers"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/exllama"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/exllama2"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/mamba"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/openvoice"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/parler-tts"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/petals"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/rerankers"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/sentencetransformers"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/transformers"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/transformers-musicgen"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/vall-e-x"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/vllm"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/examples/chainlit"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/examples/functions"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/examples/langchain/langchainpy-localai-example"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/examples/langchain-chroma"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/examples/streamlit-bot"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "docker"
|
||||
directory: "/examples/k8sgpt"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "docker"
|
||||
directory: "/examples/kubernetes"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "docker"
|
||||
directory: "/examples/langchain"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "gomod"
|
||||
directory: "/examples/semantic-todo"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "docker"
|
||||
directory: "/examples/telegram-bot"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
|
||||
3
.github/release.yml
vendored
3
.github/release.yml
vendored
@@ -13,6 +13,9 @@ changelog:
|
||||
labels:
|
||||
- bug
|
||||
- regression
|
||||
- title: "🖧 P2P area"
|
||||
labels:
|
||||
- area/p2p
|
||||
- title: Exciting New Features 🎉
|
||||
labels:
|
||||
- Semver-Minor
|
||||
|
||||
8
.github/workflows/bump_deps.yaml
vendored
8
.github/workflows/bump_deps.yaml
vendored
@@ -9,9 +9,6 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- repository: "go-skynet/go-llama.cpp"
|
||||
variable: "GOLLAMA_VERSION"
|
||||
branch: "master"
|
||||
- repository: "ggerganov/llama.cpp"
|
||||
variable: "CPPLLAMA_VERSION"
|
||||
branch: "master"
|
||||
@@ -30,9 +27,6 @@ jobs:
|
||||
- repository: "go-skynet/bloomz.cpp"
|
||||
variable: "BLOOMZ_VERSION"
|
||||
branch: "main"
|
||||
- repository: "nomic-ai/gpt4all"
|
||||
variable: "GPT4ALL_VERSION"
|
||||
branch: "main"
|
||||
- repository: "mudler/go-ggllm.cpp"
|
||||
variable: "GOGGLLM_VERSION"
|
||||
branch: "master"
|
||||
@@ -54,7 +48,7 @@ jobs:
|
||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||
push-to-fork: ci-forks/LocalAI
|
||||
commit-message: ':arrow_up: Update ${{ matrix.repository }}'
|
||||
title: ':arrow_up: Update ${{ matrix.repository }}'
|
||||
title: 'chore: :arrow_up: Update ${{ matrix.repository }}'
|
||||
branch: "update/${{ matrix.variable }}"
|
||||
body: Bump of ${{ matrix.repository }} version
|
||||
signoff: true
|
||||
|
||||
2
.github/workflows/bump_docs.yaml
vendored
2
.github/workflows/bump_docs.yaml
vendored
@@ -22,7 +22,7 @@ jobs:
|
||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||
push-to-fork: ci-forks/LocalAI
|
||||
commit-message: ':arrow_up: Update docs version ${{ matrix.repository }}'
|
||||
title: ':arrow_up: Update docs version ${{ matrix.repository }}'
|
||||
title: 'docs: :arrow_up: update docs version ${{ matrix.repository }}'
|
||||
branch: "update/docs"
|
||||
body: Bump of ${{ matrix.repository }} version inside docs
|
||||
signoff: true
|
||||
|
||||
4
.github/workflows/checksum_checker.yaml
vendored
4
.github/workflows/checksum_checker.yaml
vendored
@@ -20,12 +20,12 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y pip wget
|
||||
sudo pip install --upgrade pip
|
||||
sudo pip install --upgrade pip
|
||||
pip install huggingface_hub
|
||||
- name: 'Setup yq'
|
||||
uses: dcarbone/install-yq-action@v1.1.1
|
||||
with:
|
||||
version: 'v4.43.1'
|
||||
version: 'v4.44.2'
|
||||
download-compressed: true
|
||||
force: true
|
||||
|
||||
|
||||
2
.github/workflows/dependabot_auto.yml
vendored
2
.github/workflows/dependabot_auto.yml
vendored
@@ -14,7 +14,7 @@ jobs:
|
||||
steps:
|
||||
- name: Dependabot metadata
|
||||
id: metadata
|
||||
uses: dependabot/fetch-metadata@v2.1.0
|
||||
uses: dependabot/fetch-metadata@v2.2.0
|
||||
with:
|
||||
github-token: "${{ secrets.GITHUB_TOKEN }}"
|
||||
skip-commit-verification: true
|
||||
|
||||
83
.github/workflows/disabled/comment-pr.yaml
vendored
Normal file
83
.github/workflows/disabled/comment-pr.yaml
vendored
Normal file
@@ -0,0 +1,83 @@
|
||||
name: Comment PRs
|
||||
on:
|
||||
pull_request_target:
|
||||
|
||||
jobs:
|
||||
comment-pr:
|
||||
env:
|
||||
MODEL_NAME: hermes-2-theta-llama-3-8b
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
ref: "${{ github.event.pull_request.merge_commit_sha }}"
|
||||
fetch-depth: 0 # needed to checkout all branches for this Action to work
|
||||
- uses: mudler/localai-github-action@v1
|
||||
with:
|
||||
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
||||
# Check the PR diff using the current branch and the base branch of the PR
|
||||
- uses: GrantBirki/git-diff-action@v2.7.0
|
||||
id: git-diff-action
|
||||
with:
|
||||
json_diff_file_output: diff.json
|
||||
raw_diff_file_output: diff.txt
|
||||
file_output_only: "true"
|
||||
base_branch: ${{ github.event.pull_request.base.sha }}
|
||||
- name: Show diff
|
||||
env:
|
||||
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
|
||||
run: |
|
||||
cat $DIFF
|
||||
- name: Summarize
|
||||
env:
|
||||
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
|
||||
id: summarize
|
||||
run: |
|
||||
input="$(cat $DIFF)"
|
||||
|
||||
# Define the LocalAI API endpoint
|
||||
API_URL="http://localhost:8080/chat/completions"
|
||||
|
||||
# Create a JSON payload using jq to handle special characters
|
||||
json_payload=$(jq -n --arg input "$input" '{
|
||||
model: "'$MODEL_NAME'",
|
||||
messages: [
|
||||
{
|
||||
role: "system",
|
||||
content: "You are LocalAI-bot in Github that helps understanding PRs and assess complexity. Explain what has changed in this PR diff and why"
|
||||
},
|
||||
{
|
||||
role: "user",
|
||||
content: $input
|
||||
}
|
||||
]
|
||||
}')
|
||||
|
||||
# Send the request to LocalAI
|
||||
response=$(curl -s -X POST $API_URL \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$json_payload")
|
||||
|
||||
# Extract the summary from the response
|
||||
summary="$(echo $response | jq -r '.choices[0].message.content')"
|
||||
|
||||
# Print the summary
|
||||
# -H "Authorization: Bearer $API_KEY" \
|
||||
echo "Summary:"
|
||||
echo "$summary"
|
||||
echo "payload sent"
|
||||
echo "$json_payload"
|
||||
{
|
||||
echo 'message<<EOF'
|
||||
echo "$summary"
|
||||
echo EOF
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
docker logs --tail 10 local-ai
|
||||
- uses: mshick/add-pr-comment@v2
|
||||
if: always()
|
||||
with:
|
||||
repo-token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||
message: ${{ steps.summarize.outputs.message }}
|
||||
message-failure: |
|
||||
Uh oh! Could not analyze this PR, maybe it's too big?
|
||||
6
.github/workflows/generate_grpc_cache.yaml
vendored
6
.github/workflows/generate_grpc_cache.yaml
vendored
@@ -75,7 +75,7 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Cache GRPC
|
||||
uses: docker/build-push-action@v5
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
builder: ${{ steps.buildx.outputs.name }}
|
||||
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
|
||||
@@ -84,11 +84,11 @@ jobs:
|
||||
build-args: |
|
||||
GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
|
||||
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
||||
GRPC_VERSION=v1.64.0
|
||||
GRPC_VERSION=v1.65.0
|
||||
context: .
|
||||
file: ./Dockerfile
|
||||
cache-to: type=gha,ignore-error=true
|
||||
cache-from: type=gha
|
||||
target: grpc
|
||||
platforms: ${{ matrix.platforms }}
|
||||
push: false
|
||||
push: false
|
||||
|
||||
4
.github/workflows/generate_intel_image.yaml
vendored
4
.github/workflows/generate_intel_image.yaml
vendored
@@ -15,7 +15,7 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- base-image: intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04
|
||||
- base-image: intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04
|
||||
runs-on: 'ubuntu-latest'
|
||||
platforms: 'linux/amd64'
|
||||
runs-on: ${{matrix.runs-on}}
|
||||
@@ -46,7 +46,7 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Cache Intel images
|
||||
uses: docker/build-push-action@v5
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
builder: ${{ steps.buildx.outputs.name }}
|
||||
build-args: |
|
||||
|
||||
185
.github/workflows/image-pr.yml
vendored
185
.github/workflows/image-pr.yml
vendored
@@ -35,18 +35,19 @@ jobs:
|
||||
max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }}
|
||||
matrix:
|
||||
include:
|
||||
- build-type: ''
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-ffmpeg'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
base-image: "ubuntu:22.04"
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
# This is basically covered by the AIO test
|
||||
# - build-type: ''
|
||||
# platforms: 'linux/amd64'
|
||||
# tag-latest: 'false'
|
||||
# tag-suffix: '-ffmpeg'
|
||||
# ffmpeg: 'true'
|
||||
# image-type: 'extras'
|
||||
# runs-on: 'arc-runner-set'
|
||||
# base-image: "ubuntu:22.04"
|
||||
# makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "5"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12-ffmpeg'
|
||||
@@ -55,85 +56,85 @@ jobs:
|
||||
runs-on: 'arc-runner-set'
|
||||
base-image: "ubuntu:22.04"
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'hipblas'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-hipblas'
|
||||
ffmpeg: 'false'
|
||||
image-type: 'extras'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'sycl_f16'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
tag-suffix: 'sycl-f16-ffmpeg'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
core-image-build:
|
||||
uses: ./.github/workflows/image_build.yml
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
tag-suffix: ${{ matrix.tag-suffix }}
|
||||
ffmpeg: ${{ matrix.ffmpeg }}
|
||||
image-type: ${{ matrix.image-type }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||
platforms: ${{ matrix.platforms }}
|
||||
runs-on: ${{ matrix.runs-on }}
|
||||
base-image: ${{ matrix.base-image }}
|
||||
grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||
makeflags: ${{ matrix.makeflags }}
|
||||
secrets:
|
||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- build-type: ''
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
- build-type: 'sycl_f16'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
tag-suffix: 'sycl-f16-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "5"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
- build-type: 'vulkan'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-vulkan-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
# - build-type: 'hipblas'
|
||||
# platforms: 'linux/amd64'
|
||||
# tag-latest: 'false'
|
||||
# tag-suffix: '-hipblas'
|
||||
# ffmpeg: 'false'
|
||||
# image-type: 'extras'
|
||||
# base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
# grpc-base-image: "ubuntu:22.04"
|
||||
# runs-on: 'arc-runner-set'
|
||||
# makeflags: "--jobs=3 --output-sync=target"
|
||||
# - build-type: 'sycl_f16'
|
||||
# platforms: 'linux/amd64'
|
||||
# tag-latest: 'false'
|
||||
# base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
# grpc-base-image: "ubuntu:22.04"
|
||||
# tag-suffix: 'sycl-f16-ffmpeg'
|
||||
# ffmpeg: 'true'
|
||||
# image-type: 'extras'
|
||||
# runs-on: 'arc-runner-set'
|
||||
# makeflags: "--jobs=3 --output-sync=target"
|
||||
# core-image-build:
|
||||
# uses: ./.github/workflows/image_build.yml
|
||||
# with:
|
||||
# tag-latest: ${{ matrix.tag-latest }}
|
||||
# tag-suffix: ${{ matrix.tag-suffix }}
|
||||
# ffmpeg: ${{ matrix.ffmpeg }}
|
||||
# image-type: ${{ matrix.image-type }}
|
||||
# build-type: ${{ matrix.build-type }}
|
||||
# cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||
# cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||
# platforms: ${{ matrix.platforms }}
|
||||
# runs-on: ${{ matrix.runs-on }}
|
||||
# base-image: ${{ matrix.base-image }}
|
||||
# grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||
# makeflags: ${{ matrix.makeflags }}
|
||||
# secrets:
|
||||
# dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
# dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
# quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
# quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
# strategy:
|
||||
# matrix:
|
||||
# include:
|
||||
# - build-type: ''
|
||||
# platforms: 'linux/amd64'
|
||||
# tag-latest: 'false'
|
||||
# tag-suffix: '-ffmpeg-core'
|
||||
# ffmpeg: 'true'
|
||||
# image-type: 'core'
|
||||
# runs-on: 'ubuntu-latest'
|
||||
# base-image: "ubuntu:22.04"
|
||||
# makeflags: "--jobs=4 --output-sync=target"
|
||||
# - build-type: 'sycl_f16'
|
||||
# platforms: 'linux/amd64'
|
||||
# tag-latest: 'false'
|
||||
# base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
# grpc-base-image: "ubuntu:22.04"
|
||||
# tag-suffix: 'sycl-f16-ffmpeg-core'
|
||||
# ffmpeg: 'true'
|
||||
# image-type: 'core'
|
||||
# runs-on: 'arc-runner-set'
|
||||
# makeflags: "--jobs=3 --output-sync=target"
|
||||
# - build-type: 'cublas'
|
||||
# cuda-major-version: "12"
|
||||
# cuda-minor-version: "0"
|
||||
# platforms: 'linux/amd64'
|
||||
# tag-latest: 'false'
|
||||
# tag-suffix: '-cublas-cuda12-ffmpeg-core'
|
||||
# ffmpeg: 'true'
|
||||
# image-type: 'core'
|
||||
# runs-on: 'ubuntu-latest'
|
||||
# base-image: "ubuntu:22.04"
|
||||
# makeflags: "--jobs=4 --output-sync=target"
|
||||
# - build-type: 'vulkan'
|
||||
# platforms: 'linux/amd64'
|
||||
# tag-latest: 'false'
|
||||
# tag-suffix: '-vulkan-ffmpeg-core'
|
||||
# ffmpeg: 'true'
|
||||
# image-type: 'core'
|
||||
# runs-on: 'ubuntu-latest'
|
||||
# base-image: "ubuntu:22.04"
|
||||
# makeflags: "--jobs=4 --output-sync=target"
|
||||
|
||||
16
.github/workflows/image.yml
vendored
16
.github/workflows/image.yml
vendored
@@ -64,7 +64,7 @@ jobs:
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "8"
|
||||
cuda-minor-version: "7"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda11'
|
||||
@@ -75,7 +75,7 @@ jobs:
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "5"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12'
|
||||
@@ -86,7 +86,7 @@ jobs:
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "8"
|
||||
cuda-minor-version: "7"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cublas-cuda11-ffmpeg'
|
||||
@@ -100,7 +100,7 @@ jobs:
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "5"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cublas-cuda12-ffmpeg'
|
||||
@@ -274,7 +274,7 @@ jobs:
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "8"
|
||||
cuda-minor-version: "7"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda11-core'
|
||||
@@ -285,7 +285,7 @@ jobs:
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "5"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12-core'
|
||||
@@ -296,7 +296,7 @@ jobs:
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "8"
|
||||
cuda-minor-version: "7"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda11-ffmpeg-core'
|
||||
@@ -307,7 +307,7 @@ jobs:
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "5"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12-ffmpeg-core'
|
||||
|
||||
14
.github/workflows/image_build.yml
vendored
14
.github/workflows/image_build.yml
vendored
@@ -23,7 +23,7 @@ on:
|
||||
type: string
|
||||
cuda-minor-version:
|
||||
description: 'CUDA minor version'
|
||||
default: "5"
|
||||
default: "4"
|
||||
type: string
|
||||
platforms:
|
||||
description: 'Platforms'
|
||||
@@ -215,7 +215,7 @@ jobs:
|
||||
password: ${{ secrets.quayPassword }}
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
uses: docker/build-push-action@v6
|
||||
if: github.event_name != 'pull_request'
|
||||
with:
|
||||
builder: ${{ steps.buildx.outputs.name }}
|
||||
@@ -232,7 +232,7 @@ jobs:
|
||||
BASE_IMAGE=${{ inputs.base-image }}
|
||||
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
|
||||
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
||||
GRPC_VERSION=v1.64.0
|
||||
GRPC_VERSION=v1.65.0
|
||||
MAKEFLAGS=${{ inputs.makeflags }}
|
||||
context: .
|
||||
file: ./Dockerfile
|
||||
@@ -243,7 +243,7 @@ jobs:
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
### Start testing image
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
uses: docker/build-push-action@v6
|
||||
if: github.event_name == 'pull_request'
|
||||
with:
|
||||
builder: ${{ steps.buildx.outputs.name }}
|
||||
@@ -260,7 +260,7 @@ jobs:
|
||||
BASE_IMAGE=${{ inputs.base-image }}
|
||||
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
|
||||
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
||||
GRPC_VERSION=v1.64.0
|
||||
GRPC_VERSION=v1.65.0
|
||||
MAKEFLAGS=${{ inputs.makeflags }}
|
||||
context: .
|
||||
file: ./Dockerfile
|
||||
@@ -276,7 +276,7 @@ jobs:
|
||||
## End testing image
|
||||
- name: Build and push AIO image
|
||||
if: inputs.aio != ''
|
||||
uses: docker/build-push-action@v5
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
builder: ${{ steps.buildx.outputs.name }}
|
||||
build-args: |
|
||||
@@ -291,7 +291,7 @@ jobs:
|
||||
|
||||
- name: Build and push AIO image (dockerhub)
|
||||
if: inputs.aio != ''
|
||||
uses: docker/build-push-action@v5
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
builder: ${{ steps.buildx.outputs.name }}
|
||||
build-args: |
|
||||
|
||||
168
.github/workflows/notify-models.yaml
vendored
Normal file
168
.github/workflows/notify-models.yaml
vendored
Normal file
@@ -0,0 +1,168 @@
|
||||
name: Notifications for new models
|
||||
on:
|
||||
pull_request:
|
||||
types:
|
||||
- closed
|
||||
|
||||
jobs:
|
||||
notify-discord:
|
||||
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
|
||||
env:
|
||||
MODEL_NAME: hermes-2-theta-llama-3-8b
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0 # needed to checkout all branches for this Action to work
|
||||
- uses: mudler/localai-github-action@v1
|
||||
with:
|
||||
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
||||
# Check the PR diff using the current branch and the base branch of the PR
|
||||
- uses: GrantBirki/git-diff-action@v2.7.0
|
||||
id: git-diff-action
|
||||
with:
|
||||
json_diff_file_output: diff.json
|
||||
raw_diff_file_output: diff.txt
|
||||
file_output_only: "true"
|
||||
- name: Summarize
|
||||
env:
|
||||
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
|
||||
id: summarize
|
||||
run: |
|
||||
input="$(cat $DIFF)"
|
||||
|
||||
# Define the LocalAI API endpoint
|
||||
API_URL="http://localhost:8080/chat/completions"
|
||||
|
||||
# Create a JSON payload using jq to handle special characters
|
||||
json_payload=$(jq -n --arg input "$input" '{
|
||||
model: "'$MODEL_NAME'",
|
||||
messages: [
|
||||
{
|
||||
role: "system",
|
||||
content: "You are LocalAI-bot. Write a discord message to notify everyone about the new model from the git diff. Make it informal. An example can include: the URL of the model, the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI and that can be browsed over https://models.localai.io. For example: local-ai run model_name_here"
|
||||
},
|
||||
{
|
||||
role: "user",
|
||||
content: $input
|
||||
}
|
||||
]
|
||||
}')
|
||||
|
||||
# Send the request to LocalAI
|
||||
response=$(curl -s -X POST $API_URL \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$json_payload")
|
||||
|
||||
# Extract the summary from the response
|
||||
summary="$(echo $response | jq -r '.choices[0].message.content')"
|
||||
|
||||
# Print the summary
|
||||
# -H "Authorization: Bearer $API_KEY" \
|
||||
echo "Summary:"
|
||||
echo "$summary"
|
||||
echo "payload sent"
|
||||
echo "$json_payload"
|
||||
{
|
||||
echo 'message<<EOF'
|
||||
echo "$summary"
|
||||
echo EOF
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
docker logs --tail 10 local-ai
|
||||
- name: Discord notification
|
||||
env:
|
||||
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL }}
|
||||
DISCORD_USERNAME: "LocalAI-Bot"
|
||||
DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
|
||||
uses: Ilshidur/action-discord@master
|
||||
with:
|
||||
args: ${{ steps.summarize.outputs.message }}
|
||||
- name: Setup tmate session if fails
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.18
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
limit-access-to-actor: true
|
||||
notify-twitter:
|
||||
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
|
||||
env:
|
||||
MODEL_NAME: hermes-2-theta-llama-3-8b
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0 # needed to checkout all branches for this Action to work
|
||||
- name: Start LocalAI
|
||||
run: |
|
||||
echo "Starting LocalAI..."
|
||||
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
|
||||
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
|
||||
# Check the PR diff using the current branch and the base branch of the PR
|
||||
- uses: GrantBirki/git-diff-action@v2.7.0
|
||||
id: git-diff-action
|
||||
with:
|
||||
json_diff_file_output: diff.json
|
||||
raw_diff_file_output: diff.txt
|
||||
file_output_only: "true"
|
||||
- name: Summarize
|
||||
env:
|
||||
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
|
||||
id: summarize
|
||||
run: |
|
||||
input="$(cat $DIFF)"
|
||||
|
||||
# Define the LocalAI API endpoint
|
||||
API_URL="http://localhost:8080/chat/completions"
|
||||
|
||||
# Create a JSON payload using jq to handle special characters
|
||||
json_payload=$(jq -n --arg input "$input" '{
|
||||
model: "'$MODEL_NAME'",
|
||||
messages: [
|
||||
{
|
||||
role: "system",
|
||||
content: "You are LocalAI-bot. Write a twitter message to notify everyone about the new model from the git diff. Make it informal and really short. An example can include: the name, and a brief description of the model if exists. Also add an hint on how to install it in LocalAI. For example: local-ai run model_name_here"
|
||||
},
|
||||
{
|
||||
role: "user",
|
||||
content: $input
|
||||
}
|
||||
]
|
||||
}')
|
||||
|
||||
# Send the request to LocalAI
|
||||
response=$(curl -s -X POST $API_URL \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$json_payload")
|
||||
|
||||
# Extract the summary from the response
|
||||
summary="$(echo $response | jq -r '.choices[0].message.content')"
|
||||
|
||||
# Print the summary
|
||||
# -H "Authorization: Bearer $API_KEY" \
|
||||
echo "Summary:"
|
||||
echo "$summary"
|
||||
echo "payload sent"
|
||||
echo "$json_payload"
|
||||
{
|
||||
echo 'message<<EOF'
|
||||
echo "$summary"
|
||||
echo EOF
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
docker logs --tail 10 local-ai
|
||||
- uses: Eomm/why-don-t-you-tweet@v2
|
||||
with:
|
||||
tweet-message: ${{ steps.summarize.outputs.message }}
|
||||
env:
|
||||
# Get your tokens from https://developer.twitter.com/apps
|
||||
TWITTER_CONSUMER_API_KEY: ${{ secrets.TWITTER_APP_KEY }}
|
||||
TWITTER_CONSUMER_API_SECRET: ${{ secrets.TWITTER_APP_SECRET }}
|
||||
TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
|
||||
TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
|
||||
- name: Setup tmate session if fails
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.18
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
limit-access-to-actor: true
|
||||
63
.github/workflows/notify-releases.yaml
vendored
Normal file
63
.github/workflows/notify-releases.yaml
vendored
Normal file
@@ -0,0 +1,63 @@
|
||||
name: Release notifications
|
||||
on:
|
||||
release:
|
||||
types:
|
||||
- published
|
||||
|
||||
jobs:
|
||||
notify-discord:
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
RELEASE_BODY: ${{ github.event.release.body }}
|
||||
RELEASE_TITLE: ${{ github.event.release.name }}
|
||||
RELEASE_TAG_NAME: ${{ github.event.release.tag_name }}
|
||||
steps:
|
||||
- uses: mudler/localai-github-action@v1
|
||||
with:
|
||||
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
||||
- name: Summarize
|
||||
id: summarize
|
||||
run: |
|
||||
input="$RELEASE_TITLE\b$RELEASE_BODY"
|
||||
|
||||
# Define the LocalAI API endpoint
|
||||
API_URL="http://localhost:8080/chat/completions"
|
||||
|
||||
# Create a JSON payload using jq to handle special characters
|
||||
json_payload=$(jq -n --arg input "$input" '{
|
||||
model: "'$MODEL_NAME'",
|
||||
messages: [
|
||||
{
|
||||
role: "system",
|
||||
content: "Write a discord message with a bullet point summary of the release notes."
|
||||
},
|
||||
{
|
||||
role: "user",
|
||||
content: $input
|
||||
}
|
||||
]
|
||||
}')
|
||||
|
||||
# Send the request to LocalAI API
|
||||
response=$(curl -s -X POST $API_URL \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$json_payload")
|
||||
|
||||
# Extract the summary from the response
|
||||
summary=$(echo $response | jq -r '.choices[0].message.content')
|
||||
|
||||
# Print the summary
|
||||
# -H "Authorization: Bearer $API_KEY" \
|
||||
{
|
||||
echo 'message<<EOF'
|
||||
echo "$summary"
|
||||
echo EOF
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
- name: Discord notification
|
||||
env:
|
||||
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL_RELEASE }}
|
||||
DISCORD_USERNAME: "LocalAI-Bot"
|
||||
DISCORD_AVATAR: "https://avatars.githubusercontent.com/u/139863280?v=4"
|
||||
uses: Ilshidur/action-discord@master
|
||||
with:
|
||||
args: ${{ steps.summarize.outputs.message }}
|
||||
28
.github/workflows/prlint.yaml
vendored
Normal file
28
.github/workflows/prlint.yaml
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
name: Check PR style
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
types:
|
||||
- opened
|
||||
- reopened
|
||||
- edited
|
||||
- synchronize
|
||||
|
||||
jobs:
|
||||
title-lint:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
statuses: write
|
||||
steps:
|
||||
- uses: aslafy-z/conventional-pr-title-action@v3
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
# check-pr-description:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - uses: actions/checkout@v2
|
||||
# - uses: jadrol/pr-description-checker-action@v1.0.0
|
||||
# id: description-checker
|
||||
# with:
|
||||
# repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
# exempt-labels: no qa
|
||||
93
.github/workflows/release.yaml
vendored
93
.github/workflows/release.yaml
vendored
@@ -1,11 +1,15 @@
|
||||
name: Build and Release
|
||||
|
||||
on:
|
||||
- push
|
||||
- pull_request
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
tags:
|
||||
- 'v*'
|
||||
pull_request:
|
||||
|
||||
env:
|
||||
GRPC_VERSION: v1.64.0
|
||||
GRPC_VERSION: v1.65.0
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
@@ -27,12 +31,11 @@ jobs:
|
||||
with:
|
||||
go-version: '1.21.x'
|
||||
cache: false
|
||||
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache
|
||||
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
|
||||
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
|
||||
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
|
||||
- name: Install CUDA Dependencies
|
||||
run: |
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
|
||||
@@ -40,7 +43,7 @@ jobs:
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
|
||||
env:
|
||||
CUDA_VERSION: 12-5
|
||||
CUDA_VERSION: 12-4
|
||||
- name: Cache grpc
|
||||
id: cache-grpc
|
||||
uses: actions/cache@v4
|
||||
@@ -52,7 +55,8 @@ jobs:
|
||||
run: |
|
||||
|
||||
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
|
||||
cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && sudo make --jobs 5 --output-sync=target
|
||||
- name: Install gRPC
|
||||
@@ -96,14 +100,13 @@ jobs:
|
||||
CROSS_TOOLCHAIN=/usr/$GNU_HOST
|
||||
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
|
||||
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||
export PATH=$PATH:$GOPATH/bin
|
||||
export PATH=/usr/local/cuda/bin:$PATH
|
||||
sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
|
||||
sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
|
||||
sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
|
||||
GO_TAGS=p2p \
|
||||
BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
|
||||
GOOS=linux \
|
||||
GOARCH=arm64 \
|
||||
@@ -147,7 +150,7 @@ jobs:
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache cmake
|
||||
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
|
||||
- name: Intel Dependencies
|
||||
run: |
|
||||
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
|
||||
@@ -161,7 +164,7 @@ jobs:
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
||||
env:
|
||||
CUDA_VERSION: 12-3
|
||||
CUDA_VERSION: 12-5
|
||||
- name: "Install Hipblas"
|
||||
env:
|
||||
ROCM_VERSION: "6.1"
|
||||
@@ -197,7 +200,8 @@ jobs:
|
||||
if: steps.cache-grpc.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
|
||||
cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && sudo make --jobs 5 --output-sync=target
|
||||
- name: Install gRPC
|
||||
@@ -207,15 +211,14 @@ jobs:
|
||||
- name: Build
|
||||
id: build
|
||||
run: |
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||
export PATH=$PATH:$GOPATH/bin
|
||||
export PATH=/usr/local/cuda/bin:$PATH
|
||||
export PATH=/opt/rocm/bin:$PATH
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
|
||||
GO_TAGS=p2p \
|
||||
BACKEND_LIBS="./ld.so /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/libgomp.so.1" \
|
||||
BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/x86_64-linux-gnu/libdl.so.2 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/lib/x86_64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
|
||||
make -j4 dist
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
@@ -248,9 +251,9 @@ jobs:
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
|
||||
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||
- name: Build stablediffusion
|
||||
run: |
|
||||
export PATH=$PATH:$GOPATH/bin
|
||||
@@ -269,8 +272,8 @@ jobs:
|
||||
files: |
|
||||
release/*
|
||||
|
||||
build-macOS-arm64:
|
||||
runs-on: macos-14
|
||||
build-macOS-x86_64:
|
||||
runs-on: macos-13
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
@@ -292,7 +295,49 @@ jobs:
|
||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||
export PATH=$PATH:$GOPATH/bin
|
||||
|
||||
BACKEND_LIBS="$(ls /opt/homebrew/opt/grpc/lib/*.dylib /opt/homebrew/opt/re2/lib/*.dylib /opt/homebrew/opt/openssl@3/lib/*.dylib /opt/homebrew/opt/protobuf/lib/*.dylib /opt/homebrew/opt/abseil/lib/*.dylib | xargs)" GO_TAGS=p2p make dist
|
||||
make dist
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: LocalAI-MacOS-x86_64
|
||||
path: release/
|
||||
- name: Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
with:
|
||||
files: |
|
||||
release/*
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.18
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
limit-access-to-actor: true
|
||||
|
||||
build-macOS-arm64:
|
||||
runs-on: macos-14
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: '1.21.x'
|
||||
cache: false
|
||||
- name: Dependencies
|
||||
run: |
|
||||
brew install protobuf grpc
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||
- name: Build
|
||||
id: build
|
||||
run: |
|
||||
export C_INCLUDE_PATH=/usr/local/include
|
||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||
export PATH=$PATH:$GOPATH/bin
|
||||
|
||||
make dist
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: LocalAI-MacOS-arm64
|
||||
|
||||
60
.github/workflows/test-extra.yml
vendored
60
.github/workflows/test-extra.yml
vendored
@@ -19,7 +19,7 @@ jobs:
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
@@ -29,8 +29,8 @@ jobs:
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user grpcio-tools==1.64.0
|
||||
|
||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
|
||||
- name: Test transformers
|
||||
run: |
|
||||
make --jobs=5 --output-sync=target -C backend/python/transformers
|
||||
@@ -41,7 +41,7 @@ jobs:
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
@@ -51,8 +51,8 @@ jobs:
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user grpcio-tools==1.64.0
|
||||
|
||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
|
||||
- name: Test sentencetransformers
|
||||
run: |
|
||||
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
|
||||
@@ -64,7 +64,7 @@ jobs:
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
@@ -74,7 +74,7 @@ jobs:
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user grpcio-tools==1.64.0
|
||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
|
||||
- name: Test rerankers
|
||||
run: |
|
||||
@@ -86,7 +86,7 @@ jobs:
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
@@ -96,7 +96,7 @@ jobs:
|
||||
sudo apt-get install -y libopencv-dev
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
pip install --user grpcio-tools==1.64.0
|
||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
- name: Test diffusers
|
||||
run: |
|
||||
make --jobs=5 --output-sync=target -C backend/python/diffusers
|
||||
@@ -107,7 +107,7 @@ jobs:
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
@@ -117,19 +117,19 @@ jobs:
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user grpcio-tools==1.64.0
|
||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
|
||||
- name: Test parler-tts
|
||||
run: |
|
||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts
|
||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts test
|
||||
|
||||
|
||||
tests-openvoice:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
@@ -139,7 +139,7 @@ jobs:
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user grpcio-tools==1.64.0
|
||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
|
||||
- name: Test openvoice
|
||||
run: |
|
||||
@@ -151,7 +151,7 @@ jobs:
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
@@ -161,7 +161,7 @@ jobs:
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user grpcio-tools==1.64.0
|
||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
|
||||
- name: Test transformers-musicgen
|
||||
run: |
|
||||
@@ -175,7 +175,7 @@ jobs:
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
# with:
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
# run: |
|
||||
@@ -185,14 +185,14 @@ jobs:
|
||||
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
# sudo apt-get install -y libopencv-dev
|
||||
# pip install --user grpcio-tools==1.64.0
|
||||
# pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
|
||||
# - name: Test petals
|
||||
# run: |
|
||||
# make --jobs=5 --output-sync=target -C backend/python/petals
|
||||
# make --jobs=5 --output-sync=target -C backend/python/petals test
|
||||
|
||||
|
||||
|
||||
|
||||
# tests-bark:
|
||||
# runs-on: ubuntu-latest
|
||||
@@ -239,7 +239,7 @@ jobs:
|
||||
# df -h
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
# with:
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
# run: |
|
||||
@@ -249,14 +249,14 @@ jobs:
|
||||
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
# sudo apt-get install -y libopencv-dev
|
||||
# pip install --user grpcio-tools==1.64.0
|
||||
# pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
|
||||
# - name: Test bark
|
||||
# run: |
|
||||
# make --jobs=5 --output-sync=target -C backend/python/bark
|
||||
# make --jobs=5 --output-sync=target -C backend/python/bark test
|
||||
|
||||
|
||||
|
||||
# Below tests needs GPU. Commented out for now
|
||||
# TODO: Re-enable as soon as we have GPU nodes
|
||||
# tests-vllm:
|
||||
@@ -264,7 +264,7 @@ jobs:
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
# with:
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
# run: |
|
||||
@@ -274,7 +274,7 @@ jobs:
|
||||
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
# sudo apt-get install -y libopencv-dev
|
||||
# pip install --user grpcio-tools==1.64.0
|
||||
# pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
# - name: Test vllm
|
||||
# run: |
|
||||
# make --jobs=5 --output-sync=target -C backend/python/vllm
|
||||
@@ -284,7 +284,7 @@ jobs:
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
@@ -294,7 +294,7 @@ jobs:
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user grpcio-tools==1.64.0
|
||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
- name: Test vall-e-x
|
||||
run: |
|
||||
make --jobs=5 --output-sync=target -C backend/python/vall-e-x
|
||||
@@ -305,7 +305,7 @@ jobs:
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
@@ -314,8 +314,8 @@ jobs:
|
||||
sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
pip install --user grpcio-tools==1.64.0
|
||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
- name: Test coqui
|
||||
run: |
|
||||
make --jobs=5 --output-sync=target -C backend/python/coqui
|
||||
make --jobs=5 --output-sync=target -C backend/python/coqui test
|
||||
make --jobs=5 --output-sync=target -C backend/python/coqui test
|
||||
|
||||
16
.github/workflows/test.yml
vendored
16
.github/workflows/test.yml
vendored
@@ -10,7 +10,7 @@ on:
|
||||
- '*'
|
||||
|
||||
env:
|
||||
GRPC_VERSION: v1.64.0
|
||||
GRPC_VERSION: v1.65.0
|
||||
|
||||
concurrency:
|
||||
group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||
@@ -70,7 +70,8 @@ jobs:
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential curl ffmpeg
|
||||
sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
|
||||
sudo apt-get install -y libgmock-dev
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
@@ -93,8 +94,8 @@ jobs:
|
||||
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
||||
export CUDACXX=/usr/local/cuda/bin/nvcc
|
||||
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
|
||||
# The python3-grpc-tools package in 22.04 is too old
|
||||
pip install --user grpcio-tools
|
||||
@@ -109,7 +110,7 @@ jobs:
|
||||
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
|
||||
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||
env:
|
||||
CUDA_VERSION: 12-3
|
||||
CUDA_VERSION: 12-4
|
||||
- name: Cache grpc
|
||||
id: cache-grpc
|
||||
uses: actions/cache@v4
|
||||
@@ -120,7 +121,8 @@ jobs:
|
||||
if: steps.cache-grpc.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --jobs 5 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && cd cmake/build && \
|
||||
cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && sudo make --jobs 5
|
||||
- name: Install gRPC
|
||||
@@ -213,7 +215,7 @@ jobs:
|
||||
- name: Dependencies
|
||||
run: |
|
||||
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
|
||||
pip install --user grpcio-tools==1.64.0
|
||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
- name: Test
|
||||
run: |
|
||||
export C_INCLUDE_PATH=/usr/local/include
|
||||
|
||||
8
.github/workflows/update_swagger.yaml
vendored
8
.github/workflows/update_swagger.yaml
vendored
@@ -13,11 +13,17 @@ jobs:
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: 'stable'
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install protobuf-compiler
|
||||
- run: |
|
||||
go install github.com/swaggo/swag/cmd/swag@latest
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||
- name: Bump swagger 🔧
|
||||
run: |
|
||||
make swagger
|
||||
make protogen-go swagger
|
||||
- name: Create Pull Request
|
||||
uses: peter-evans/create-pull-request@v6
|
||||
with:
|
||||
|
||||
145
Dockerfile
145
Dockerfile
@@ -5,16 +5,10 @@ ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
|
||||
|
||||
# The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it.
|
||||
FROM ${BASE_IMAGE} AS requirements-core
|
||||
# TODO(mudler): install all accellerators here
|
||||
# and use make dist instead of build.
|
||||
# TODO(mudler): modify make dist to build also go-piper and stablediffusion
|
||||
# This way the same binary can work for everything(!)
|
||||
# TODO(mudler): also make sure that we bundle all the required libs in the backend-assets/lib
|
||||
# For the GPU-accell we are going to generate a tar file instead that will be extracted by the bash installer, and the libs will also be installed in the final docker image, so no need to pull ALL the dependencies
|
||||
|
||||
USER root
|
||||
|
||||
ARG GO_VERSION=1.22.4
|
||||
ARG GO_VERSION=1.22.5
|
||||
ARG TARGETARCH
|
||||
ARG TARGETVARIANT
|
||||
|
||||
@@ -30,7 +24,7 @@ RUN apt-get update && \
|
||||
cmake \
|
||||
curl \
|
||||
git \
|
||||
unzip && \
|
||||
unzip upx-ucl && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
@@ -55,12 +49,10 @@ ENV PATH /usr/local/cuda/bin:${PATH}
|
||||
# HipBLAS requirements
|
||||
ENV PATH /opt/rocm/bin:${PATH}
|
||||
|
||||
# OpenBLAS requirements and stable diffusion, tts (espeak)
|
||||
# OpenBLAS requirements and stable diffusion
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
libopenblas-dev \
|
||||
espeak-ng \
|
||||
espeak \
|
||||
libopencv-dev && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
@@ -85,6 +77,8 @@ ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
espeak-ng \
|
||||
espeak \
|
||||
python3-pip \
|
||||
python-is-python3 \
|
||||
python3-dev \
|
||||
@@ -99,12 +93,13 @@ RUN pip install --user grpcio-tools
|
||||
###################################
|
||||
###################################
|
||||
|
||||
# Base image for the build-type.
|
||||
FROM requirements-${IMAGE_TYPE} AS run-requirements-drivers
|
||||
# The requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
|
||||
# This target will be built on top of requirements-core or requirements-extras as retermined by the IMAGE_TYPE build-arg
|
||||
FROM requirements-${IMAGE_TYPE} AS requirements-drivers
|
||||
|
||||
ARG BUILD_TYPE
|
||||
ARG CUDA_MAJOR_VERSION=12
|
||||
ARG CUDA_MINOR_VERSION=5
|
||||
ARG CUDA_MINOR_VERSION=0
|
||||
|
||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||
|
||||
@@ -113,11 +108,11 @@ RUN <<EOT bash
|
||||
if [ "${BUILD_TYPE}" = "vulkan" ]; then
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
software-properties-common pciutils wget gpg-agent && \
|
||||
software-properties-common pciutils wget gpg-agent && \
|
||||
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
||||
apt-get update && \
|
||||
apt-get install -y \
|
||||
apt-get install -y \
|
||||
vulkan-sdk && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
@@ -129,33 +124,13 @@ RUN <<EOT bash
|
||||
if [ "${BUILD_TYPE}" = "cublas" ]; then
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
software-properties-common pciutils
|
||||
software-properties-common pciutils
|
||||
if [ "amd64" = "$TARGETARCH" ]; then
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
||||
fi
|
||||
fi
|
||||
if [ "arm64" = "$TARGETARCH" ]; then
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
|
||||
fi
|
||||
dpkg -i cuda-keyring_1.1-1_all.deb && \
|
||||
rm -f cuda-keyring_1.1-1_all.deb && \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
fi
|
||||
EOT
|
||||
|
||||
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
software-properties-common pciutils && \
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
|
||||
dpkg -i cuda-keyring_1.1-1_all.deb && \
|
||||
rm -f cuda-keyring_1.1-1_all.deb && \
|
||||
apt-get update && \
|
||||
@@ -167,8 +142,9 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
|
||||
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/* \
|
||||
; fi
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
fi
|
||||
EOT
|
||||
|
||||
# If we are building with clblas support, we need the libraries for the builds
|
||||
RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
|
||||
@@ -191,82 +167,6 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
||||
ldconfig \
|
||||
; fi
|
||||
|
||||
# The build-requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
|
||||
# This target will be built on top of requirements-core or requirements-extras as retermined by the IMAGE_TYPE build-arg
|
||||
FROM requirements-${IMAGE_TYPE} AS build-requirements-drivers
|
||||
|
||||
ARG BUILD_TYPE
|
||||
ARG CUDA_MAJOR_VERSION=12
|
||||
ARG CUDA_MINOR_VERSION=5
|
||||
|
||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||
|
||||
# Vulkan requirements
|
||||
RUN <<EOT bash
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
software-properties-common pciutils wget gpg-agent && \
|
||||
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
||||
apt-get update && \
|
||||
apt-get install -y \
|
||||
vulkan-sdk && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
EOT
|
||||
|
||||
# CuBLAS requirements
|
||||
RUN <<EOT bash
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
software-properties-common pciutils
|
||||
if [ "amd64" = "$TARGETARCH" ]; then
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
||||
fi
|
||||
if [ "arm64" = "$TARGETARCH" ]; then
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
|
||||
fi
|
||||
dpkg -i cuda-keyring_1.1-1_all.deb && \
|
||||
rm -f cuda-keyring_1.1-1_all.deb && \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
EOT
|
||||
|
||||
# clblas
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
libclblast-dev && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# intel
|
||||
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && apt update && apt install -y intel-basekit && apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# hipblas
|
||||
RUN wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
|
||||
gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null && apt-get update && \
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/6.1.2/ubuntu jammy main" \
|
||||
| tee /etc/apt/sources.list.d/amdgpu.list && \
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.1.2 jammy main" | tee --append /etc/apt/sources.list.d/rocm.list && printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | tee /etc/apt/preferences.d/rocm-pin-600 && \
|
||||
apt update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
hipblas-dev rocm-dev \
|
||||
rocblas-dev && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
|
||||
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
|
||||
ldconfig
|
||||
|
||||
###################################
|
||||
###################################
|
||||
|
||||
@@ -287,7 +187,7 @@ FROM ${GRPC_BASE_IMAGE} AS grpc
|
||||
|
||||
# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
|
||||
ARG GRPC_MAKEFLAGS="-j4 -Otarget"
|
||||
ARG GRPC_VERSION=v1.64.2
|
||||
ARG GRPC_VERSION=v1.65.0
|
||||
|
||||
ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
|
||||
|
||||
@@ -308,6 +208,7 @@ RUN apt-get update && \
|
||||
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
mkdir -p /build/grpc/cmake/build && \
|
||||
cd /build/grpc/cmake/build && \
|
||||
sed -i "216i\ TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt" && \
|
||||
cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
|
||||
make && \
|
||||
make install && \
|
||||
@@ -318,7 +219,7 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
|
||||
|
||||
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
|
||||
# Adjustments to the build process should likely be made here.
|
||||
FROM build-requirements-drivers AS builder
|
||||
FROM requirements-drivers AS builder
|
||||
|
||||
ARG GO_TAGS="stablediffusion tts p2p"
|
||||
ARG GRPC_BACKENDS
|
||||
@@ -363,8 +264,9 @@ COPY --from=grpc /opt/grpc /usr/local
|
||||
|
||||
# Rebuild with defaults backends
|
||||
WORKDIR /build
|
||||
# Need to build tts and stablediffusion separately first (?)
|
||||
RUN make dist && rm release/*.sha256 && mv release/* local-ai
|
||||
|
||||
## Build the binary
|
||||
RUN make build
|
||||
|
||||
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
||||
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
|
||||
@@ -376,7 +278,7 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
||||
|
||||
# This is the final target. The result of this target will be the image uploaded to the registry.
|
||||
# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
|
||||
FROM run-requirements-drivers
|
||||
FROM requirements-drivers
|
||||
|
||||
ARG FFMPEG
|
||||
ARG BUILD_TYPE
|
||||
@@ -421,7 +323,6 @@ RUN make prepare-sources
|
||||
COPY --from=builder /build/local-ai ./
|
||||
|
||||
# Copy shared libraries for piper
|
||||
# TODO(mudler): bundle these libs in backend-assets/lib/ (like we do for llama.cpp deps)
|
||||
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
|
||||
|
||||
# do not let stablediffusion rebuild (requires an older version of absl)
|
||||
|
||||
214
Makefile
214
Makefile
@@ -3,9 +3,12 @@ GOTEST=$(GOCMD) test
|
||||
GOVET=$(GOCMD) vet
|
||||
BINARY_NAME=local-ai
|
||||
|
||||
DETECT_LIBS?=true
|
||||
|
||||
# llama.cpp versions
|
||||
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||
CPPLLAMA_VERSION?=9ef07800622e4c371605f9419864d15667c3558f
|
||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||
CPPLLAMA_VERSION?=081fe431aa8fb6307145c4feb3eed4f48cab19f8
|
||||
|
||||
# gpt4all version
|
||||
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
||||
@@ -16,26 +19,33 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_CPP_VERSION?=b29b3b29240aac8b71ce8e5a4360c1f1562ad66f
|
||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||
WHISPER_CPP_VERSION?=f68298ce06ca3edd6e6f3f21c3d0bb5f073942c3
|
||||
|
||||
# bert.cpp version
|
||||
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
||||
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
|
||||
|
||||
# go-piper version
|
||||
PIPER_REPO?=https://github.com/mudler/go-piper
|
||||
PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759
|
||||
|
||||
# stablediffusion version
|
||||
STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
|
||||
STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
|
||||
|
||||
# tinydream version
|
||||
TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
|
||||
TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
|
||||
|
||||
export BUILD_TYPE?=
|
||||
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
|
||||
export CMAKE_ARGS?=
|
||||
export BACKEND_LIBS?=
|
||||
|
||||
CGO_LDFLAGS?=
|
||||
CGO_LDFLAGS_WHISPER?=
|
||||
CGO_LDFLAGS_WHISPER+=-lggml
|
||||
CUDA_LIBPATH?=/usr/local/cuda/lib64/
|
||||
GO_TAGS?=
|
||||
BUILD_ID?=
|
||||
@@ -48,9 +58,9 @@ RANDOM := $(shell bash -c 'echo $$RANDOM')
|
||||
|
||||
VERSION?=$(shell git describe --always --tags || echo "dev" )
|
||||
# go tool nm ./local-ai | grep Commit
|
||||
LD_FLAGS?=
|
||||
override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Version=$(VERSION)"
|
||||
override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
|
||||
LD_FLAGS?=-s -w
|
||||
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)"
|
||||
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
|
||||
|
||||
OPTIONAL_TARGETS?=
|
||||
|
||||
@@ -62,6 +72,14 @@ WHITE := $(shell tput -Txterm setaf 7)
|
||||
CYAN := $(shell tput -Txterm setaf 6)
|
||||
RESET := $(shell tput -Txterm sgr0)
|
||||
|
||||
UPX?=
|
||||
# check if upx exists
|
||||
ifeq (, $(shell which upx))
|
||||
UPX=
|
||||
else
|
||||
UPX=$(shell which upx)
|
||||
endif
|
||||
|
||||
# Default Docker bridge IP
|
||||
E2E_BRIDGE_IP?=172.17.0.1
|
||||
|
||||
@@ -82,24 +100,25 @@ ifeq ($(OS),Darwin)
|
||||
else ifneq ($(BUILD_TYPE),metal)
|
||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||
export GGML_NO_ACCELERATE=1
|
||||
export GGML_NO_METAL=1
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),metal)
|
||||
# -lcblas removed: it seems to always be listed as a duplicate flag.
|
||||
CGO_LDFLAGS += -framework Accelerate
|
||||
endif
|
||||
else
|
||||
CGO_LDFLAGS_WHISPER+=-lgomp
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),openblas)
|
||||
CGO_LDFLAGS+=-lopenblas
|
||||
export WHISPER_OPENBLAS=1
|
||||
export GGML_OPENBLAS=1
|
||||
endif
|
||||
|
||||
|
||||
ifeq ($(BUILD_TYPE),cublas)
|
||||
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
|
||||
export GGML_CUDA=1
|
||||
export WHISPER_CUDA=1
|
||||
CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft
|
||||
endif
|
||||
|
||||
@@ -107,6 +126,14 @@ ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=1
|
||||
endif
|
||||
|
||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||
export GGML_SYCL=1
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),sycl_f16)
|
||||
export GGML_SYCL_F16=1
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),hipblas)
|
||||
ROCM_HOME ?= /opt/rocm
|
||||
ROCM_PATH ?= /opt/rocm
|
||||
@@ -115,7 +142,7 @@ ifeq ($(BUILD_TYPE),hipblas)
|
||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||
# llama-ggml has no hipblas support, so override it here.
|
||||
export STABLE_BUILD_TYPE=
|
||||
export WHISPER_HIPBLAS=1
|
||||
export GGML_HIPBLAS=1
|
||||
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
|
||||
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
||||
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
||||
@@ -125,17 +152,16 @@ endif
|
||||
ifeq ($(BUILD_TYPE),metal)
|
||||
CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
|
||||
export GGML_METAL=1
|
||||
export WHISPER_METAL=1
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),clblas)
|
||||
CGO_LDFLAGS+=-lOpenCL -lclblast
|
||||
export WHISPER_CLBLAST=1
|
||||
export GGML_OPENBLAS=1
|
||||
endif
|
||||
|
||||
# glibc-static or glibc-devel-static required
|
||||
ifeq ($(STATIC),true)
|
||||
LD_FLAGS=-linkmode external -extldflags -static
|
||||
LD_FLAGS+=-linkmode external -extldflags -static
|
||||
endif
|
||||
|
||||
ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
|
||||
@@ -169,6 +195,8 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
|
||||
ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
|
||||
# Use filter-out to remove the specified backends
|
||||
ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
|
||||
|
||||
GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
|
||||
TEST_PATHS?=./api/... ./pkg/... ./core/...
|
||||
@@ -188,69 +216,109 @@ all: help
|
||||
|
||||
## BERT embeddings
|
||||
sources/go-bert.cpp:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert.cpp
|
||||
cd sources/go-bert.cpp && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
|
||||
mkdir -p sources/go-bert.cpp
|
||||
cd sources/go-bert.cpp && \
|
||||
git init && \
|
||||
git remote add origin $(BERT_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout $(BERT_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
|
||||
$(MAKE) -C sources/go-bert.cpp libgobert.a
|
||||
|
||||
## go-llama.cpp
|
||||
sources/go-llama.cpp:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama.cpp
|
||||
cd sources/go-llama.cpp && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
|
||||
mkdir -p sources/go-llama.cpp
|
||||
cd sources/go-llama.cpp && \
|
||||
git init && \
|
||||
git remote add origin $(GOLLAMA_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout $(GOLLAMA_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
|
||||
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
|
||||
|
||||
## go-piper
|
||||
sources/go-piper:
|
||||
git clone --recurse-submodules https://github.com/mudler/go-piper sources/go-piper
|
||||
cd sources/go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
|
||||
mkdir -p sources/go-piper
|
||||
cd sources/go-piper && \
|
||||
git init && \
|
||||
git remote add origin $(PIPER_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout $(PIPER_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/go-piper/libpiper_binding.a: sources/go-piper
|
||||
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
|
||||
|
||||
## GPT4ALL
|
||||
sources/gpt4all:
|
||||
git clone --recurse-submodules $(GPT4ALL_REPO) sources/gpt4all
|
||||
cd sources/gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
|
||||
mkdir -p sources/gpt4all
|
||||
cd sources/gpt4all && \
|
||||
git init && \
|
||||
git remote add origin $(GPT4ALL_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout $(GPT4ALL_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
|
||||
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
|
||||
|
||||
## RWKV
|
||||
sources/go-rwkv.cpp:
|
||||
git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv.cpp
|
||||
cd sources/go-rwkv.cpp && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
|
||||
mkdir -p sources/go-rwkv.cpp
|
||||
cd sources/go-rwkv.cpp && \
|
||||
git init && \
|
||||
git remote add origin $(RWKV_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout $(RWKV_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
|
||||
cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
|
||||
|
||||
## stable diffusion
|
||||
sources/go-stable-diffusion:
|
||||
git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion sources/go-stable-diffusion
|
||||
cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
|
||||
mkdir -p sources/go-stable-diffusion
|
||||
cd sources/go-stable-diffusion && \
|
||||
git init && \
|
||||
git remote add origin $(STABLEDIFFUSION_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout $(STABLEDIFFUSION_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
|
||||
CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
|
||||
|
||||
## tiny-dream
|
||||
sources/go-tiny-dream:
|
||||
git clone --recurse-submodules https://github.com/M0Rf30/go-tiny-dream sources/go-tiny-dream
|
||||
cd sources/go-tiny-dream && git checkout -b build $(TINYDREAM_VERSION) && git submodule update --init --recursive --depth 1
|
||||
mkdir -p sources/go-tiny-dream
|
||||
cd sources/go-tiny-dream && \
|
||||
git init && \
|
||||
git remote add origin $(TINYDREAM_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout $(TINYDREAM_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
|
||||
$(MAKE) -C sources/go-tiny-dream libtinydream.a
|
||||
|
||||
## whisper
|
||||
sources/whisper.cpp:
|
||||
git clone https://github.com/ggerganov/whisper.cpp sources/whisper.cpp
|
||||
cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
|
||||
mkdir -p sources/whisper.cpp
|
||||
cd sources/whisper.cpp && \
|
||||
git init && \
|
||||
git remote add origin $(WHISPER_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout $(WHISPER_CPP_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a
|
||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
||||
|
||||
get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream
|
||||
get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
|
||||
|
||||
replace:
|
||||
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
|
||||
@@ -317,14 +385,15 @@ build: prepare backend-assets grpcs ## Build the project
|
||||
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
|
||||
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
|
||||
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
|
||||
$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
|
||||
ifneq ($(BACKEND_LIBS),)
|
||||
$(MAKE) backend-assets/lib
|
||||
cp $(BACKEND_LIBS) backend-assets/lib/
|
||||
cp -f $(BACKEND_LIBS) backend-assets/lib/
|
||||
endif
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
|
||||
|
||||
build-minimal:
|
||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=none $(MAKE) build
|
||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
|
||||
|
||||
build-api:
|
||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
|
||||
@@ -334,17 +403,22 @@ backend-assets/lib:
|
||||
|
||||
dist:
|
||||
$(MAKE) backend-assets/grpc/llama-cpp-avx2
|
||||
ifeq ($(DETECT_LIBS),true)
|
||||
scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2
|
||||
endif
|
||||
ifeq ($(OS),Darwin)
|
||||
$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
|
||||
else
|
||||
ifneq ($(ARCH),arm64)
|
||||
$(MAKE) backend-assets/grpc/llama-cpp-cuda
|
||||
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
|
||||
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f16
|
||||
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f32
|
||||
endif
|
||||
GO_TAGS="tts p2p" $(MAKE) build
|
||||
ifeq ($(DETECT_LIBS),true)
|
||||
scripts/prepare-libs.sh backend-assets/grpc/piper
|
||||
endif
|
||||
STATIC=true $(MAKE) build
|
||||
GO_TAGS="tts p2p" STATIC=true $(MAKE) build
|
||||
mkdir -p release
|
||||
# if BUILD_ID is empty, then we don't append it to the binary name
|
||||
ifeq ($(BUILD_ID),)
|
||||
@@ -355,8 +429,8 @@ else
|
||||
shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH).sha256
|
||||
endif
|
||||
|
||||
dist-cross-linux-arm64:
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
|
||||
dist-cross-linux-arm64:
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" GO_TAGS="p2p" \
|
||||
STATIC=true $(MAKE) build
|
||||
mkdir -p release
|
||||
# if BUILD_ID is empty, then we don't append it to the binary name
|
||||
@@ -406,7 +480,7 @@ prepare-e2e:
|
||||
mkdir -p $(TEST_DIR)
|
||||
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
|
||||
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
|
||||
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=5 --build-arg FFMPEG=true -t localai-tests .
|
||||
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests .
|
||||
|
||||
run-e2e-image:
|
||||
ls -liah $(abspath ./tests/e2e-fixtures)
|
||||
@@ -668,13 +742,22 @@ backend-assets/grpc: protogen-go replace
|
||||
backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/bert-embeddings
|
||||
endif
|
||||
|
||||
backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/gpt4all
|
||||
endif
|
||||
|
||||
backend-assets/grpc/huggingface: backend-assets/grpc
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/huggingface
|
||||
endif
|
||||
|
||||
backend/cpp/llama/llama.cpp:
|
||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
|
||||
@@ -700,30 +783,33 @@ else
|
||||
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
|
||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
|
||||
endif
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend/cpp/${VARIANT}/grpc-server
|
||||
endif
|
||||
|
||||
# This target is for manually building a variant with-auto detected flags
|
||||
backend-assets/grpc/llama-cpp: backend-assets/grpc
|
||||
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-cpp
|
||||
$(MAKE) -C backend/cpp/llama-cpp purge
|
||||
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
|
||||
$(MAKE) VARIANT="llama-cpp" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-cpp/grpc-server backend-assets/grpc/llama-cpp
|
||||
|
||||
backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc
|
||||
backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-avx2
|
||||
$(MAKE) -C backend/cpp/llama-avx2 purge
|
||||
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
|
||||
|
||||
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc
|
||||
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-avx
|
||||
$(MAKE) -C backend/cpp/llama-avx purge
|
||||
$(info ${GREEN}I llama-cpp build info:avx${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
|
||||
|
||||
backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
|
||||
backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-fallback
|
||||
$(MAKE) -C backend/cpp/llama-fallback purge
|
||||
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
|
||||
@@ -734,35 +820,35 @@ ifeq ($(BUILD_TYPE),metal)
|
||||
cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
|
||||
endif
|
||||
|
||||
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
|
||||
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-cuda
|
||||
$(MAKE) -C backend/cpp/llama-cuda purge
|
||||
$(info ${GREEN}I llama-cpp build info:cuda${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
|
||||
|
||||
backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
|
||||
backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
|
||||
$(MAKE) -C backend/cpp/llama-hipblas purge
|
||||
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
|
||||
BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
|
||||
|
||||
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc
|
||||
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f16
|
||||
$(MAKE) -C backend/cpp/llama-sycl_f16 purge
|
||||
$(info ${GREEN}I llama-cpp build info:sycl_f16${RESET})
|
||||
BUILD_TYPE="sycl_f16" $(MAKE) VARIANT="llama-sycl_f16" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-sycl_f16/grpc-server backend-assets/grpc/llama-cpp-sycl_f16
|
||||
|
||||
backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc
|
||||
backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f32
|
||||
$(MAKE) -C backend/cpp/llama-sycl_f32 purge
|
||||
$(info ${GREEN}I llama-cpp build info:sycl_f32${RESET})
|
||||
BUILD_TYPE="sycl_f32" $(MAKE) VARIANT="llama-sycl_f32" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-sycl_f32/grpc-server backend-assets/grpc/llama-cpp-sycl_f32
|
||||
|
||||
backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
|
||||
backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-grpc
|
||||
$(MAKE) -C backend/cpp/llama-grpc purge
|
||||
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
|
||||
@@ -772,33 +858,57 @@ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
|
||||
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
|
||||
mkdir -p backend-assets/util/
|
||||
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/util/llama-cpp-rpc-server
|
||||
endif
|
||||
|
||||
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/llama-ggml
|
||||
endif
|
||||
|
||||
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
|
||||
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/piper
|
||||
endif
|
||||
|
||||
backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/rwkv
|
||||
endif
|
||||
|
||||
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/stablediffusion
|
||||
endif
|
||||
|
||||
backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/tinydream
|
||||
endif
|
||||
|
||||
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/whisper
|
||||
endif
|
||||
|
||||
backend-assets/grpc/local-store: backend-assets/grpc
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/local-store
|
||||
endif
|
||||
|
||||
grpcs: prepare $(GRPC_BACKENDS)
|
||||
|
||||
@@ -840,7 +950,7 @@ docker-aio-all:
|
||||
|
||||
docker-image-intel:
|
||||
docker build \
|
||||
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
|
||||
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
|
||||
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
||||
--build-arg GO_TAGS="none" \
|
||||
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
||||
@@ -848,7 +958,7 @@ docker-image-intel:
|
||||
|
||||
docker-image-intel-xpu:
|
||||
docker build \
|
||||
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
|
||||
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
|
||||
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
||||
--build-arg GO_TAGS="none" \
|
||||
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
||||
@@ -863,7 +973,7 @@ gen-assets:
|
||||
$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets
|
||||
|
||||
## Documentation
|
||||
docs/layouts/_default:
|
||||
docs/layouts/_default:
|
||||
mkdir -p docs/layouts/_default
|
||||
|
||||
docs/static/gallery.html: docs/layouts/_default
|
||||
@@ -878,4 +988,4 @@ docs-clean:
|
||||
|
||||
.PHONY: docs
|
||||
docs: docs/static/gallery.html
|
||||
cd docs && hugo serve
|
||||
cd docs && hugo serve
|
||||
|
||||
19
README.md
19
README.md
@@ -72,13 +72,15 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
||||
|
||||
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||
|
||||
- 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
|
||||
- 🔥🔥 Decentralized llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
|
||||
- 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
|
||||
- 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
|
||||
- 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
|
||||
- Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
|
||||
- Reranker API: https://github.com/mudler/LocalAI/pull/2121
|
||||
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
|
||||
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
|
||||
- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628
|
||||
- May 2024: 🔥🔥 Decentralized P2P llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
|
||||
- May 2024: 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
|
||||
- May 2024: 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
|
||||
- May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
|
||||
- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
|
||||
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
|
||||
|
||||
Hot topics (looking for contributors):
|
||||
|
||||
@@ -88,6 +90,7 @@ Hot topics (looking for contributors):
|
||||
- Assistant API: https://github.com/mudler/LocalAI/issues/1273
|
||||
- Moderation endpoint: https://github.com/mudler/LocalAI/issues/999
|
||||
- Vulkan: https://github.com/mudler/LocalAI/issues/1647
|
||||
- Anthropic API: https://github.com/mudler/LocalAI/issues/1808
|
||||
|
||||
If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22
|
||||
|
||||
@@ -104,6 +107,7 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
|
||||
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
|
||||
- 📈 [Reranker API](https://localai.io/features/reranker/)
|
||||
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
|
||||
- 🌍 Integrated WebUI!
|
||||
|
||||
## 💻 Usage
|
||||
|
||||
@@ -132,6 +136,7 @@ Other:
|
||||
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
|
||||
- Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
|
||||
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
|
||||
- Github Actions: https://github.com/marketplace/actions/start-localai
|
||||
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/
|
||||
|
||||
|
||||
|
||||
@@ -46,9 +46,14 @@ endif
|
||||
$(INSTALLED_PACKAGES): grpc_build
|
||||
|
||||
$(GRPC_REPO):
|
||||
git clone --depth $(GIT_CLONE_DEPTH) -b $(TAG_LIB_GRPC) $(GIT_REPO_LIB_GRPC) $(GRPC_REPO)/grpc
|
||||
cd $(GRPC_REPO)/grpc && git submodule update --jobs 2 --init --recursive --depth $(GIT_CLONE_DEPTH)
|
||||
|
||||
mkdir -p $(GRPC_REPO)/grpc
|
||||
cd $(GRPC_REPO)/grpc && \
|
||||
git init && \
|
||||
git remote add origin $(GIT_REPO_LIB_GRPC) && \
|
||||
git fetch origin && \
|
||||
git checkout $(TAG_LIB_GRPC) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
$(GRPC_BUILD): $(GRPC_REPO)
|
||||
mkdir -p $(GRPC_BUILD)
|
||||
cd $(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . && cmake --build . --target install
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
|
||||
LLAMA_VERSION?=
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
|
||||
CMAKE_ARGS?=
|
||||
BUILD_TYPE?=
|
||||
@@ -45,11 +46,13 @@ ifeq ($(BUILD_TYPE),sycl_f32)
|
||||
endif
|
||||
|
||||
llama.cpp:
|
||||
git clone --recurse-submodules https://github.com/ggerganov/llama.cpp llama.cpp
|
||||
if [ -z "$(LLAMA_VERSION)" ]; then \
|
||||
exit 1; \
|
||||
fi
|
||||
cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1
|
||||
mkdir -p llama.cpp
|
||||
cd llama.cpp && \
|
||||
git init && \
|
||||
git remote add origin $(LLAMA_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout -b build $(LLAMA_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
llama.cpp/examples/grpc-server: llama.cpp
|
||||
mkdir -p llama.cpp/examples/grpc-server
|
||||
@@ -71,9 +74,9 @@ clean: purge
|
||||
grpc-server: llama.cpp llama.cpp/examples/grpc-server
|
||||
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
|
||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||
bash -c "source $(ONEAPI_VARS); \
|
||||
+bash -c "source $(ONEAPI_VARS); \
|
||||
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
|
||||
else
|
||||
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
|
||||
+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
|
||||
endif
|
||||
cp llama.cpp/build/bin/grpc-server .
|
||||
@@ -2108,6 +2108,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
||||
data["grammar"] = predict->grammar();
|
||||
data["prompt"] = predict->prompt();
|
||||
data["ignore_eos"] = predict->ignoreeos();
|
||||
data["embeddings"] = predict->embeddings();
|
||||
|
||||
// for each image in the request, add the image data
|
||||
//
|
||||
@@ -2258,7 +2259,6 @@ static void params_parse(const backend::ModelOptions* request,
|
||||
// get the directory of modelfile
|
||||
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
|
||||
params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor));
|
||||
params.lora_base = model_dir + "/"+request->lorabase();
|
||||
}
|
||||
params.use_mlock = request->mlock();
|
||||
params.use_mmap = request->mmap();
|
||||
@@ -2385,6 +2385,31 @@ public:
|
||||
|
||||
return grpc::Status::OK;
|
||||
}
|
||||
|
||||
/// https://github.com/ggerganov/llama.cpp/blob/aa2341298924ac89778252015efcb792f2df1e20/examples/server/server.cpp#L2969
|
||||
grpc::Status Embedding(ServerContext* context, const backend::PredictOptions* request, backend::EmbeddingResult* embeddingResult) {
|
||||
json data = parse_options(false, request, llama);
|
||||
const int task_id = llama.queue_tasks.get_new_id();
|
||||
llama.queue_results.add_waiting_task_id(task_id);
|
||||
llama.request_completion(task_id, { {"prompt", data["embeddings"]}, { "n_predict", 0}, {"image_data", ""} }, false, true, -1);
|
||||
// get the result
|
||||
task_result result = llama.queue_results.recv(task_id);
|
||||
//std::cout << "Embedding result JSON" << result.result_json.dump() << std::endl;
|
||||
llama.queue_results.remove_waiting_task_id(task_id);
|
||||
if (!result.error && result.stop) {
|
||||
std::vector<float> embeddings = result.result_json.value("embedding", std::vector<float>());
|
||||
// loop the vector and set the embeddings results
|
||||
for (int i = 0; i < embeddings.size(); i++) {
|
||||
embeddingResult->add_embeddings(embeddings[i]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return grpc::Status::OK;
|
||||
}
|
||||
|
||||
return grpc::Status::OK;
|
||||
}
|
||||
};
|
||||
|
||||
void RunServer(const std::string& server_address) {
|
||||
|
||||
@@ -6,9 +6,9 @@ import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
"github.com/go-skynet/go-llama.cpp"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
)
|
||||
|
||||
type LLM struct {
|
||||
|
||||
@@ -2,4 +2,4 @@
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
optimum[openvino]
|
||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||
@@ -1,6 +1,6 @@
|
||||
accelerate
|
||||
auto-gptq==0.7.1
|
||||
grpcio==1.64.0
|
||||
grpcio==1.65.1
|
||||
protobuf
|
||||
torch
|
||||
certifi
|
||||
|
||||
@@ -3,4 +3,4 @@ intel-extension-for-pytorch
|
||||
torch
|
||||
torchaudio
|
||||
optimum[openvino]
|
||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||
@@ -1,6 +1,6 @@
|
||||
accelerate
|
||||
bark==0.1.5
|
||||
grpcio==1.64.0
|
||||
grpcio==1.65.1
|
||||
protobuf
|
||||
certifi
|
||||
transformers
|
||||
@@ -148,13 +148,13 @@ function startBackend() {
|
||||
ensureVenv
|
||||
|
||||
if [ ! -z ${BACKEND_FILE} ]; then
|
||||
python ${BACKEND_FILE} $@
|
||||
exec python ${BACKEND_FILE} $@
|
||||
elif [ -e "${MY_DIR}/server.py" ]; then
|
||||
python ${MY_DIR}/server.py $@
|
||||
exec python ${MY_DIR}/server.py $@
|
||||
elif [ -e "${MY_DIR}/backend.py" ]; then
|
||||
python ${MY_DIR}/backend.py $@
|
||||
exec python ${MY_DIR}/backend.py $@
|
||||
elif [ -e "${MY_DIR}/${BACKEND_NAME}.py" ]; then
|
||||
python ${MY_DIR}/${BACKEND_NAME}.py $@
|
||||
exec python ${MY_DIR}/${BACKEND_NAME}.py $@
|
||||
fi
|
||||
}
|
||||
|
||||
@@ -210,4 +210,4 @@ function checkTargets() {
|
||||
echo false
|
||||
}
|
||||
|
||||
init
|
||||
init
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
grpcio==1.64.0
|
||||
grpcio==1.65.1
|
||||
protobuf
|
||||
@@ -3,4 +3,4 @@ intel-extension-for-pytorch
|
||||
torch
|
||||
torchaudio
|
||||
optimum[openvino]
|
||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||
@@ -1,6 +1,6 @@
|
||||
accelerate
|
||||
TTS==0.22.0
|
||||
grpcio==1.64.0
|
||||
grpcio==1.65.1
|
||||
protobuf
|
||||
certifi
|
||||
transformers
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python3
|
||||
from concurrent import futures
|
||||
|
||||
import traceback
|
||||
import argparse
|
||||
from collections import defaultdict
|
||||
from enum import Enum
|
||||
@@ -17,35 +17,39 @@ import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
|
||||
from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
|
||||
from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
|
||||
EulerAncestralDiscreteScheduler
|
||||
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
|
||||
from diffusers.pipelines.stable_diffusion import safety_checker
|
||||
from diffusers.utils import load_image,export_to_video
|
||||
from diffusers.utils import load_image, export_to_video
|
||||
from compel import Compel, ReturnedEmbeddingsType
|
||||
|
||||
from transformers import CLIPTextModel
|
||||
from safetensors.torch import load_file
|
||||
|
||||
|
||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||
COMPEL=os.environ.get("COMPEL", "0") == "1"
|
||||
XPU=os.environ.get("XPU", "0") == "1"
|
||||
CLIPSKIP=os.environ.get("CLIPSKIP", "1") == "1"
|
||||
SAFETENSORS=os.environ.get("SAFETENSORS", "1") == "1"
|
||||
CHUNK_SIZE=os.environ.get("CHUNK_SIZE", "8")
|
||||
FPS=os.environ.get("FPS", "7")
|
||||
DISABLE_CPU_OFFLOAD=os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
|
||||
FRAMES=os.environ.get("FRAMES", "64")
|
||||
COMPEL = os.environ.get("COMPEL", "0") == "1"
|
||||
XPU = os.environ.get("XPU", "0") == "1"
|
||||
CLIPSKIP = os.environ.get("CLIPSKIP", "1") == "1"
|
||||
SAFETENSORS = os.environ.get("SAFETENSORS", "1") == "1"
|
||||
CHUNK_SIZE = os.environ.get("CHUNK_SIZE", "8")
|
||||
FPS = os.environ.get("FPS", "7")
|
||||
DISABLE_CPU_OFFLOAD = os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
|
||||
FRAMES = os.environ.get("FRAMES", "64")
|
||||
|
||||
if XPU:
|
||||
import intel_extension_for_pytorch as ipex
|
||||
|
||||
print(ipex.xpu.get_device_name(0))
|
||||
|
||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||
|
||||
|
||||
# https://github.com/CompVis/stable-diffusion/issues/239#issuecomment-1627615287
|
||||
def sc(self, clip_input, images) : return images, [False for i in images]
|
||||
def sc(self, clip_input, images): return images, [False for i in images]
|
||||
|
||||
|
||||
# edit the StableDiffusionSafetyChecker class so that, when called, it just returns the images and an array of True values
|
||||
safety_checker.StableDiffusionSafetyChecker.forward = sc
|
||||
|
||||
@@ -62,6 +66,8 @@ from diffusers.schedulers import (
|
||||
PNDMScheduler,
|
||||
UniPCMultistepScheduler,
|
||||
)
|
||||
|
||||
|
||||
# The scheduler list mapping was taken from here: https://github.com/neggles/animatediff-cli/blob/6f336f5f4b5e38e85d7f06f1744ef42d0a45f2a7/src/animatediff/schedulers.py#L39
|
||||
# Credits to https://github.com/neggles
|
||||
# See https://github.com/huggingface/diffusers/issues/4167 for more details on sched mapping from A1111
|
||||
@@ -136,10 +142,12 @@ def get_scheduler(name: str, config: dict = {}):
|
||||
|
||||
return sched_class.from_config(config)
|
||||
|
||||
|
||||
# Implement the BackendServicer class with the service methods
|
||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
def Health(self, request, context):
|
||||
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
||||
|
||||
def LoadModel(self, request, context):
|
||||
try:
|
||||
print(f"Loading model {request.Model}...", file=sys.stderr)
|
||||
@@ -149,7 +157,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
|
||||
if request.F16Memory:
|
||||
torchType = torch.float16
|
||||
variant="fp16"
|
||||
variant = "fp16"
|
||||
|
||||
local = False
|
||||
modelFile = request.Model
|
||||
@@ -157,38 +165,38 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
self.cfg_scale = 7
|
||||
if request.CFGScale != 0:
|
||||
self.cfg_scale = request.CFGScale
|
||||
|
||||
|
||||
clipmodel = "runwayml/stable-diffusion-v1-5"
|
||||
if request.CLIPModel != "":
|
||||
clipmodel = request.CLIPModel
|
||||
clipsubfolder = "text_encoder"
|
||||
if request.CLIPSubfolder != "":
|
||||
clipsubfolder = request.CLIPSubfolder
|
||||
|
||||
|
||||
# Check if ModelFile exists
|
||||
if request.ModelFile != "":
|
||||
if os.path.exists(request.ModelFile):
|
||||
local = True
|
||||
modelFile = request.ModelFile
|
||||
|
||||
|
||||
fromSingleFile = request.Model.startswith("http") or request.Model.startswith("/") or local
|
||||
self.img2vid=False
|
||||
self.txt2vid=False
|
||||
self.img2vid = False
|
||||
self.txt2vid = False
|
||||
## img2img
|
||||
if (request.PipelineType == "StableDiffusionImg2ImgPipeline") or (request.IMG2IMG and request.PipelineType == ""):
|
||||
if fromSingleFile:
|
||||
self.pipe = StableDiffusionImg2ImgPipeline.from_single_file(modelFile,
|
||||
torch_dtype=torchType)
|
||||
torch_dtype=torchType)
|
||||
else:
|
||||
self.pipe = StableDiffusionImg2ImgPipeline.from_pretrained(request.Model,
|
||||
torch_dtype=torchType)
|
||||
torch_dtype=torchType)
|
||||
|
||||
elif request.PipelineType == "StableDiffusionDepth2ImgPipeline":
|
||||
self.pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(request.Model,
|
||||
torch_dtype=torchType)
|
||||
torch_dtype=torchType)
|
||||
## img2vid
|
||||
elif request.PipelineType == "StableVideoDiffusionPipeline":
|
||||
self.img2vid=True
|
||||
self.img2vid = True
|
||||
self.pipe = StableVideoDiffusionPipeline.from_pretrained(
|
||||
request.Model, torch_dtype=torchType, variant=variant
|
||||
)
|
||||
@@ -197,64 +205,63 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
## text2img
|
||||
elif request.PipelineType == "AutoPipelineForText2Image" or request.PipelineType == "":
|
||||
self.pipe = AutoPipelineForText2Image.from_pretrained(request.Model,
|
||||
torch_dtype=torchType,
|
||||
use_safetensors=SAFETENSORS,
|
||||
variant=variant)
|
||||
torch_dtype=torchType,
|
||||
use_safetensors=SAFETENSORS,
|
||||
variant=variant)
|
||||
elif request.PipelineType == "StableDiffusionPipeline":
|
||||
if fromSingleFile:
|
||||
self.pipe = StableDiffusionPipeline.from_single_file(modelFile,
|
||||
torch_dtype=torchType)
|
||||
torch_dtype=torchType)
|
||||
else:
|
||||
self.pipe = StableDiffusionPipeline.from_pretrained(request.Model,
|
||||
torch_dtype=torchType)
|
||||
torch_dtype=torchType)
|
||||
elif request.PipelineType == "DiffusionPipeline":
|
||||
self.pipe = DiffusionPipeline.from_pretrained(request.Model,
|
||||
torch_dtype=torchType)
|
||||
torch_dtype=torchType)
|
||||
elif request.PipelineType == "VideoDiffusionPipeline":
|
||||
self.txt2vid=True
|
||||
self.txt2vid = True
|
||||
self.pipe = DiffusionPipeline.from_pretrained(request.Model,
|
||||
torch_dtype=torchType)
|
||||
torch_dtype=torchType)
|
||||
elif request.PipelineType == "StableDiffusionXLPipeline":
|
||||
if fromSingleFile:
|
||||
self.pipe = StableDiffusionXLPipeline.from_single_file(modelFile,
|
||||
torch_dtype=torchType,
|
||||
use_safetensors=True)
|
||||
torch_dtype=torchType,
|
||||
use_safetensors=True)
|
||||
else:
|
||||
self.pipe = StableDiffusionXLPipeline.from_pretrained(
|
||||
request.Model,
|
||||
torch_dtype=torchType,
|
||||
use_safetensors=True,
|
||||
request.Model,
|
||||
torch_dtype=torchType,
|
||||
use_safetensors=True,
|
||||
variant=variant)
|
||||
elif request.PipelineType == "StableDiffusion3Pipeline":
|
||||
if fromSingleFile:
|
||||
self.pipe = StableDiffusion3Pipeline.from_single_file(modelFile,
|
||||
torch_dtype=torchType,
|
||||
use_safetensors=True)
|
||||
torch_dtype=torchType,
|
||||
use_safetensors=True)
|
||||
else:
|
||||
self.pipe = StableDiffusion3Pipeline.from_pretrained(
|
||||
request.Model,
|
||||
torch_dtype=torchType,
|
||||
use_safetensors=True,
|
||||
request.Model,
|
||||
torch_dtype=torchType,
|
||||
use_safetensors=True,
|
||||
variant=variant)
|
||||
|
||||
if CLIPSKIP and request.CLIPSkip != 0:
|
||||
self.clip_skip = request.CLIPSkip
|
||||
else:
|
||||
self.clip_skip = 0
|
||||
|
||||
|
||||
# torch_dtype needs to be customized. float16 for GPU, float32 for CPU
|
||||
# TODO: this needs to be customized
|
||||
if request.SchedulerType != "":
|
||||
self.pipe.scheduler = get_scheduler(request.SchedulerType, self.pipe.scheduler.config)
|
||||
|
||||
|
||||
if COMPEL:
|
||||
self.compel = Compel(
|
||||
tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2 ],
|
||||
tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
|
||||
text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
|
||||
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
|
||||
requires_pooled=[False, True]
|
||||
)
|
||||
|
||||
)
|
||||
|
||||
if request.ControlNet:
|
||||
self.controlnet = ControlNetModel.from_pretrained(
|
||||
@@ -263,13 +270,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
self.pipe.controlnet = self.controlnet
|
||||
else:
|
||||
self.controlnet = None
|
||||
|
||||
if request.CUDA:
|
||||
self.pipe.to('cuda')
|
||||
if self.controlnet:
|
||||
self.controlnet.to('cuda')
|
||||
if XPU:
|
||||
self.pipe = self.pipe.to("xpu")
|
||||
# Assume directory from request.ModelFile.
|
||||
# Only if request.LoraAdapter it's not an absolute path
|
||||
if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter:
|
||||
@@ -282,10 +282,17 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
if request.LoraAdapter:
|
||||
# Check if its a local file and not a directory ( we load lora differently for a safetensor file )
|
||||
if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter):
|
||||
self.load_lora_weights(request.LoraAdapter, 1, device, torchType)
|
||||
# self.load_lora_weights(request.LoraAdapter, 1, device, torchType)
|
||||
self.pipe.load_lora_weights(request.LoraAdapter)
|
||||
else:
|
||||
self.pipe.unet.load_attn_procs(request.LoraAdapter)
|
||||
|
||||
if request.CUDA:
|
||||
self.pipe.to('cuda')
|
||||
if self.controlnet:
|
||||
self.controlnet.to('cuda')
|
||||
if XPU:
|
||||
self.pipe = self.pipe.to("xpu")
|
||||
except Exception as err:
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
# Implement your logic here for the LoadModel service
|
||||
@@ -358,9 +365,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
|
||||
# create a dictionary of values for the parameters
|
||||
options = {
|
||||
"negative_prompt": request.negative_prompt,
|
||||
"width": request.width,
|
||||
"height": request.height,
|
||||
"negative_prompt": request.negative_prompt,
|
||||
"width": request.width,
|
||||
"height": request.height,
|
||||
"num_inference_steps": steps,
|
||||
}
|
||||
|
||||
@@ -372,7 +379,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
options["image"] = pose_image
|
||||
|
||||
if CLIPSKIP and self.clip_skip != 0:
|
||||
options["clip_skip"]=self.clip_skip
|
||||
options["clip_skip"] = self.clip_skip
|
||||
|
||||
# Get the keys that we will build the args for our pipe for
|
||||
keys = options.keys()
|
||||
@@ -416,20 +423,21 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
image = self.pipe(
|
||||
guidance_scale=self.cfg_scale,
|
||||
**kwargs
|
||||
).images[0]
|
||||
).images[0]
|
||||
else:
|
||||
# pass the kwargs dictionary to the self.pipe method
|
||||
image = self.pipe(
|
||||
prompt,
|
||||
guidance_scale=self.cfg_scale,
|
||||
**kwargs
|
||||
).images[0]
|
||||
).images[0]
|
||||
|
||||
# save the result
|
||||
image.save(request.dst)
|
||||
|
||||
return backend_pb2.Result(message="Media generated", success=True)
|
||||
|
||||
|
||||
def serve(address):
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||
@@ -453,6 +461,7 @@ def serve(address):
|
||||
except KeyboardInterrupt:
|
||||
server.stop(0)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Run the gRPC server.")
|
||||
parser.add_argument(
|
||||
@@ -460,4 +469,4 @@ if __name__ == "__main__":
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
serve(args.addr)
|
||||
serve(args.addr)
|
||||
|
||||
@@ -3,4 +3,4 @@ intel-extension-for-pytorch
|
||||
torch
|
||||
torchvision
|
||||
optimum[openvino]
|
||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||
@@ -1,7 +1,9 @@
|
||||
setuptools
|
||||
accelerate
|
||||
compel
|
||||
peft
|
||||
diffusers
|
||||
grpcio==1.64.0
|
||||
grpcio==1.65.1
|
||||
opencv-python
|
||||
pillow
|
||||
protobuf
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.64.0
|
||||
grpcio==1.65.0
|
||||
protobuf
|
||||
torch
|
||||
transformers
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
accelerate
|
||||
grpcio==1.64.0
|
||||
grpcio==1.65.1
|
||||
protobuf
|
||||
certifi
|
||||
torch
|
||||
|
||||
@@ -4,4 +4,4 @@
|
||||
packaging
|
||||
setuptools
|
||||
wheel
|
||||
torch==2.2.0
|
||||
torch==2.3.1
|
||||
@@ -1,6 +1,6 @@
|
||||
causal-conv1d==1.2.0.post2
|
||||
mamba-ssm==1.2.0.post1
|
||||
grpcio==1.64.0
|
||||
causal-conv1d==1.4.0
|
||||
mamba-ssm==2.2.2
|
||||
grpcio==1.65.1
|
||||
protobuf
|
||||
certifi
|
||||
transformers
|
||||
@@ -2,22 +2,22 @@
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
optimum[openvino]
|
||||
grpcio==1.64.0
|
||||
grpcio==1.65.1
|
||||
protobuf
|
||||
librosa==0.9.1
|
||||
faster-whisper==0.9.0
|
||||
faster-whisper==1.0.3
|
||||
pydub==0.25.1
|
||||
wavmark==0.0.3
|
||||
numpy==1.22.0
|
||||
numpy==1.26.4
|
||||
eng_to_ipa==0.0.2
|
||||
inflect==7.0.0
|
||||
unidecode==1.3.7
|
||||
whisper-timestamped==1.14.2
|
||||
whisper-timestamped==1.15.4
|
||||
openai
|
||||
python-dotenv
|
||||
pypinyin==0.50.0
|
||||
cn2an==0.5.22
|
||||
jieba==0.42.1
|
||||
gradio==3.48.0
|
||||
gradio==4.38.1
|
||||
langid==1.1.6
|
||||
git+https://github.com/myshell-ai/MeloTTS.git
|
||||
git+https://github.com/myshell-ai/MeloTTS.git
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
grpcio==1.64.0
|
||||
grpcio==1.65.1
|
||||
protobuf
|
||||
librosa==0.9.1
|
||||
faster-whisper==0.9.0
|
||||
librosa
|
||||
faster-whisper
|
||||
pydub==0.25.1
|
||||
wavmark==0.0.3
|
||||
numpy==1.22.0
|
||||
numpy
|
||||
eng_to_ipa==0.0.2
|
||||
inflect==7.0.0
|
||||
unidecode==1.3.7
|
||||
whisper-timestamped==1.14.2
|
||||
inflect
|
||||
unidecode
|
||||
whisper-timestamped
|
||||
openai
|
||||
python-dotenv
|
||||
pypinyin==0.50.0
|
||||
pypinyin
|
||||
cn2an==0.5.22
|
||||
jieba==0.42.1
|
||||
gradio==3.48.0
|
||||
gradio
|
||||
langid==1.1.6
|
||||
git+https://github.com/myshell-ai/MeloTTS.git
|
||||
git+https://github.com/myshell-ai/OpenVoice.git
|
||||
git+https://github.com/myshell-ai/OpenVoice.git
|
||||
|
||||
@@ -3,4 +3,4 @@ intel-extension-for-pytorch
|
||||
torch
|
||||
torchaudio
|
||||
optimum[openvino]
|
||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||
@@ -1,5 +1,5 @@
|
||||
accelerate
|
||||
grpcio==1.64.0
|
||||
grpcio==1.65.1
|
||||
protobuf
|
||||
torch
|
||||
git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
|
||||
|
||||
@@ -2,4 +2,4 @@
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
optimum[openvino]
|
||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||
@@ -2,4 +2,4 @@
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
optimum[openvino]
|
||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||
@@ -1,6 +1,6 @@
|
||||
accelerate
|
||||
rerankers[transformers]
|
||||
grpcio==1.64.0
|
||||
grpcio==1.65.1
|
||||
protobuf
|
||||
certifi
|
||||
transformers
|
||||
@@ -1,6 +1,6 @@
|
||||
accelerate
|
||||
sentence-transformers==2.5.1
|
||||
sentence-transformers==3.0.1
|
||||
transformers
|
||||
grpcio==1.64.0
|
||||
grpcio==1.65.1
|
||||
protobuf
|
||||
certifi
|
||||
@@ -1,7 +1,7 @@
|
||||
accelerate
|
||||
transformers
|
||||
grpcio==1.64.0
|
||||
grpcio==1.65.1
|
||||
protobuf
|
||||
torch
|
||||
scipy==1.13.0
|
||||
scipy==1.14.0
|
||||
certifi
|
||||
@@ -2,4 +2,3 @@
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
optimum[openvino]
|
||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||
@@ -1,6 +1,6 @@
|
||||
accelerate
|
||||
transformers
|
||||
grpcio==1.64.0
|
||||
grpcio==1.65.1
|
||||
protobuf
|
||||
torch
|
||||
certifi
|
||||
|
||||
@@ -3,4 +3,4 @@ intel-extension-for-pytorch
|
||||
torch
|
||||
torchaudio
|
||||
optimum[openvino]
|
||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||
@@ -1,4 +1,4 @@
|
||||
accelerate
|
||||
grpcio==1.64.0
|
||||
grpcio==1.65.1
|
||||
protobuf
|
||||
certifi
|
||||
@@ -2,4 +2,4 @@
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
optimum[openvino]
|
||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||
@@ -1,6 +1,6 @@
|
||||
accelerate
|
||||
vllm
|
||||
grpcio==1.64.0
|
||||
grpcio==1.65.1
|
||||
protobuf
|
||||
certifi
|
||||
transformers
|
||||
|
||||
@@ -28,7 +28,6 @@ type Application struct {
|
||||
// LocalAI System Services
|
||||
BackendMonitorService *services.BackendMonitorService
|
||||
GalleryService *services.GalleryService
|
||||
ListModelsService *services.ListModelsService
|
||||
LocalAIMetricsService *services.LocalAIMetricsService
|
||||
// OpenAIService *services.OpenAIService
|
||||
}
|
||||
|
||||
@@ -57,7 +57,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
||||
if _, err := os.Stat(modelFile); os.IsNotExist(err) {
|
||||
utils.ResetDownloadTimers()
|
||||
// if we failed to load the model, we try to download it
|
||||
err := gallery.InstallModelFromGallery(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction)
|
||||
err := gallery.InstallModelFromGallery(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction, o.EnforcePredownloadScans)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -91,7 +91,7 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
||||
Type: c.ModelType,
|
||||
RopeFreqScale: c.RopeFreqScale,
|
||||
NUMA: c.NUMA,
|
||||
Embeddings: c.Embeddings,
|
||||
Embeddings: *c.Embeddings,
|
||||
LowVRAM: *c.LowVRAM,
|
||||
NGPULayers: int32(*c.NGPULayers),
|
||||
MMap: *c.MMap,
|
||||
|
||||
@@ -9,6 +9,7 @@ var CLI struct {
|
||||
cliContext.Context `embed:""`
|
||||
|
||||
Run RunCMD `cmd:"" help:"Run LocalAI, this the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
|
||||
Federated FederatedCLI `cmd:"" help:"Run LocalAI in federated mode"`
|
||||
Models ModelsCMD `cmd:"" help:"Manage LocalAI models and definitions"`
|
||||
TTS TTSCMD `cmd:"" help:"Convert text to speech"`
|
||||
Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"`
|
||||
|
||||
21
core/cli/federated.go
Normal file
21
core/cli/federated.go
Normal file
@@ -0,0 +1,21 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
cliContext "github.com/mudler/LocalAI/core/cli/context"
|
||||
"github.com/mudler/LocalAI/core/p2p"
|
||||
)
|
||||
|
||||
type FederatedCLI struct {
|
||||
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
|
||||
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
|
||||
LoadBalanced bool `env:"LOCALAI_LOAD_BALANCED,LOAD_BALANCED" default:"false" help:"Enable load balancing" group:"p2p"`
|
||||
}
|
||||
|
||||
func (f *FederatedCLI) Run(ctx *cliContext.Context) error {
|
||||
|
||||
fs := p2p.NewFederatedServer(f.Address, p2p.FederatedID, f.Peer2PeerToken, f.LoadBalanced)
|
||||
|
||||
return fs.Start(context.Background())
|
||||
}
|
||||
@@ -2,6 +2,7 @@ package cli
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
cliContext "github.com/mudler/LocalAI/core/cli/context"
|
||||
@@ -24,7 +25,8 @@ type ModelsList struct {
|
||||
}
|
||||
|
||||
type ModelsInstall struct {
|
||||
ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
|
||||
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
|
||||
ModelArgs []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
|
||||
|
||||
ModelsCMDFlags `embed:""`
|
||||
}
|
||||
@@ -88,9 +90,15 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
|
||||
return err
|
||||
}
|
||||
|
||||
err = gallery.SafetyScanGalleryModel(model)
|
||||
if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
|
||||
}
|
||||
err = startup.InstallModels(galleries, "", mi.ModelsPath, progressCallback, modelName)
|
||||
|
||||
err = startup.InstallModels(galleries, "", mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -3,6 +3,8 @@ package cli
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -40,25 +42,27 @@ type RunCMD struct {
|
||||
Threads int `env:"LOCALAI_THREADS,THREADS" short:"t" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"`
|
||||
ContextSize int `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" default:"512" help:"Default context size for models" group:"performance"`
|
||||
|
||||
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
|
||||
CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
|
||||
CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
|
||||
LibraryPath string `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"`
|
||||
CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
|
||||
UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
|
||||
APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
|
||||
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
|
||||
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"api"`
|
||||
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
|
||||
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
|
||||
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
|
||||
SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
|
||||
PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"`
|
||||
ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
|
||||
EnableWatchdogIdle bool `env:"LOCALAI_WATCHDOG_IDLE,WATCHDOG_IDLE" default:"false" help:"Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout" group:"backends"`
|
||||
WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"`
|
||||
EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
|
||||
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
|
||||
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
|
||||
CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
|
||||
CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
|
||||
LibraryPath string `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"`
|
||||
CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
|
||||
UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
|
||||
APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
|
||||
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
|
||||
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
|
||||
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
|
||||
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
|
||||
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
|
||||
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
|
||||
SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
|
||||
PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"`
|
||||
ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
|
||||
EnableWatchdogIdle bool `env:"LOCALAI_WATCHDOG_IDLE,WATCHDOG_IDLE" default:"false" help:"Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout" group:"backends"`
|
||||
WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"`
|
||||
EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
|
||||
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
|
||||
Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
|
||||
}
|
||||
|
||||
func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
@@ -89,11 +93,13 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
config.WithApiKeys(r.APIKeys),
|
||||
config.WithModelsURL(append(r.Models, r.ModelArgs...)...),
|
||||
config.WithOpaqueErrors(r.OpaqueErrors),
|
||||
config.WithEnforcedPredownloadScans(!r.DisablePredownloadScan),
|
||||
}
|
||||
|
||||
token := ""
|
||||
if r.Peer2Peer || r.Peer2PeerToken != "" {
|
||||
log.Info().Msg("P2P mode enabled")
|
||||
token := r.Peer2PeerToken
|
||||
token = r.Peer2PeerToken
|
||||
if token == "" {
|
||||
// IF no token is provided, and p2p is enabled,
|
||||
// we generate one and wait for the user to pick up the token (this is for interactive)
|
||||
@@ -104,14 +110,46 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
|
||||
log.Info().Msg("To use the token, you can run the following command in another node or terminal:")
|
||||
fmt.Printf("export TOKEN=\"%s\"\nlocal-ai worker p2p-llama-cpp-rpc\n", token)
|
||||
|
||||
// Ask for user confirmation
|
||||
log.Info().Msg("Press a button to proceed")
|
||||
var input string
|
||||
fmt.Scanln(&input)
|
||||
}
|
||||
opts = append(opts, config.WithP2PToken(token))
|
||||
|
||||
node, err := p2p.NewNode(token)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Info().Msg("Starting P2P server discovery...")
|
||||
if err := p2p.LLamaCPPRPCServerDiscoverer(context.Background(), token); err != nil {
|
||||
if err := p2p.ServiceDiscoverer(context.Background(), node, token, "", func(serviceID string, node p2p.NodeData) {
|
||||
var tunnelAddresses []string
|
||||
for _, v := range p2p.GetAvailableNodes("") {
|
||||
if v.IsOnline() {
|
||||
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
|
||||
} else {
|
||||
log.Info().Msgf("Node %s is offline", v.ID)
|
||||
}
|
||||
}
|
||||
tunnelEnvVar := strings.Join(tunnelAddresses, ",")
|
||||
|
||||
os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
|
||||
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if r.Federated {
|
||||
_, port, err := net.SplitHostPort(r.Address)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := p2p.ExposeService(context.Background(), "localhost", port, token, p2p.FederatedID); err != nil {
|
||||
return err
|
||||
}
|
||||
node, err := p2p.NewNode(token)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := p2p.ServiceDiscoverer(context.Background(), node, token, p2p.FederatedID, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,16 +1,22 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
cliContext "github.com/mudler/LocalAI/core/cli/context"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/gallery"
|
||||
"github.com/mudler/LocalAI/pkg/downloader"
|
||||
gguf "github.com/thxcode/gguf-parser-go"
|
||||
)
|
||||
|
||||
type UtilCMD struct {
|
||||
GGUFInfo GGUFInfoCMD `cmd:"" name:"gguf-info" help:"Get information about a GGUF file"`
|
||||
HFScan HFScanCMD `cmd:"" name:"hf-scan" help:"Checks installed models for known security issues. WARNING: this is a best-effort feature and may not catch everything!"`
|
||||
}
|
||||
|
||||
type GGUFInfoCMD struct {
|
||||
@@ -18,6 +24,12 @@ type GGUFInfoCMD struct {
|
||||
Header bool `optional:"" default:"false" name:"header" help:"Show header information"`
|
||||
}
|
||||
|
||||
type HFScanCMD struct {
|
||||
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
|
||||
Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
|
||||
ToScan []string `arg:""`
|
||||
}
|
||||
|
||||
func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
|
||||
if u.Args == nil || len(u.Args) == 0 {
|
||||
return fmt.Errorf("no GGUF file provided")
|
||||
@@ -53,3 +65,37 @@ func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (hfscmd *HFScanCMD) Run(ctx *cliContext.Context) error {
|
||||
log.Info().Msg("LocalAI Security Scanner - This is BEST EFFORT functionality! Currently limited to huggingface models!")
|
||||
if len(hfscmd.ToScan) == 0 {
|
||||
log.Info().Msg("Checking all installed models against galleries")
|
||||
var galleries []config.Gallery
|
||||
if err := json.Unmarshal([]byte(hfscmd.Galleries), &galleries); err != nil {
|
||||
log.Error().Err(err).Msg("unable to load galleries")
|
||||
}
|
||||
|
||||
err := gallery.SafetyScanGalleryModels(galleries, hfscmd.ModelsPath)
|
||||
if err == nil {
|
||||
log.Info().Msg("No security warnings were detected for your installed models. Please note that this is a BEST EFFORT tool, and all issues may not be detected.")
|
||||
} else {
|
||||
log.Error().Err(err).Msg("! WARNING ! A known-vulnerable model is installed!")
|
||||
}
|
||||
return err
|
||||
} else {
|
||||
var errs error = nil
|
||||
for _, uri := range hfscmd.ToScan {
|
||||
log.Info().Str("uri", uri).Msg("scanning specific uri")
|
||||
scanResults, err := downloader.HuggingFaceScan(uri)
|
||||
if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
|
||||
log.Error().Err(err).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("! WARNING ! A known-vulnerable model is included in this repo!")
|
||||
errs = errors.Join(errs, err)
|
||||
}
|
||||
}
|
||||
if errs != nil {
|
||||
return errs
|
||||
}
|
||||
log.Info().Msg("No security warnings were detected for your installed models. Please note that this is a BEST EFFORT tool, and all issues may not be detected.")
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ import (
|
||||
|
||||
type P2P struct {
|
||||
WorkerFlags `embed:""`
|
||||
Token string `env:"LOCALAI_TOKEN,TOKEN" help:"JSON list of galleries"`
|
||||
Token string `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to use"`
|
||||
NoRunner bool `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"`
|
||||
RunnerAddress string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"`
|
||||
RunnerPort string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"`
|
||||
@@ -59,7 +59,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
|
||||
p = r.RunnerPort
|
||||
}
|
||||
|
||||
err = p2p.BindLLamaCPPWorker(context.Background(), address, p, r.Token)
|
||||
err = p2p.ExposeService(context.Background(), address, p, r.Token, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -99,7 +99,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
|
||||
}
|
||||
}()
|
||||
|
||||
err = p2p.BindLLamaCPPWorker(context.Background(), address, fmt.Sprint(port), r.Token)
|
||||
err = p2p.ExposeService(context.Background(), address, fmt.Sprint(port), r.Token, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -31,7 +31,9 @@ type ApplicationConfig struct {
|
||||
PreloadModelsFromPath string
|
||||
CORSAllowOrigins string
|
||||
ApiKeys []string
|
||||
EnforcePredownloadScans bool
|
||||
OpaqueErrors bool
|
||||
P2PToken string
|
||||
|
||||
ModelLibraryURL string
|
||||
|
||||
@@ -95,6 +97,12 @@ func WithCsrf(b bool) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
func WithP2PToken(s string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.P2PToken = s
|
||||
}
|
||||
}
|
||||
|
||||
func WithModelLibraryURL(url string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.ModelLibraryURL = url
|
||||
@@ -294,6 +302,12 @@ func WithApiKeys(apiKeys []string) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
func WithEnforcedPredownloadScans(enforced bool) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.EnforcePredownloadScans = enforced
|
||||
}
|
||||
}
|
||||
|
||||
func WithOpaqueErrors(opaque bool) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.OpaqueErrors = opaque
|
||||
|
||||
@@ -32,7 +32,7 @@ type BackendConfig struct {
|
||||
Threads *int `yaml:"threads"`
|
||||
Debug *bool `yaml:"debug"`
|
||||
Roles map[string]string `yaml:"roles"`
|
||||
Embeddings bool `yaml:"embeddings"`
|
||||
Embeddings *bool `yaml:"embeddings"`
|
||||
Backend string `yaml:"backend"`
|
||||
TemplateConfig TemplateConfig `yaml:"template"`
|
||||
|
||||
@@ -338,6 +338,10 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
||||
cfg.LowVRAM = &falseV
|
||||
}
|
||||
|
||||
if cfg.Embeddings == nil {
|
||||
cfg.Embeddings = &falseV
|
||||
}
|
||||
|
||||
// Value passed by the top level are treated as default (no implicit defaults)
|
||||
// defaults are set by the user
|
||||
if ctx == 0 {
|
||||
|
||||
@@ -20,6 +20,7 @@ const (
|
||||
ChatML
|
||||
Mistral03
|
||||
Gemma
|
||||
DeepSeek2
|
||||
)
|
||||
|
||||
type settingsConfig struct {
|
||||
@@ -37,6 +38,17 @@ var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConf
|
||||
Completion: "{{.Input}}",
|
||||
},
|
||||
},
|
||||
DeepSeek2: {
|
||||
StopWords: []string{"<|end▁of▁sentence|>"},
|
||||
TemplateConfig: TemplateConfig{
|
||||
ChatMessage: `{{if eq .RoleName "user" -}}User: {{.Content }}
|
||||
{{ end -}}
|
||||
{{if eq .RoleName "assistant" -}}Assistant: {{.Content}}<|end▁of▁sentence|>{{end}}
|
||||
{{if eq .RoleName "system" -}}{{.Content}}
|
||||
{{end -}}`,
|
||||
Chat: "{{.Input -}}\nAssistant: ",
|
||||
},
|
||||
},
|
||||
LLaMa3: {
|
||||
StopWords: []string{"<|eot_id|>"},
|
||||
TemplateConfig: TemplateConfig{
|
||||
@@ -208,8 +220,11 @@ func identifyFamily(f *gguf.GGUFFile) familyType {
|
||||
qwen2 := arch == "qwen2"
|
||||
phi3 := arch == "phi-3"
|
||||
gemma := strings.HasPrefix(f.Model().Name, "gemma")
|
||||
deepseek2 := arch == "deepseek2"
|
||||
|
||||
switch {
|
||||
case deepseek2:
|
||||
return DeepSeek2
|
||||
case gemma:
|
||||
return Gemma
|
||||
case llama3:
|
||||
|
||||
@@ -7,15 +7,16 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/imdario/mergo"
|
||||
"dario.cat/mergo"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/pkg/downloader"
|
||||
"github.com/mudler/LocalAI/pkg/utils"
|
||||
"github.com/rs/zerolog/log"
|
||||
"gopkg.in/yaml.v2"
|
||||
)
|
||||
|
||||
// Installs a model from the gallery
|
||||
func InstallModelFromGallery(galleries []config.Gallery, name string, basePath string, req GalleryModel, downloadStatus func(string, string, string, float64)) error {
|
||||
func InstallModelFromGallery(galleries []config.Gallery, name string, basePath string, req GalleryModel, downloadStatus func(string, string, string, float64), enforceScan bool) error {
|
||||
|
||||
applyModel := func(model *GalleryModel) error {
|
||||
name = strings.ReplaceAll(name, string(os.PathSeparator), "__")
|
||||
@@ -63,7 +64,7 @@ func InstallModelFromGallery(galleries []config.Gallery, name string, basePath s
|
||||
return err
|
||||
}
|
||||
|
||||
if err := InstallModel(basePath, installName, &config, model.Overrides, downloadStatus); err != nil {
|
||||
if err := InstallModel(basePath, installName, &config, model.Overrides, downloadStatus, enforceScan); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -189,6 +190,12 @@ func DeleteModelFromSystem(basePath string, name string, additionalFiles []strin
|
||||
|
||||
galleryFile := filepath.Join(basePath, galleryFileName(name))
|
||||
|
||||
for _, f := range []string{configFile, galleryFile} {
|
||||
if err := utils.VerifyPath(f, basePath); err != nil {
|
||||
return fmt.Errorf("failed to verify path %s: %w", f, err)
|
||||
}
|
||||
}
|
||||
|
||||
var err error
|
||||
// Delete all the files associated to the model
|
||||
// read the model config
|
||||
@@ -228,3 +235,29 @@ func DeleteModelFromSystem(basePath string, name string, additionalFiles []strin
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// This is ***NEVER*** going to be perfect or finished.
|
||||
// This is a BEST EFFORT function to surface known-vulnerable models to users.
|
||||
func SafetyScanGalleryModels(galleries []config.Gallery, basePath string) error {
|
||||
galleryModels, err := AvailableGalleryModels(galleries, basePath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, gM := range galleryModels {
|
||||
if gM.Installed {
|
||||
err = errors.Join(err, SafetyScanGalleryModel(gM))
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func SafetyScanGalleryModel(galleryModel *GalleryModel) error {
|
||||
for _, file := range galleryModel.AdditionalFiles {
|
||||
scanResults, err := downloader.HuggingFaceScan(file.URI)
|
||||
if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
|
||||
log.Error().Str("model", galleryModel.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!")
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
package gallery
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/imdario/mergo"
|
||||
"dario.cat/mergo"
|
||||
lconfig "github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/pkg/downloader"
|
||||
"github.com/mudler/LocalAI/pkg/utils"
|
||||
@@ -94,7 +95,7 @@ func ReadConfigFile(filePath string) (*Config, error) {
|
||||
return &config, nil
|
||||
}
|
||||
|
||||
func InstallModel(basePath, nameOverride string, config *Config, configOverrides map[string]interface{}, downloadStatus func(string, string, string, float64)) error {
|
||||
func InstallModel(basePath, nameOverride string, config *Config, configOverrides map[string]interface{}, downloadStatus func(string, string, string, float64), enforceScan bool) error {
|
||||
// Create base path if it doesn't exist
|
||||
err := os.MkdirAll(basePath, 0750)
|
||||
if err != nil {
|
||||
@@ -112,9 +113,18 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides
|
||||
if err := utils.VerifyPath(file.Filename, basePath); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Create file path
|
||||
filePath := filepath.Join(basePath, file.Filename)
|
||||
|
||||
if enforceScan {
|
||||
scanResults, err := downloader.HuggingFaceScan(file.URI)
|
||||
if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
|
||||
log.Error().Str("model", config.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!")
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.Files), downloadStatus); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ var _ = Describe("Model test", func() {
|
||||
defer os.RemoveAll(tempdir)
|
||||
c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
err = InstallModel(tempdir, "", c, map[string]interface{}{}, func(string, string, string, float64) {})
|
||||
err = InstallModel(tempdir, "", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "cerebras.yaml"} {
|
||||
@@ -69,7 +69,7 @@ var _ = Describe("Model test", func() {
|
||||
Expect(models[0].URL).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"))
|
||||
Expect(models[0].Installed).To(BeFalse())
|
||||
|
||||
err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {})
|
||||
err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {}, true)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
dat, err := os.ReadFile(filepath.Join(tempdir, "bert.yaml"))
|
||||
@@ -106,7 +106,7 @@ var _ = Describe("Model test", func() {
|
||||
c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
err = InstallModel(tempdir, "foo", c, map[string]interface{}{}, func(string, string, string, float64) {})
|
||||
err = InstallModel(tempdir, "foo", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "foo.yaml"} {
|
||||
@@ -122,7 +122,7 @@ var _ = Describe("Model test", func() {
|
||||
c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
err = InstallModel(tempdir, "foo", c, map[string]interface{}{"backend": "foo"}, func(string, string, string, float64) {})
|
||||
err = InstallModel(tempdir, "foo", c, map[string]interface{}{"backend": "foo"}, func(string, string, string, float64) {}, true)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
for _, f := range []string{"cerebras", "cerebras-completion.tmpl", "cerebras-chat.tmpl", "foo.yaml"} {
|
||||
@@ -148,7 +148,7 @@ var _ = Describe("Model test", func() {
|
||||
c, err := ReadConfigFile(filepath.Join(os.Getenv("FIXTURES"), "gallery_simple.yaml"))
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
err = InstallModel(tempdir, "../../../foo", c, map[string]interface{}{}, func(string, string, string, float64) {})
|
||||
err = InstallModel(tempdir, "../../../foo", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
|
||||
Expect(err).To(HaveOccurred())
|
||||
})
|
||||
})
|
||||
|
||||
@@ -5,6 +5,8 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
@@ -13,7 +15,7 @@ import (
|
||||
// If no model is specified, it will take the first available
|
||||
// Takes a model string as input which should be the one received from the user request.
|
||||
// It returns the model name resolved from the context and an error if any.
|
||||
func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) {
|
||||
func ModelFromContext(ctx *fiber.Ctx, cl *config.BackendConfigLoader, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) {
|
||||
if ctx.Params("model") != "" {
|
||||
modelInput = ctx.Params("model")
|
||||
}
|
||||
@@ -24,7 +26,7 @@ func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput stri
|
||||
|
||||
// If no model was specified, take the first available
|
||||
if modelInput == "" && !bearerExists && firstModel {
|
||||
models, _ := loader.ListModels()
|
||||
models, _ := services.ListModels(cl, loader, "", true)
|
||||
if len(models) > 0 {
|
||||
modelInput = models[0]
|
||||
log.Debug().Msgf("No model specified, using: %s", modelInput)
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"github.com/chasefleming/elem-go"
|
||||
"github.com/chasefleming/elem-go/attrs"
|
||||
"github.com/mudler/LocalAI/core/gallery"
|
||||
"github.com/mudler/LocalAI/core/p2p"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/mudler/LocalAI/pkg/xsync"
|
||||
)
|
||||
@@ -15,6 +16,14 @@ const (
|
||||
noImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg"
|
||||
)
|
||||
|
||||
func renderElements(n []elem.Node) string {
|
||||
render := ""
|
||||
for _, r := range n {
|
||||
render += r.Render()
|
||||
}
|
||||
return render
|
||||
}
|
||||
|
||||
func DoneProgress(galleryID, text string, showDelete bool) string {
|
||||
var modelName = galleryID
|
||||
// Split by @ and grab the name
|
||||
@@ -72,6 +81,135 @@ func ProgressBar(progress string) string {
|
||||
).Render()
|
||||
}
|
||||
|
||||
func P2PNodeStats(nodes []p2p.NodeData) string {
|
||||
/*
|
||||
<div class="bg-gray-800 p-6 rounded-lg shadow-lg text-left">
|
||||
<p class="text-xl font-semibold text-gray-200">Total Workers Detected: {{ len .Nodes }}</p>
|
||||
{{ $online := 0 }}
|
||||
{{ range .Nodes }}
|
||||
{{ if .IsOnline }}
|
||||
{{ $online = add $online 1 }}
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
<p class="text-xl font-semibold text-gray-200">Total Online Workers: {{$online}}</p>
|
||||
</div>
|
||||
*/
|
||||
|
||||
online := 0
|
||||
for _, n := range nodes {
|
||||
if n.IsOnline() {
|
||||
online++
|
||||
}
|
||||
}
|
||||
|
||||
class := "text-green-500"
|
||||
if online == 0 {
|
||||
class = "text-red-500"
|
||||
}
|
||||
/*
|
||||
<i class="fas fa-circle animate-pulse text-green-500 ml-2 mr-1"></i>
|
||||
*/
|
||||
circle := elem.I(attrs.Props{
|
||||
"class": "fas fa-circle animate-pulse " + class + " ml-2 mr-1",
|
||||
})
|
||||
nodesElements := []elem.Node{
|
||||
elem.Span(
|
||||
attrs.Props{
|
||||
"class": class,
|
||||
},
|
||||
circle,
|
||||
elem.Text(fmt.Sprintf("%d", online)),
|
||||
),
|
||||
elem.Span(
|
||||
attrs.Props{
|
||||
"class": "text-gray-200",
|
||||
},
|
||||
elem.Text(fmt.Sprintf("/%d", len(nodes))),
|
||||
),
|
||||
}
|
||||
|
||||
return renderElements(nodesElements)
|
||||
}
|
||||
|
||||
func P2PNodeBoxes(nodes []p2p.NodeData) string {
|
||||
/*
|
||||
<div class="bg-gray-800 p-4 rounded-lg shadow-lg text-left">
|
||||
<div class="flex items-center mb-2">
|
||||
<i class="fas fa-desktop text-gray-400 mr-2"></i>
|
||||
<span class="text-gray-200 font-semibold">{{.ID}}</span>
|
||||
</div>
|
||||
<p class="text-sm text-gray-400 mt-2 flex items-center">
|
||||
Status:
|
||||
<i class="fas fa-circle {{ if .IsOnline }}text-green-500{{ else }}text-red-500{{ end }} ml-2 mr-1"></i>
|
||||
<span class="{{ if .IsOnline }}text-green-400{{ else }}text-red-400{{ end }}">
|
||||
{{ if .IsOnline }}Online{{ else }}Offline{{ end }}
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
*/
|
||||
|
||||
nodesElements := []elem.Node{}
|
||||
|
||||
for _, n := range nodes {
|
||||
|
||||
nodesElements = append(nodesElements,
|
||||
elem.Div(
|
||||
attrs.Props{
|
||||
"class": "bg-gray-700 p-6 rounded-lg shadow-lg text-left",
|
||||
},
|
||||
elem.P(
|
||||
attrs.Props{
|
||||
"class": "text-sm text-gray-400 mt-2 flex",
|
||||
},
|
||||
elem.I(
|
||||
attrs.Props{
|
||||
"class": "fas fa-desktop text-gray-400 mr-2",
|
||||
},
|
||||
),
|
||||
elem.Text("Name: "),
|
||||
elem.Span(
|
||||
attrs.Props{
|
||||
"class": "text-gray-200 font-semibold ml-2 mr-1",
|
||||
},
|
||||
elem.Text(n.ID),
|
||||
),
|
||||
elem.Text("Status: "),
|
||||
elem.If(
|
||||
n.IsOnline(),
|
||||
elem.I(
|
||||
attrs.Props{
|
||||
"class": "fas fa-circle animate-pulse text-green-500 ml-2 mr-1",
|
||||
},
|
||||
),
|
||||
elem.I(
|
||||
attrs.Props{
|
||||
"class": "fas fa-circle animate-pulse text-red-500 ml-2 mr-1",
|
||||
},
|
||||
),
|
||||
),
|
||||
elem.If(
|
||||
n.IsOnline(),
|
||||
elem.Span(
|
||||
attrs.Props{
|
||||
"class": "text-green-400",
|
||||
},
|
||||
|
||||
elem.Text("Online"),
|
||||
),
|
||||
elem.Span(
|
||||
attrs.Props{
|
||||
"class": "text-red-400",
|
||||
},
|
||||
elem.Text("Offline"),
|
||||
),
|
||||
),
|
||||
),
|
||||
))
|
||||
}
|
||||
|
||||
return renderElements(nodesElements)
|
||||
}
|
||||
|
||||
func StartProgressBar(uid, progress, text string) string {
|
||||
if progress == "" {
|
||||
progress = "0"
|
||||
|
||||
@@ -28,7 +28,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
|
||||
return err
|
||||
}
|
||||
|
||||
modelFile, err := fiberContext.ModelFromContext(c, ml, input.ModelID, false)
|
||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.ModelID, false)
|
||||
if err != nil {
|
||||
modelFile = input.ModelID
|
||||
log.Warn().Msgf("Model not found in context: %s", input.ModelID)
|
||||
|
||||
@@ -12,6 +12,11 @@ import (
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// JINARerankEndpoint acts like the Jina reranker endpoint (https://jina.ai/reranker/)
|
||||
// @Summary Reranks a list of phrases by relevance to a given text query.
|
||||
// @Param request body schema.JINARerankRequest true "query params"
|
||||
// @Success 200 {object} schema.JINARerankResponse "Response"
|
||||
// @Router /v1/rerank [post]
|
||||
func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
req := new(schema.JINARerankRequest)
|
||||
@@ -28,7 +33,7 @@ func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
|
||||
return err
|
||||
}
|
||||
|
||||
modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false)
|
||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||
if err != nil {
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
|
||||
@@ -6,6 +6,11 @@ import (
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
)
|
||||
|
||||
// BackendMonitorEndpoint returns the status of the specified backend
|
||||
// @Summary Backend monitor endpoint
|
||||
// @Param request body schema.BackendMonitorRequest true "Backend statistics request"
|
||||
// @Success 200 {object} proto.StatusResponse "Response"
|
||||
// @Router /backend/monitor [get]
|
||||
func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
|
||||
@@ -23,6 +28,10 @@ func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ct
|
||||
}
|
||||
}
|
||||
|
||||
// BackendMonitorEndpoint shuts down the specified backend
|
||||
// @Summary Backend monitor endpoint
|
||||
// @Param request body schema.BackendMonitorRequest true "Backend statistics request"
|
||||
// @Router /backend/shutdown [post]
|
||||
func BackendShutdownEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
input := new(schema.BackendMonitorRequest)
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"github.com/google/uuid"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/gallery"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
@@ -33,6 +34,10 @@ func CreateModelGalleryEndpointService(galleries []config.Gallery, modelPath str
|
||||
}
|
||||
}
|
||||
|
||||
// GetOpStatusEndpoint returns the job status
|
||||
// @Summary Returns the job status
|
||||
// @Success 200 {object} gallery.GalleryOpStatus "Response"
|
||||
// @Router /models/jobs/{uuid} [get]
|
||||
func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
status := mgs.galleryApplier.GetStatus(c.Params("uuid"))
|
||||
@@ -43,12 +48,21 @@ func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() func(c *fiber.Ctx)
|
||||
}
|
||||
}
|
||||
|
||||
// GetAllStatusEndpoint returns all the jobs status progress
|
||||
// @Summary Returns all the jobs status progress
|
||||
// @Success 200 {object} map[string]gallery.GalleryOpStatus "Response"
|
||||
// @Router /models/jobs [get]
|
||||
func (mgs *ModelGalleryEndpointService) GetAllStatusEndpoint() func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
return c.JSON(mgs.galleryApplier.GetAllStatus())
|
||||
}
|
||||
}
|
||||
|
||||
// ApplyModelGalleryEndpoint installs a new model to a LocalAI instance from the model gallery
|
||||
// @Summary Install models to LocalAI.
|
||||
// @Param request body GalleryModel true "query params"
|
||||
// @Success 200 {object} schema.GalleryResponse "Response"
|
||||
// @Router /models/apply [post]
|
||||
func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
input := new(GalleryModel)
|
||||
@@ -68,13 +82,15 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe
|
||||
Galleries: mgs.galleries,
|
||||
ConfigURL: input.ConfigURL,
|
||||
}
|
||||
return c.JSON(struct {
|
||||
ID string `json:"uuid"`
|
||||
StatusURL string `json:"status"`
|
||||
}{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
|
||||
return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
|
||||
}
|
||||
}
|
||||
|
||||
// DeleteModelGalleryEndpoint lets delete models from a LocalAI instance
|
||||
// @Summary delete models to LocalAI.
|
||||
// @Param name path string true "Model name"
|
||||
// @Success 200 {object} schema.GalleryResponse "Response"
|
||||
// @Router /models/delete/{name} [post]
|
||||
func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
modelName := c.Params("name")
|
||||
@@ -89,13 +105,14 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
|
||||
return err
|
||||
}
|
||||
|
||||
return c.JSON(struct {
|
||||
ID string `json:"uuid"`
|
||||
StatusURL string `json:"status"`
|
||||
}{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
|
||||
return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
|
||||
}
|
||||
}
|
||||
|
||||
// ListModelFromGalleryEndpoint list the available models for installation from the active galleries
|
||||
// @Summary List installable models.
|
||||
// @Success 200 {object} []gallery.GalleryModel "Response"
|
||||
// @Router /models/available [get]
|
||||
func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)
|
||||
@@ -116,6 +133,10 @@ func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *f
|
||||
}
|
||||
}
|
||||
|
||||
// ListModelGalleriesEndpoint list the available galleries configured in LocalAI
|
||||
// @Summary List all Galleries
|
||||
// @Success 200 {object} []config.Gallery "Response"
|
||||
// @Router /models/galleries [get]
|
||||
// NOTE: This is different (and much simpler!) than above! This JUST lists the model galleries that have been loaded, not their contents!
|
||||
func (mgs *ModelGalleryEndpointService) ListModelGalleriesEndpoint() func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
@@ -128,6 +149,11 @@ func (mgs *ModelGalleryEndpointService) ListModelGalleriesEndpoint() func(c *fib
|
||||
}
|
||||
}
|
||||
|
||||
// AddModelGalleryEndpoint adds a gallery in LocalAI
|
||||
// @Summary Adds a gallery in LocalAI
|
||||
// @Param request body config.Gallery true "Gallery details"
|
||||
// @Success 200 {object} []config.Gallery "Response"
|
||||
// @Router /models/galleries [post]
|
||||
func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
input := new(config.Gallery)
|
||||
@@ -150,6 +176,11 @@ func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.
|
||||
}
|
||||
}
|
||||
|
||||
// RemoveModelGalleryEndpoint remove a gallery in LocalAI
|
||||
// @Summary removes a gallery from LocalAI
|
||||
// @Param request body config.Gallery true "Gallery details"
|
||||
// @Success 200 {object} []config.Gallery "Response"
|
||||
// @Router /models/galleries [delete]
|
||||
func (mgs *ModelGalleryEndpointService) RemoveModelGalleryEndpoint() func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
input := new(config.Gallery)
|
||||
@@ -165,6 +196,10 @@ func (mgs *ModelGalleryEndpointService) RemoveModelGalleryEndpoint() func(c *fib
|
||||
mgs.galleries = slices.DeleteFunc(mgs.galleries, func(gallery config.Gallery) bool {
|
||||
return gallery.Name == input.Name
|
||||
})
|
||||
return c.Send(nil)
|
||||
dat, err := json.Marshal(mgs.galleries)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return c.Send(dat)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,8 +9,11 @@ import (
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
)
|
||||
|
||||
// LocalAIMetricsEndpoint returns the metrics endpoint for LocalAI
|
||||
// @Summary Prometheus metrics endpoint
|
||||
// @Param request body config.Gallery true "Gallery details"
|
||||
// @Router /metrics [get]
|
||||
func LocalAIMetricsEndpoint() fiber.Handler {
|
||||
|
||||
return adaptor.HTTPHandler(promhttp.Handler())
|
||||
}
|
||||
|
||||
|
||||
28
core/http/endpoints/localai/p2p.go
Normal file
28
core/http/endpoints/localai/p2p.go
Normal file
@@ -0,0 +1,28 @@
|
||||
package localai
|
||||
|
||||
import (
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/p2p"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
)
|
||||
|
||||
// ShowP2PNodes returns the P2P Nodes
|
||||
// @Summary Returns available P2P nodes
|
||||
// @Success 200 {object} []schema.P2PNodesResponse "Response"
|
||||
// @Router /api/p2p [get]
|
||||
func ShowP2PNodes(c *fiber.Ctx) error {
|
||||
// Render index
|
||||
return c.JSON(schema.P2PNodesResponse{
|
||||
Nodes: p2p.GetAvailableNodes(""),
|
||||
FederatedNodes: p2p.GetAvailableNodes(p2p.FederatedID),
|
||||
})
|
||||
}
|
||||
|
||||
// ShowP2PToken returns the P2P token
|
||||
// @Summary Show the P2P token
|
||||
// @Success 200 {string} string "Response"
|
||||
// @Router /api/p2p/token [get]
|
||||
func ShowP2PToken(appConfig *config.ApplicationConfig) func(*fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error { return c.Send([]byte(appConfig.P2PToken)) }
|
||||
}
|
||||
@@ -29,7 +29,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
|
||||
return err
|
||||
}
|
||||
|
||||
modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false)
|
||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||
if err != nil {
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
|
||||
@@ -4,6 +4,8 @@ import (
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/gallery"
|
||||
"github.com/mudler/LocalAI/core/p2p"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/mudler/LocalAI/internal"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
@@ -11,7 +13,7 @@ import (
|
||||
func WelcomeEndpoint(appConfig *config.ApplicationConfig,
|
||||
cl *config.BackendConfigLoader, ml *model.ModelLoader, modelStatus func() (map[string]string, map[string]string)) func(*fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
models, _ := ml.ListModels()
|
||||
models, _ := services.ListModels(cl, ml, "", true)
|
||||
backendConfigs := cl.GetAllBackendConfigs()
|
||||
|
||||
galleryConfigs := map[string]*gallery.Config{}
|
||||
@@ -27,12 +29,21 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
|
||||
// Get model statuses to display in the UI the operation in progress
|
||||
processingModels, taskTypes := modelStatus()
|
||||
|
||||
modelsWithoutConfig := []string{}
|
||||
|
||||
for _, m := range models {
|
||||
if _, ok := galleryConfigs[m]; !ok {
|
||||
modelsWithoutConfig = append(modelsWithoutConfig, m)
|
||||
}
|
||||
}
|
||||
|
||||
summary := fiber.Map{
|
||||
"Title": "LocalAI API - " + internal.PrintableVersion(),
|
||||
"Version": internal.PrintableVersion(),
|
||||
"Models": models,
|
||||
"Models": modelsWithoutConfig,
|
||||
"ModelsConfig": backendConfigs,
|
||||
"GalleryConfig": galleryConfigs,
|
||||
"IsP2PEnabled": p2p.IsP2PEnabled(),
|
||||
"ApplicationConfig": appConfig,
|
||||
"ProcessingModels": processingModels,
|
||||
"TaskTypes": taskTypes,
|
||||
|
||||
@@ -11,6 +11,8 @@ import (
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/utils"
|
||||
"github.com/rs/zerolog/log"
|
||||
@@ -79,7 +81,7 @@ func CreateAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
|
||||
return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"})
|
||||
}
|
||||
|
||||
if !modelExists(ml, request.Model) {
|
||||
if !modelExists(cl, ml, request.Model) {
|
||||
log.Warn().Msgf("Model: %s was not found in list of models.", request.Model)
|
||||
return c.Status(fiber.StatusBadRequest).SendString("Model " + request.Model + " not found")
|
||||
}
|
||||
@@ -124,6 +126,14 @@ func generateRandomID() int64 {
|
||||
return currentId
|
||||
}
|
||||
|
||||
// ListAssistantsEndpoint is the OpenAI Assistant API endpoint to list assistents https://platform.openai.com/docs/api-reference/assistants/listAssistants
|
||||
// @Summary List available assistents
|
||||
// @Param limit query int false "Limit the number of assistants returned"
|
||||
// @Param order query string false "Order of assistants returned"
|
||||
// @Param after query string false "Return assistants created after the given ID"
|
||||
// @Param before query string false "Return assistants created before the given ID"
|
||||
// @Success 200 {object} []Assistant "Response"
|
||||
// @Router /v1/assistants [get]
|
||||
func ListAssistantsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
// Because we're altering the existing assistants list we should just duplicate it for now.
|
||||
@@ -213,9 +223,9 @@ func filterAssistantsAfterID(assistants []Assistant, id string) []Assistant {
|
||||
return filteredAssistants
|
||||
}
|
||||
|
||||
func modelExists(ml *model.ModelLoader, modelName string) (found bool) {
|
||||
func modelExists(cl *config.BackendConfigLoader, ml *model.ModelLoader, modelName string) (found bool) {
|
||||
found = false
|
||||
models, err := ml.ListModels()
|
||||
models, err := services.ListModels(cl, ml, "", true)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
@@ -229,13 +239,11 @@ func modelExists(ml *model.ModelLoader, modelName string) (found bool) {
|
||||
return
|
||||
}
|
||||
|
||||
// DeleteAssistantEndpoint is the OpenAI Assistant API endpoint to delete assistents https://platform.openai.com/docs/api-reference/assistants/deleteAssistant
|
||||
// @Summary Delete assistents
|
||||
// @Success 200 {object} schema.DeleteAssistantResponse "Response"
|
||||
// @Router /v1/assistants/{assistant_id} [delete]
|
||||
func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
type DeleteAssistantResponse struct {
|
||||
ID string `json:"id"`
|
||||
Object string `json:"object"`
|
||||
Deleted bool `json:"deleted"`
|
||||
}
|
||||
|
||||
return func(c *fiber.Ctx) error {
|
||||
assistantID := c.Params("assistant_id")
|
||||
if assistantID == "" {
|
||||
@@ -246,7 +254,7 @@ func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
|
||||
if assistant.ID == assistantID {
|
||||
Assistants = append(Assistants[:i], Assistants[i+1:]...)
|
||||
utils.SaveConfig(appConfig.ConfigsDir, AssistantsConfigFile, Assistants)
|
||||
return c.Status(fiber.StatusOK).JSON(DeleteAssistantResponse{
|
||||
return c.Status(fiber.StatusOK).JSON(schema.DeleteAssistantResponse{
|
||||
ID: assistantID,
|
||||
Object: "assistant.deleted",
|
||||
Deleted: true,
|
||||
@@ -255,7 +263,7 @@ func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
|
||||
}
|
||||
|
||||
log.Warn().Msgf("Unable to find assistant %s for deletion", assistantID)
|
||||
return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantResponse{
|
||||
return c.Status(fiber.StatusNotFound).JSON(schema.DeleteAssistantResponse{
|
||||
ID: assistantID,
|
||||
Object: "assistant.deleted",
|
||||
Deleted: false,
|
||||
@@ -263,6 +271,10 @@ func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
|
||||
}
|
||||
}
|
||||
|
||||
// GetAssistantEndpoint is the OpenAI Assistant API endpoint to get assistents https://platform.openai.com/docs/api-reference/assistants/getAssistant
|
||||
// @Summary Get assistent data
|
||||
// @Success 200 {object} Assistant "Response"
|
||||
// @Router /v1/assistants/{assistant_id} [get]
|
||||
func GetAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
assistantID := c.Params("assistant_id")
|
||||
@@ -292,19 +304,9 @@ var (
|
||||
AssistantsFileConfigFile = "assistantsFile.json"
|
||||
)
|
||||
|
||||
type AssistantFileRequest struct {
|
||||
FileID string `json:"file_id"`
|
||||
}
|
||||
|
||||
type DeleteAssistantFileResponse struct {
|
||||
ID string `json:"id"`
|
||||
Object string `json:"object"`
|
||||
Deleted bool `json:"deleted"`
|
||||
}
|
||||
|
||||
func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
request := new(AssistantFileRequest)
|
||||
request := new(schema.AssistantFileRequest)
|
||||
if err := c.BodyParser(request); err != nil {
|
||||
return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"})
|
||||
}
|
||||
@@ -345,7 +347,7 @@ func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
|
||||
|
||||
func ListAssistantFilesEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
type ListAssistantFiles struct {
|
||||
Data []File
|
||||
Data []schema.File
|
||||
Object string
|
||||
}
|
||||
|
||||
@@ -463,7 +465,7 @@ func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
|
||||
// Remove the file from the assistantFiles slice
|
||||
AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
|
||||
utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
|
||||
return c.Status(fiber.StatusOK).JSON(DeleteAssistantFileResponse{
|
||||
return c.Status(fiber.StatusOK).JSON(schema.DeleteAssistantFileResponse{
|
||||
ID: fileId,
|
||||
Object: "assistant.file.deleted",
|
||||
Deleted: true,
|
||||
@@ -479,7 +481,7 @@ func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
|
||||
AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
|
||||
utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
|
||||
|
||||
return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantFileResponse{
|
||||
return c.Status(fiber.StatusNotFound).JSON(schema.DeleteAssistantFileResponse{
|
||||
ID: fileId,
|
||||
Object: "assistant.file.deleted",
|
||||
Deleted: true,
|
||||
@@ -490,7 +492,7 @@ func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
|
||||
}
|
||||
log.Warn().Msgf("Unable to find assistant: %s", assistantID)
|
||||
|
||||
return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantFileResponse{
|
||||
return c.Status(fiber.StatusNotFound).JSON(schema.DeleteAssistantFileResponse{
|
||||
ID: fileId,
|
||||
Object: "assistant.file.deleted",
|
||||
Deleted: false,
|
||||
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
@@ -26,7 +27,7 @@ type MockLoader struct {
|
||||
|
||||
func tearDown() func() {
|
||||
return func() {
|
||||
UploadedFiles = []File{}
|
||||
UploadedFiles = []schema.File{}
|
||||
Assistants = []Assistant{}
|
||||
AssistantFiles = []AssistantFile{}
|
||||
_ = os.Remove(filepath.Join(configsDir, AssistantsConfigFile))
|
||||
@@ -294,7 +295,7 @@ func TestAssistantEndpoints(t *testing.T) {
|
||||
file, assistant, err := createFileAndAssistant(t, app, appConfig)
|
||||
assert.NoError(t, err)
|
||||
|
||||
afr := AssistantFileRequest{FileID: file.ID}
|
||||
afr := schema.AssistantFileRequest{FileID: file.ID}
|
||||
af, _, err := createAssistantFile(app, afr, assistant.ID)
|
||||
|
||||
assert.NoError(t, err)
|
||||
@@ -305,7 +306,7 @@ func TestAssistantEndpoints(t *testing.T) {
|
||||
file, assistant, err := createFileAndAssistant(t, app, appConfig)
|
||||
assert.NoError(t, err)
|
||||
|
||||
afr := AssistantFileRequest{FileID: file.ID}
|
||||
afr := schema.AssistantFileRequest{FileID: file.ID}
|
||||
af, _, err := createAssistantFile(app, afr, assistant.ID)
|
||||
assert.NoError(t, err)
|
||||
|
||||
@@ -316,7 +317,7 @@ func TestAssistantEndpoints(t *testing.T) {
|
||||
file, assistant, err := createFileAndAssistant(t, app, appConfig)
|
||||
assert.NoError(t, err)
|
||||
|
||||
afr := AssistantFileRequest{FileID: file.ID}
|
||||
afr := schema.AssistantFileRequest{FileID: file.ID}
|
||||
af, _, err := createAssistantFile(app, afr, assistant.ID)
|
||||
assert.NoError(t, err)
|
||||
t.Cleanup(cleanupAssistantFile(t, app, af.ID, af.AssistantID))
|
||||
@@ -338,7 +339,7 @@ func TestAssistantEndpoints(t *testing.T) {
|
||||
file, assistant, err := createFileAndAssistant(t, app, appConfig)
|
||||
assert.NoError(t, err)
|
||||
|
||||
afr := AssistantFileRequest{FileID: file.ID}
|
||||
afr := schema.AssistantFileRequest{FileID: file.ID}
|
||||
af, _, err := createAssistantFile(app, afr, assistant.ID)
|
||||
assert.NoError(t, err)
|
||||
|
||||
@@ -349,7 +350,7 @@ func TestAssistantEndpoints(t *testing.T) {
|
||||
|
||||
}
|
||||
|
||||
func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationConfig) (File, Assistant, error) {
|
||||
func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationConfig) (schema.File, Assistant, error) {
|
||||
ar := &AssistantRequest{
|
||||
Model: "ggml-gpt4all-j",
|
||||
Name: "3.5-turbo",
|
||||
@@ -362,7 +363,7 @@ func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationC
|
||||
|
||||
assistant, _, err := createAssistant(app, *ar)
|
||||
if err != nil {
|
||||
return File{}, Assistant{}, err
|
||||
return schema.File{}, Assistant{}, err
|
||||
}
|
||||
t.Cleanup(cleanupAllAssistants(t, app, []string{assistant.ID}))
|
||||
|
||||
@@ -374,7 +375,7 @@ func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationC
|
||||
return file, assistant, nil
|
||||
}
|
||||
|
||||
func createAssistantFile(app *fiber.App, afr AssistantFileRequest, assistantId string) (AssistantFile, *http.Response, error) {
|
||||
func createAssistantFile(app *fiber.App, afr schema.AssistantFileRequest, assistantId string) (AssistantFile, *http.Response, error) {
|
||||
afrJson, err := json.Marshal(afr)
|
||||
if err != nil {
|
||||
return AssistantFile{}, nil, err
|
||||
@@ -451,7 +452,7 @@ func cleanupAssistantFile(t *testing.T, app *fiber.App, fileId, assistantId stri
|
||||
resp, err := app.Test(request)
|
||||
assert.NoError(t, err)
|
||||
|
||||
var dafr DeleteAssistantFileResponse
|
||||
var dafr schema.DeleteAssistantFileResponse
|
||||
err = json.NewDecoder(resp.Body).Decode(&dafr)
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, dafr.Deleted)
|
||||
|
||||
@@ -159,7 +159,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
||||
}
|
||||
|
||||
return func(c *fiber.Ctx) error {
|
||||
modelFile, input, err := readRequest(c, ml, startupOptions, true)
|
||||
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
@@ -225,18 +225,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
||||
}
|
||||
|
||||
// Update input grammar
|
||||
// Handle if we should return "name" instead of "functions"
|
||||
if config.FunctionsConfig.FunctionName {
|
||||
jsStruct := funcs.ToJSONNameStructure()
|
||||
config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
|
||||
} else {
|
||||
jsStruct := funcs.ToJSONFunctionStructure()
|
||||
config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
|
||||
}
|
||||
jsStruct := funcs.ToJSONStructure(config.FunctionsConfig.FunctionNameKey, config.FunctionsConfig.FunctionNameKey)
|
||||
config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
|
||||
case input.JSONFunctionGrammarObject != nil:
|
||||
config.Grammar = input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
|
||||
case input.JSONFunctionGrammarObjectName != nil:
|
||||
config.Grammar = input.JSONFunctionGrammarObjectName.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
|
||||
default:
|
||||
// Force picking one of the functions by the request
|
||||
if config.FunctionToCall() != "" {
|
||||
|
||||
@@ -57,7 +57,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
|
||||
}
|
||||
|
||||
return func(c *fiber.Ctx) error {
|
||||
modelFile, input, err := readRequest(c, ml, appConfig, true)
|
||||
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
@@ -16,9 +16,14 @@ import (
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// EditEndpoint is the OpenAI edit API endpoint
|
||||
// @Summary OpenAI edit endpoint
|
||||
// @Param request body schema.OpenAIRequest true "query params"
|
||||
// @Success 200 {object} schema.OpenAIResponse "Response"
|
||||
// @Router /v1/edits [post]
|
||||
func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
modelFile, input, err := readRequest(c, ml, appConfig, true)
|
||||
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ import (
|
||||
// @Router /v1/embeddings [post]
|
||||
func EmbeddingsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
model, input, err := readRequest(c, ml, appConfig, true)
|
||||
model, input, err := readRequest(c, cl, ml, appConfig, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
@@ -9,25 +9,16 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/pkg/utils"
|
||||
)
|
||||
|
||||
var UploadedFiles []File
|
||||
var UploadedFiles []schema.File
|
||||
|
||||
const UploadedFilesFile = "uploadedFiles.json"
|
||||
|
||||
// File represents the structure of a file object from the OpenAI API.
|
||||
type File struct {
|
||||
ID string `json:"id"` // Unique identifier for the file
|
||||
Object string `json:"object"` // Type of the object (e.g., "file")
|
||||
Bytes int `json:"bytes"` // Size of the file in bytes
|
||||
CreatedAt time.Time `json:"created_at"` // The time at which the file was created
|
||||
Filename string `json:"filename"` // The name of the file
|
||||
Purpose string `json:"purpose"` // The purpose of the file (e.g., "fine-tune", "classifications", etc.)
|
||||
}
|
||||
|
||||
// UploadFilesEndpoint https://platform.openai.com/docs/api-reference/files/create
|
||||
func UploadFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
@@ -61,7 +52,7 @@ func UploadFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli
|
||||
return c.Status(fiber.StatusInternalServerError).SendString("Failed to save file: " + err.Error())
|
||||
}
|
||||
|
||||
f := File{
|
||||
f := schema.File{
|
||||
ID: fmt.Sprintf("file-%d", getNextFileId()),
|
||||
Object: "file",
|
||||
Bytes: int(file.Size),
|
||||
@@ -84,14 +75,13 @@ func getNextFileId() int64 {
|
||||
}
|
||||
|
||||
// ListFilesEndpoint https://platform.openai.com/docs/api-reference/files/list
|
||||
// @Summary List files.
|
||||
// @Success 200 {object} schema.ListFiles "Response"
|
||||
// @Router /v1/files [get]
|
||||
func ListFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
type ListFiles struct {
|
||||
Data []File
|
||||
Object string
|
||||
}
|
||||
|
||||
return func(c *fiber.Ctx) error {
|
||||
var listFiles ListFiles
|
||||
var listFiles schema.ListFiles
|
||||
|
||||
purpose := c.Query("purpose")
|
||||
if purpose == "" {
|
||||
@@ -108,7 +98,7 @@ func ListFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Applica
|
||||
}
|
||||
}
|
||||
|
||||
func getFileFromRequest(c *fiber.Ctx) (*File, error) {
|
||||
func getFileFromRequest(c *fiber.Ctx) (*schema.File, error) {
|
||||
id := c.Params("file_id")
|
||||
if id == "" {
|
||||
return nil, fmt.Errorf("file_id parameter is required")
|
||||
@@ -123,7 +113,10 @@ func getFileFromRequest(c *fiber.Ctx) (*File, error) {
|
||||
return nil, fmt.Errorf("unable to find file id %s", id)
|
||||
}
|
||||
|
||||
// GetFilesEndpoint https://platform.openai.com/docs/api-reference/files/retrieve
|
||||
// GetFilesEndpoint is the OpenAI API endpoint to get files https://platform.openai.com/docs/api-reference/files/retrieve
|
||||
// @Summary Returns information about a specific file.
|
||||
// @Success 200 {object} schema.File "Response"
|
||||
// @Router /v1/files/{file_id} [get]
|
||||
func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
file, err := getFileFromRequest(c)
|
||||
@@ -135,13 +128,17 @@ func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Applicat
|
||||
}
|
||||
}
|
||||
|
||||
// DeleteFilesEndpoint https://platform.openai.com/docs/api-reference/files/delete
|
||||
type DeleteStatus struct {
|
||||
Id string
|
||||
Object string
|
||||
Deleted bool
|
||||
}
|
||||
|
||||
// DeleteFilesEndpoint is the OpenAI API endpoint to delete files https://platform.openai.com/docs/api-reference/files/delete
|
||||
// @Summary Delete a file.
|
||||
// @Success 200 {object} DeleteStatus "Response"
|
||||
// @Router /v1/files/{file_id} [delete]
|
||||
func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
type DeleteStatus struct {
|
||||
Id string
|
||||
Object string
|
||||
Deleted bool
|
||||
}
|
||||
|
||||
return func(c *fiber.Ctx) error {
|
||||
file, err := getFileFromRequest(c)
|
||||
@@ -174,7 +171,11 @@ func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli
|
||||
}
|
||||
}
|
||||
|
||||
// GetFilesContentsEndpoint https://platform.openai.com/docs/api-reference/files/retrieve-contents
|
||||
// GetFilesContentsEndpoint is the OpenAI API endpoint to get files content https://platform.openai.com/docs/api-reference/files/retrieve-contents
|
||||
// @Summary Returns information about a specific file.
|
||||
// @Success 200 {string} binary "file"
|
||||
// @Router /v1/files/{file_id}/content [get]
|
||||
// GetFilesContentsEndpoint
|
||||
func GetFilesContentsEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
file, err := getFileFromRequest(c)
|
||||
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
utils2 "github.com/mudler/LocalAI/pkg/utils"
|
||||
@@ -22,11 +23,6 @@ import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
type ListFiles struct {
|
||||
Data []File
|
||||
Object string
|
||||
}
|
||||
|
||||
func startUpApp() (app *fiber.App, option *config.ApplicationConfig, loader *config.BackendConfigLoader) {
|
||||
// Preparing the mocked objects
|
||||
loader = &config.BackendConfigLoader{}
|
||||
@@ -159,7 +155,7 @@ func TestUploadFileExceedSizeLimit(t *testing.T) {
|
||||
resp, _ := app.Test(req)
|
||||
assert.Equal(t, 200, resp.StatusCode)
|
||||
|
||||
var listFiles ListFiles
|
||||
var listFiles schema.ListFiles
|
||||
if err := json.Unmarshal(bodyToByteArray(resp, t), &listFiles); err != nil {
|
||||
t.Errorf("Failed to decode response: %v", err)
|
||||
return
|
||||
@@ -201,7 +197,7 @@ func CallFilesUploadEndpoint(t *testing.T, app *fiber.App, fileName, tag, purpos
|
||||
return app.Test(req)
|
||||
}
|
||||
|
||||
func CallFilesUploadEndpointWithCleanup(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, appConfig *config.ApplicationConfig) File {
|
||||
func CallFilesUploadEndpointWithCleanup(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, appConfig *config.ApplicationConfig) schema.File {
|
||||
// Create a file that exceeds the limit
|
||||
testName := strings.Split(t.Name(), "/")[1]
|
||||
file := createTestFile(t, testName+"-"+fileName, fileSize, appConfig)
|
||||
@@ -280,8 +276,8 @@ func bodyToByteArray(resp *http.Response, t *testing.T) []byte {
|
||||
return bodyBytes
|
||||
}
|
||||
|
||||
func responseToFile(t *testing.T, resp *http.Response) File {
|
||||
var file File
|
||||
func responseToFile(t *testing.T, resp *http.Response) schema.File {
|
||||
var file schema.File
|
||||
responseToString := bodyToString(resp, t)
|
||||
|
||||
err := json.NewDecoder(strings.NewReader(responseToString)).Decode(&file)
|
||||
@@ -292,8 +288,8 @@ func responseToFile(t *testing.T, resp *http.Response) File {
|
||||
return file
|
||||
}
|
||||
|
||||
func responseToListFile(t *testing.T, resp *http.Response) ListFiles {
|
||||
var listFiles ListFiles
|
||||
func responseToListFile(t *testing.T, resp *http.Response) schema.ListFiles {
|
||||
var listFiles schema.ListFiles
|
||||
responseToString := bodyToString(resp, t)
|
||||
|
||||
err := json.NewDecoder(strings.NewReader(responseToString)).Decode(&listFiles)
|
||||
|
||||
@@ -66,7 +66,7 @@ func downloadFile(url string) (string, error) {
|
||||
// @Router /v1/images/generations [post]
|
||||
func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
m, input, err := readRequest(c, ml, appConfig, false)
|
||||
m, input, err := readRequest(c, cl, ml, appConfig, false)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
@@ -2,11 +2,17 @@ package openai
|
||||
|
||||
import (
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error {
|
||||
// ListModelsEndpoint is the OpenAI Models API endpoint https://platform.openai.com/docs/api-reference/models
|
||||
// @Summary List and describe the various models available in the API.
|
||||
// @Success 200 {object} schema.ModelsDataResponse "Response"
|
||||
// @Router /v1/models [get]
|
||||
func ListModelsEndpoint(bcl *config.BackendConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
// If blank, no filter is applied.
|
||||
filter := c.Query("filter")
|
||||
@@ -14,16 +20,30 @@ func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) er
|
||||
// By default, exclude any loose files that are already referenced by a configuration file.
|
||||
excludeConfigured := c.QueryBool("excludeConfigured", true)
|
||||
|
||||
dataModels, err := lms.ListModels(filter, excludeConfigured)
|
||||
dataModels, err := modelList(bcl, ml, filter, excludeConfigured)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return c.JSON(struct {
|
||||
Object string `json:"object"`
|
||||
Data []schema.OpenAIModel `json:"data"`
|
||||
}{
|
||||
return c.JSON(schema.ModelsDataResponse{
|
||||
Object: "list",
|
||||
Data: dataModels,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func modelList(bcl *config.BackendConfigLoader, ml *model.ModelLoader, filter string, excludeConfigured bool) ([]schema.OpenAIModel, error) {
|
||||
|
||||
models, err := services.ListModels(bcl, ml, filter, excludeConfigured)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
dataModels := []schema.OpenAIModel{}
|
||||
|
||||
// Then iterate through the loose files:
|
||||
for _, m := range models {
|
||||
dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"})
|
||||
}
|
||||
|
||||
return dataModels, nil
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@ import (
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
func readRequest(c *fiber.Ctx, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
|
||||
func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
|
||||
input := new(schema.OpenAIRequest)
|
||||
|
||||
// Get input data from the request body
|
||||
@@ -31,7 +31,7 @@ func readRequest(c *fiber.Ctx, ml *model.ModelLoader, o *config.ApplicationConfi
|
||||
|
||||
log.Debug().Msgf("Request received: %s", string(received))
|
||||
|
||||
modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, firstModel)
|
||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, firstModel)
|
||||
|
||||
return modelFile, input, err
|
||||
}
|
||||
|
||||
@@ -25,7 +25,7 @@ import (
|
||||
// @Router /v1/audio/transcriptions [post]
|
||||
func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
m, input, err := readRequest(c, ml, appConfig, false)
|
||||
m, input, err := readRequest(c, cl, ml, appConfig, false)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"github.com/gofiber/swagger"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/localai"
|
||||
"github.com/mudler/LocalAI/core/p2p"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/mudler/LocalAI/internal"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
@@ -56,6 +57,12 @@ func RegisterLocalAIRoutes(app *fiber.App,
|
||||
app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitorService))
|
||||
app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitorService))
|
||||
|
||||
// p2p
|
||||
if p2p.IsP2PEnabled() {
|
||||
app.Get("/api/p2p", auth, localai.ShowP2PNodes)
|
||||
app.Get("/api/p2p/token", auth, localai.ShowP2PToken(appConfig))
|
||||
}
|
||||
|
||||
app.Get("/version", auth, func(c *fiber.Ctx) error {
|
||||
return c.JSON(struct {
|
||||
Version string `json:"version"`
|
||||
|
||||
@@ -5,7 +5,6 @@ import (
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/localai"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/openai"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
@@ -81,8 +80,7 @@ func RegisterOpenAIRoutes(app *fiber.App,
|
||||
app.Static("/generated-audio", appConfig.AudioDir)
|
||||
}
|
||||
|
||||
// models
|
||||
tmpLMS := services.NewListModelsService(ml, cl, appConfig) // TODO: once createApplication() is fully in use, reference the central instance.
|
||||
app.Get("/v1/models", auth, openai.ListModelsEndpoint(tmpLMS))
|
||||
app.Get("/models", auth, openai.ListModelsEndpoint(tmpLMS))
|
||||
// List models
|
||||
app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml))
|
||||
app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml))
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"github.com/mudler/LocalAI/core/gallery"
|
||||
"github.com/mudler/LocalAI/core/http/elements"
|
||||
"github.com/mudler/LocalAI/core/http/endpoints/localai"
|
||||
"github.com/mudler/LocalAI/core/p2p"
|
||||
"github.com/mudler/LocalAI/core/services"
|
||||
"github.com/mudler/LocalAI/internal"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
@@ -26,7 +27,6 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
appConfig *config.ApplicationConfig,
|
||||
galleryService *services.GalleryService,
|
||||
auth func(*fiber.Ctx) error) {
|
||||
tmpLMS := services.NewListModelsService(ml, cl, appConfig) // TODO: once createApplication() is fully in use, reference the central instance.
|
||||
|
||||
// keeps the state of models that are being installed from the UI
|
||||
var processingModels = xsync.NewSyncedMap[string, string]()
|
||||
@@ -53,6 +53,37 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
|
||||
app.Get("/", auth, localai.WelcomeEndpoint(appConfig, cl, ml, modelStatus))
|
||||
|
||||
if p2p.IsP2PEnabled() {
|
||||
app.Get("/p2p", auth, func(c *fiber.Ctx) error {
|
||||
summary := fiber.Map{
|
||||
"Title": "LocalAI - P2P dashboard",
|
||||
"Version": internal.PrintableVersion(),
|
||||
//"Nodes": p2p.GetAvailableNodes(""),
|
||||
//"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
|
||||
"IsP2PEnabled": p2p.IsP2PEnabled(),
|
||||
"P2PToken": appConfig.P2PToken,
|
||||
}
|
||||
|
||||
// Render index
|
||||
return c.Render("views/p2p", summary)
|
||||
})
|
||||
|
||||
/* show nodes live! */
|
||||
app.Get("/p2p/ui/workers", auth, func(c *fiber.Ctx) error {
|
||||
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes("")))
|
||||
})
|
||||
app.Get("/p2p/ui/workers-federation", auth, func(c *fiber.Ctx) error {
|
||||
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.FederatedID)))
|
||||
})
|
||||
|
||||
app.Get("/p2p/ui/workers-stats", auth, func(c *fiber.Ctx) error {
|
||||
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes("")))
|
||||
})
|
||||
app.Get("/p2p/ui/workers-federation-stats", auth, func(c *fiber.Ctx) error {
|
||||
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.FederatedID)))
|
||||
})
|
||||
}
|
||||
|
||||
// Show the Models page (all models)
|
||||
app.Get("/browse", auth, func(c *fiber.Ctx) error {
|
||||
term := c.Query("term")
|
||||
@@ -87,7 +118,9 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
"AllTags": tags,
|
||||
"ProcessingModels": processingModelsData,
|
||||
"AvailableModels": len(models),
|
||||
"TaskTypes": taskTypes,
|
||||
"IsP2PEnabled": p2p.IsP2PEnabled(),
|
||||
|
||||
"TaskTypes": taskTypes,
|
||||
// "ApplicationConfig": appConfig,
|
||||
}
|
||||
|
||||
@@ -236,13 +269,14 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
|
||||
// Show the Chat page
|
||||
app.Get("/chat/:model", auth, func(c *fiber.Ctx) error {
|
||||
backendConfigs, _ := tmpLMS.ListModels("", true)
|
||||
backendConfigs, _ := services.ListModels(cl, ml, "", true)
|
||||
|
||||
summary := fiber.Map{
|
||||
"Title": "LocalAI - Chat with " + c.Params("model"),
|
||||
"ModelsConfig": backendConfigs,
|
||||
"Model": c.Params("model"),
|
||||
"Version": internal.PrintableVersion(),
|
||||
"IsP2PEnabled": p2p.IsP2PEnabled(),
|
||||
}
|
||||
|
||||
// Render index
|
||||
@@ -250,7 +284,7 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
})
|
||||
|
||||
app.Get("/talk/", auth, func(c *fiber.Ctx) error {
|
||||
backendConfigs, _ := tmpLMS.ListModels("", true)
|
||||
backendConfigs, _ := services.ListModels(cl, ml, "", true)
|
||||
|
||||
if len(backendConfigs) == 0 {
|
||||
// If no model is available redirect to the index which suggests how to install models
|
||||
@@ -260,7 +294,8 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
summary := fiber.Map{
|
||||
"Title": "LocalAI - Talk",
|
||||
"ModelsConfig": backendConfigs,
|
||||
"Model": backendConfigs[0].ID,
|
||||
"Model": backendConfigs[0],
|
||||
"IsP2PEnabled": p2p.IsP2PEnabled(),
|
||||
"Version": internal.PrintableVersion(),
|
||||
}
|
||||
|
||||
@@ -270,7 +305,7 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
|
||||
app.Get("/chat/", auth, func(c *fiber.Ctx) error {
|
||||
|
||||
backendConfigs, _ := tmpLMS.ListModels("", true)
|
||||
backendConfigs, _ := services.ListModels(cl, ml, "", true)
|
||||
|
||||
if len(backendConfigs) == 0 {
|
||||
// If no model is available redirect to the index which suggests how to install models
|
||||
@@ -278,10 +313,11 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
}
|
||||
|
||||
summary := fiber.Map{
|
||||
"Title": "LocalAI - Chat with " + backendConfigs[0].ID,
|
||||
"Title": "LocalAI - Chat with " + backendConfigs[0],
|
||||
"ModelsConfig": backendConfigs,
|
||||
"Model": backendConfigs[0].ID,
|
||||
"Model": backendConfigs[0],
|
||||
"Version": internal.PrintableVersion(),
|
||||
"IsP2PEnabled": p2p.IsP2PEnabled(),
|
||||
}
|
||||
|
||||
// Render index
|
||||
@@ -296,6 +332,7 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
"ModelsConfig": backendConfigs,
|
||||
"Model": c.Params("model"),
|
||||
"Version": internal.PrintableVersion(),
|
||||
"IsP2PEnabled": p2p.IsP2PEnabled(),
|
||||
}
|
||||
|
||||
// Render index
|
||||
@@ -316,6 +353,7 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
"ModelsConfig": backendConfigs,
|
||||
"Model": backendConfigs[0].Name,
|
||||
"Version": internal.PrintableVersion(),
|
||||
"IsP2PEnabled": p2p.IsP2PEnabled(),
|
||||
}
|
||||
|
||||
// Render index
|
||||
@@ -330,6 +368,7 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
"ModelsConfig": backendConfigs,
|
||||
"Model": c.Params("model"),
|
||||
"Version": internal.PrintableVersion(),
|
||||
"IsP2PEnabled": p2p.IsP2PEnabled(),
|
||||
}
|
||||
|
||||
// Render index
|
||||
@@ -349,6 +388,7 @@ func RegisterUIRoutes(app *fiber.App,
|
||||
"Title": "LocalAI - Generate audio with " + backendConfigs[0].Name,
|
||||
"ModelsConfig": backendConfigs,
|
||||
"Model": backendConfigs[0].Name,
|
||||
"IsP2PEnabled": p2p.IsP2PEnabled(),
|
||||
"Version": internal.PrintableVersion(),
|
||||
}
|
||||
|
||||
|
||||
29
core/http/static/assets/tw-elements.js
Normal file
29
core/http/static/assets/tw-elements.js
Normal file
File diff suppressed because one or more lines are too long
@@ -81,10 +81,10 @@ ul {
|
||||
li {
|
||||
font-size: 0.875rem; /* Small text size */
|
||||
color: #4a5568; /* Dark gray text */
|
||||
background-color: #f7fafc; /* Very light gray background */
|
||||
/* background-color: #f7fafc; Very light gray background */
|
||||
border-radius: 0.375rem; /* Rounded corners */
|
||||
padding: 0.5rem; /* Padding inside each list item */
|
||||
box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px 0 rgba(0, 0, 0, 0.06); /* Subtle shadow */
|
||||
/*box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px 0 rgba(0, 0, 0, 0.06); Subtle shadow */
|
||||
margin-bottom: 0.5rem; /* Vertical space between list items */
|
||||
}
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user