mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-03 11:13:31 -05:00
Compare commits
440 Commits
v2.13.0
...
docker_ima
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
95f773ee4b | ||
|
|
ad85c5a1e7 | ||
|
|
421eb8a727 | ||
|
|
b7ff441cc0 | ||
|
|
83d867ad46 | ||
|
|
6acba2bcbe | ||
|
|
6a2a10603c | ||
|
|
356907a5cf | ||
|
|
7ab7a188d0 | ||
|
|
ff1a5bfc62 | ||
|
|
522f185baf | ||
|
|
f7b5a4ca7d | ||
|
|
1d30955677 | ||
|
|
d3307e93d3 | ||
|
|
8d9a452e4b | ||
|
|
466eb82845 | ||
|
|
7e562d10a3 | ||
|
|
7b1e792732 | ||
|
|
30b883affe | ||
|
|
20ec4d0342 | ||
|
|
a9f8460086 | ||
|
|
98b3b2b1ab | ||
|
|
e8bc0a789b | ||
|
|
2b6a2c7dde | ||
|
|
c8c8238f9d | ||
|
|
3eaf59021c | ||
|
|
a8bfb6f9c2 | ||
|
|
b783c811db | ||
|
|
59af0e77af | ||
|
|
5d83c8d3a2 | ||
|
|
8f968d0341 | ||
|
|
f93fe30350 | ||
|
|
784ccf97ba | ||
|
|
a0163dafce | ||
|
|
f072cb3cd0 | ||
|
|
e84b31935c | ||
|
|
03b1cf51fd | ||
|
|
9e6dec0bc4 | ||
|
|
04b01cd62c | ||
|
|
a181dd0ebc | ||
|
|
69206fcd4b | ||
|
|
2c94e15746 | ||
|
|
12513ebae0 | ||
|
|
4156a4f15f | ||
|
|
491bb4f174 | ||
|
|
5866fc8ded | ||
|
|
eb4cd78ca6 | ||
|
|
40ce71855a | ||
|
|
9c0d0afd09 | ||
|
|
0f9aa1ef91 | ||
|
|
3ee5ceb9fa | ||
|
|
1bd72a3be5 | ||
|
|
fbd14118bf | ||
|
|
515d98b978 | ||
|
|
789cf6c599 | ||
|
|
0bc82d7270 | ||
|
|
9a7ad75bff | ||
|
|
9fb3e4040b | ||
|
|
070fd1b9da | ||
|
|
dda5b9f260 | ||
|
|
8d84dd4f88 | ||
|
|
f569237a50 | ||
|
|
e265a618d9 | ||
|
|
533343c84f | ||
|
|
260f2e1d94 | ||
|
|
964732590d | ||
|
|
70a2bfe82e | ||
|
|
ba2d969c44 | ||
|
|
d3c78cf4d7 | ||
|
|
34afd891a6 | ||
|
|
d3137775a1 | ||
|
|
e1772026a1 | ||
|
|
d0423254dd | ||
|
|
db0e52ae9d | ||
|
|
4f030f9cd3 | ||
|
|
60fb45eb97 | ||
|
|
43f0688a95 | ||
|
|
8142bdc48f | ||
|
|
89a11e15e7 | ||
|
|
06de542032 | ||
|
|
ecbb61cbf4 | ||
|
|
7f13e3a783 | ||
|
|
c926469b9c | ||
|
|
c30b57a629 | ||
|
|
2f297979a7 | ||
|
|
2437a2769d | ||
|
|
b58b7cad94 | ||
|
|
68148f2a1a | ||
|
|
4897eb0ba2 | ||
|
|
1b43966c48 | ||
|
|
c5f2f11503 | ||
|
|
895443d1b5 | ||
|
|
6a0802e8e6 | ||
|
|
94cfaad7f4 | ||
|
|
ac4a94dd44 | ||
|
|
58bf8614d9 | ||
|
|
3764e50b35 | ||
|
|
3f464d2d9e | ||
|
|
5116d561e1 | ||
|
|
96a7a3b59f | ||
|
|
112d0ffa45 | ||
|
|
25f45827ab | ||
|
|
f322f7c62d | ||
|
|
06351cbbb4 | ||
|
|
8f952d90b0 | ||
|
|
7b205510f9 | ||
|
|
f183fec232 | ||
|
|
91f48b2143 | ||
|
|
f404580256 | ||
|
|
882556d4db | ||
|
|
f8382adbf7 | ||
|
|
80298f94fa | ||
|
|
0f8b489346 | ||
|
|
154694462e | ||
|
|
347317d5d2 | ||
|
|
d40722d2fa | ||
|
|
7b12300f15 | ||
|
|
3c50abffdd | ||
|
|
2eb2ed84ab | ||
|
|
5da10fb769 | ||
|
|
bec883e3ff | ||
|
|
14b41be057 | ||
|
|
aff2acacf9 | ||
|
|
b4d4c0a18f | ||
|
|
3a5f2283ea | ||
|
|
d9109ffafb | ||
|
|
d7e137295a | ||
|
|
6c087ae743 | ||
|
|
88af1033d6 | ||
|
|
e96d2d7667 | ||
|
|
aae7ad9d73 | ||
|
|
23b3d22525 | ||
|
|
603d81dda1 | ||
|
|
a21a52d384 | ||
|
|
219078a5e0 | ||
|
|
3b7a78adda | ||
|
|
0d62594099 | ||
|
|
d38e9090df | ||
|
|
b049805c9b | ||
|
|
0f9b58f2cf | ||
|
|
0f134d557e | ||
|
|
2676e127ae | ||
|
|
270d4f8413 | ||
|
|
2d79cee8cb | ||
|
|
4c9623f50d | ||
|
|
596cf76135 | ||
|
|
a293aa1b79 | ||
|
|
c4eb02c80f | ||
|
|
9c9198ff08 | ||
|
|
83c79d5453 | ||
|
|
88fd000065 | ||
|
|
956d652314 | ||
|
|
9ce2b4d71f | ||
|
|
4e974cb4fc | ||
|
|
d072835796 | ||
|
|
17cf6c4a4d | ||
|
|
fab3e711ff | ||
|
|
4e1463fec2 | ||
|
|
2fc6fe806b | ||
|
|
bdd6769b2d | ||
|
|
1ffee9989f | ||
|
|
34ab442ce9 | ||
|
|
67aa31faad | ||
|
|
6ef78ef7f6 | ||
|
|
daa7544d9c | ||
|
|
34527737bb | ||
|
|
148adebe16 | ||
|
|
bae2a649fd | ||
|
|
90945ebab3 | ||
|
|
4a239a4bff | ||
|
|
5ddaa19914 | ||
|
|
77d752a481 | ||
|
|
29ff51c12a | ||
|
|
c0744899c9 | ||
|
|
c9092ad39c | ||
|
|
b588cae70e | ||
|
|
fb0f188c93 | ||
|
|
b99182c8d4 | ||
|
|
95c65d67f5 | ||
|
|
c603b95ac7 | ||
|
|
13cfa6de0a | ||
|
|
0560c6fd57 | ||
|
|
f24dddae42 | ||
|
|
06b461b061 | ||
|
|
e50a7ba879 | ||
|
|
3b2bce1fc9 | ||
|
|
3fe7e9f678 | ||
|
|
654b661688 | ||
|
|
7f387fb238 | ||
|
|
5d31e5269d | ||
|
|
ff8a6962cd | ||
|
|
10c64dbb55 | ||
|
|
3f7212c660 | ||
|
|
5dc6bace49 | ||
|
|
3cd5918ae6 | ||
|
|
5b75bf16c7 | ||
|
|
0c40f545d4 | ||
|
|
b2fc92daa7 | ||
|
|
0787797961 | ||
|
|
2ba9e27bcf | ||
|
|
4d98dd9ce7 | ||
|
|
087bceccac | ||
|
|
7064697ce5 | ||
|
|
0b99be73b3 | ||
|
|
669cd06dd9 | ||
|
|
2bbc52fcc8 | ||
|
|
577888f3c0 | ||
|
|
1c80f628ff | ||
|
|
10430a00bd | ||
|
|
9f5c274321 | ||
|
|
d075dc44dd | ||
|
|
be8ffbdfcf | ||
|
|
eaf653f3d3 | ||
|
|
e9c28a1ed7 | ||
|
|
ba984c7097 | ||
|
|
ff1f9125ed | ||
|
|
2c82058548 | ||
|
|
16433d2e8e | ||
|
|
345047ed7c | ||
|
|
6343758f9c | ||
|
|
135208806c | ||
|
|
3280de7adf | ||
|
|
db3113c5c8 | ||
|
|
593fb62bf0 | ||
|
|
480834f75b | ||
|
|
3200a6655e | ||
|
|
b90cdced59 | ||
|
|
fc3502b56f | ||
|
|
785adc1ed5 | ||
|
|
e25fc656c9 | ||
|
|
bb3ec56de3 | ||
|
|
785c54e7b0 | ||
|
|
003b43f6fc | ||
|
|
663488b6bd | ||
|
|
e1d6b706f4 | ||
|
|
29615576fb | ||
|
|
f8cea16c03 | ||
|
|
e0187c2a1a | ||
|
|
b76d2fe68a | ||
|
|
ee4f722bf8 | ||
|
|
dce63237f2 | ||
|
|
0b637465d9 | ||
|
|
114f549f5e | ||
|
|
ea330d452d | ||
|
|
eb11a46a73 | ||
|
|
b57e14d65c | ||
|
|
7efa8e75d4 | ||
|
|
7551369abe | ||
|
|
79915bcd11 | ||
|
|
c8d7d14a37 | ||
|
|
c56bc0de98 | ||
|
|
3a9408363b | ||
|
|
21a12c2cdd | ||
|
|
371d0cc1f7 | ||
|
|
23fa92bec0 | ||
|
|
f91e4e5c03 | ||
|
|
6cbe6a4f99 | ||
|
|
491e1d752b | ||
|
|
1542c58466 | ||
|
|
1a3dedece0 | ||
|
|
a58ff00ab1 | ||
|
|
fdb45153fe | ||
|
|
16474bfb40 | ||
|
|
5a6d120a56 | ||
|
|
7a480bb16f | ||
|
|
053531e434 | ||
|
|
b7ab4f25d9 | ||
|
|
73566a2bb2 | ||
|
|
8ccd5ab040 | ||
|
|
5a3db730b9 | ||
|
|
8ad669339e | ||
|
|
a10a952085 | ||
|
|
b37447cac5 | ||
|
|
f2d182a2eb | ||
|
|
6b6c8cdd5f | ||
|
|
5f35e85e86 | ||
|
|
02f1b477df | ||
|
|
9ab8f8f5e0 | ||
|
|
9a255d6453 | ||
|
|
e0ef9e2bb9 | ||
|
|
86627b27f7 | ||
|
|
4e92569d45 | ||
|
|
f7508e3888 | ||
|
|
badfc16df1 | ||
|
|
b584dcf18a | ||
|
|
4c845fb47d | ||
|
|
07c0559d06 | ||
|
|
beb598e4f9 | ||
|
|
c89271b2e4 | ||
|
|
29909666c3 | ||
|
|
566b5cf2ee | ||
|
|
a670318a9f | ||
|
|
84e2407afa | ||
|
|
c4186f13c3 | ||
|
|
4ac7956f68 | ||
|
|
e49ea0123b | ||
|
|
7123d07456 | ||
|
|
2db22087ae | ||
|
|
fa7b2aee9c | ||
|
|
4d70b6fb2d | ||
|
|
e2c3ffb09b | ||
|
|
b4cb22f444 | ||
|
|
5534b13903 | ||
|
|
5b79bd04a7 | ||
|
|
9d8c705fd9 | ||
|
|
310b2171be | ||
|
|
98af0b5d85 | ||
|
|
ca14f95d2c | ||
|
|
1b69b338c0 | ||
|
|
88942e4761 | ||
|
|
efa32a2677 | ||
|
|
dfc420706c | ||
|
|
e2de8a88f7 | ||
|
|
7f4febd6c2 | ||
|
|
93e581dfd0 | ||
|
|
cf513efa78 | ||
|
|
9e8b34427a | ||
|
|
88d0aa1e40 | ||
|
|
9b09eb005f | ||
|
|
4db41b71f3 | ||
|
|
28a421cb1d | ||
|
|
e6768097f4 | ||
|
|
18a04246fa | ||
|
|
f69de3be0d | ||
|
|
650ae620c5 | ||
|
|
6a209cbef6 | ||
|
|
9786bb826d | ||
|
|
9b4c6f348a | ||
|
|
cb6ddb21ec | ||
|
|
0baacca605 | ||
|
|
222d714ec7 | ||
|
|
fd2d89d37b | ||
|
|
6440b608dc | ||
|
|
1937118eab | ||
|
|
bc272d1e4b | ||
|
|
d651f390cd | ||
|
|
ea777f8716 | ||
|
|
eca5200fbd | ||
|
|
0809e9e7a0 | ||
|
|
b66baa3db6 | ||
|
|
6eb77f0d3a | ||
|
|
b20354b3ad | ||
|
|
d6f76c75e1 | ||
|
|
ed4f412f1c | ||
|
|
5bf56e01aa | ||
|
|
5ff5f0b393 | ||
|
|
6559ac11b1 | ||
|
|
02ec546dd6 | ||
|
|
995aa5ed21 | ||
|
|
e28ba4b807 | ||
|
|
d1e3436de5 | ||
|
|
d3ddc9e4aa | ||
|
|
fea9522982 | ||
|
|
fe055d4b36 | ||
|
|
581b894789 | ||
|
|
477655f6e6 | ||
|
|
169d8d21ff | ||
|
|
c5475020fe | ||
|
|
b52ff1249f | ||
|
|
c5798500cb | ||
|
|
67ad3532ec | ||
|
|
5cb96fe7df | ||
|
|
810e8e5855 | ||
|
|
f3bcc648e7 | ||
|
|
3096566333 | ||
|
|
f50c6a4e88 | ||
|
|
ab4ee54855 | ||
|
|
f2d35062d4 | ||
|
|
b69ff46c7e | ||
|
|
117c9873e1 | ||
|
|
17e94fbcb1 | ||
|
|
92f7feb874 | ||
|
|
b70e2bffa3 | ||
|
|
06c43ca285 | ||
|
|
530bec9c64 | ||
|
|
fa10302dd2 | ||
|
|
54faaa87ea | ||
|
|
daba8a85f9 | ||
|
|
ac0f3d6e82 | ||
|
|
da0b6a89ae | ||
|
|
929a68c06d | ||
|
|
a0aa5d01a1 | ||
|
|
dc834cc9d2 | ||
|
|
b58274b8a2 | ||
|
|
a31d00d904 | ||
|
|
2cc1bd85af | ||
|
|
2c5a46bc34 | ||
|
|
f7f8b4804b | ||
|
|
e5bd9a76c7 | ||
|
|
4690b534e0 | ||
|
|
6a7a7996bb | ||
|
|
962ebbaf77 | ||
|
|
f90d56d371 | ||
|
|
445cfd4db3 | ||
|
|
b24d44dc56 | ||
|
|
cd31f8d865 | ||
|
|
970cb3a219 | ||
|
|
f7aabf1b50 | ||
|
|
e38610e521 | ||
|
|
3754f154ee | ||
|
|
29d7812344 | ||
|
|
5fd46175dc | ||
|
|
52a268c38c | ||
|
|
53c3842bc2 | ||
|
|
c4f958e11b | ||
|
|
147440b39b | ||
|
|
baff5ff8c2 | ||
|
|
ea13863221 | ||
|
|
93ca56086e | ||
|
|
11c48a0004 | ||
|
|
b7ea9602f5 | ||
|
|
982dc6a2bd | ||
|
|
74d903acca | ||
|
|
5fef3b0ff1 | ||
|
|
0674893649 | ||
|
|
e8d44447ad | ||
|
|
a24cd4fda0 | ||
|
|
01860674c4 | ||
|
|
987b7ad42d | ||
|
|
21974fe1d3 | ||
|
|
26e1892521 | ||
|
|
a78cd67737 | ||
|
|
5e243ceaeb | ||
|
|
1a0a6f60a7 | ||
|
|
3179c019af | ||
|
|
a8089494fd | ||
|
|
a248ede222 | ||
|
|
0f0ae13ad0 | ||
|
|
773d5d23d5 | ||
|
|
c3982212f9 | ||
|
|
7e6bf6e7a1 | ||
|
|
9fc0135991 | ||
|
|
164be58445 | ||
|
|
1f8461767d | ||
|
|
935f4c23f6 | ||
|
|
4c97406f2b | ||
|
|
fb2a05ff43 | ||
|
|
030d555995 | ||
|
|
56d843c263 | ||
|
|
2dc1fa2474 |
@@ -6,6 +6,11 @@ examples/chatbot-ui/models
|
||||
examples/rwkv/models
|
||||
examples/**/models
|
||||
Dockerfile*
|
||||
__pycache__
|
||||
|
||||
# SonarQube
|
||||
.scannerwork
|
||||
.scannerwork
|
||||
|
||||
# backend virtual environments
|
||||
**/venv
|
||||
backend/python/**/source
|
||||
9
.env
9
.env
@@ -10,7 +10,7 @@
|
||||
#
|
||||
## Define galleries.
|
||||
## models will to install will be visible in `/models/available`
|
||||
# LOCALAI_GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}]
|
||||
# LOCALAI_GALLERIES=[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}]
|
||||
|
||||
## CORS settings
|
||||
# LOCALAI_CORS=true
|
||||
@@ -71,6 +71,11 @@
|
||||
### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
|
||||
# LLAMACPP_PARALLEL=1
|
||||
|
||||
### Define a list of GRPC Servers for llama-cpp workers to distribute the load
|
||||
# https://github.com/ggerganov/llama.cpp/pull/6829
|
||||
# https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md
|
||||
# LLAMACPP_GRPC_SERVERS=""
|
||||
|
||||
### Enable to run parallel requests
|
||||
# LOCALAI_PARALLEL_REQUESTS=true
|
||||
|
||||
@@ -86,4 +91,4 @@
|
||||
# LOCALAI_WATCHDOG_BUSY=true
|
||||
#
|
||||
# Time in duration format (e.g. 1h30m) after which a backend is considered busy
|
||||
# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m
|
||||
# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m
|
||||
|
||||
2
.github/bump_docs.sh
vendored
2
.github/bump_docs.sh
vendored
@@ -2,6 +2,6 @@
|
||||
set -xe
|
||||
REPO=$1
|
||||
|
||||
LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.name')
|
||||
LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.tag_name')
|
||||
|
||||
cat <<< $(jq ".version = \"$LATEST_TAG\"" docs/data/version.json) > docs/data/version.json
|
||||
|
||||
126
.github/checksum_checker.sh
vendored
Normal file
126
.github/checksum_checker.sh
vendored
Normal file
@@ -0,0 +1,126 @@
|
||||
#!/bin/bash
|
||||
# This scripts needs yq and huggingface_hub to be installed
|
||||
# to install hugingface_hub run pip install huggingface_hub
|
||||
|
||||
# Path to the input YAML file
|
||||
input_yaml=$1
|
||||
|
||||
# Function to download file and check checksum using Python
|
||||
function check_and_update_checksum() {
|
||||
model_name="$1"
|
||||
file_name="$2"
|
||||
uri="$3"
|
||||
old_checksum="$4"
|
||||
idx="$5"
|
||||
|
||||
# Download the file and calculate new checksum using Python
|
||||
new_checksum=$(python3 -c "
|
||||
import hashlib
|
||||
from huggingface_hub import hf_hub_download, get_paths_info
|
||||
import requests
|
||||
import sys
|
||||
import os
|
||||
|
||||
uri = '$uri'
|
||||
file_name = uri.split('/')[-1]
|
||||
|
||||
# Function to parse the URI and determine download method
|
||||
# Function to parse the URI and determine download method
|
||||
def parse_uri(uri):
|
||||
if uri.startswith('huggingface://'):
|
||||
repo_id = uri.split('://')[1]
|
||||
return 'huggingface', repo_id.rsplit('/', 1)[0]
|
||||
elif 'huggingface.co' in uri:
|
||||
parts = uri.split('/resolve/')
|
||||
if len(parts) > 1:
|
||||
repo_path = parts[0].split('https://huggingface.co/')[-1]
|
||||
return 'huggingface', repo_path
|
||||
return 'direct', uri
|
||||
|
||||
def calculate_sha256(file_path):
|
||||
sha256_hash = hashlib.sha256()
|
||||
with open(file_path, 'rb') as f:
|
||||
for byte_block in iter(lambda: f.read(4096), b''):
|
||||
sha256_hash.update(byte_block)
|
||||
return sha256_hash.hexdigest()
|
||||
|
||||
download_type, repo_id_or_url = parse_uri(uri)
|
||||
|
||||
new_checksum = None
|
||||
|
||||
# Decide download method based on URI type
|
||||
if download_type == 'huggingface':
|
||||
# Use HF API to pull sha
|
||||
for file in get_paths_info(repo_id_or_url, [file_name], repo_type='model'):
|
||||
try:
|
||||
new_checksum = file.lfs.sha256
|
||||
break
|
||||
except Exception as e:
|
||||
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
|
||||
sys.exit(2)
|
||||
if new_checksum is None:
|
||||
try:
|
||||
file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
|
||||
except Exception as e:
|
||||
print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
|
||||
sys.exit(2)
|
||||
else:
|
||||
response = requests.get(repo_id_or_url)
|
||||
if response.status_code == 200:
|
||||
with open(file_name, 'wb') as f:
|
||||
f.write(response.content)
|
||||
file_path = file_name
|
||||
elif response.status_code == 404:
|
||||
print(f'File not found: {response.status_code}', file=sys.stderr)
|
||||
sys.exit(2)
|
||||
else:
|
||||
print(f'Error downloading file: {response.status_code}', file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
if new_checksum is None:
|
||||
new_checksum = calculate_sha256(file_path)
|
||||
print(new_checksum)
|
||||
os.remove(file_path)
|
||||
else:
|
||||
print(new_checksum)
|
||||
|
||||
")
|
||||
|
||||
if [[ "$new_checksum" == "" ]]; then
|
||||
echo "Error calculating checksum for $file_name. Skipping..."
|
||||
return
|
||||
fi
|
||||
|
||||
echo "Checksum for $file_name: $new_checksum"
|
||||
|
||||
# Compare and update the YAML file if checksums do not match
|
||||
result=$?
|
||||
if [[ $result -eq 2 ]]; then
|
||||
echo "File not found, deleting entry for $file_name..."
|
||||
# yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\"))" "$input_yaml"
|
||||
elif [[ "$old_checksum" != "$new_checksum" ]]; then
|
||||
echo "Checksum mismatch for $file_name. Updating..."
|
||||
yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\").sha256)" "$input_yaml"
|
||||
yq eval -i "(.[$idx].files[] | select(.filename == \"$file_name\")).sha256 = \"$new_checksum\"" "$input_yaml"
|
||||
elif [[ $result -ne 0 ]]; then
|
||||
echo "Error downloading file $file_name. Skipping..."
|
||||
else
|
||||
echo "Checksum match for $file_name. No update needed."
|
||||
fi
|
||||
}
|
||||
|
||||
# Read the YAML and process each file
|
||||
len=$(yq eval '. | length' "$input_yaml")
|
||||
for ((i=0; i<$len; i++))
|
||||
do
|
||||
name=$(yq eval ".[$i].name" "$input_yaml")
|
||||
files_len=$(yq eval ".[$i].files | length" "$input_yaml")
|
||||
for ((j=0; j<$files_len; j++))
|
||||
do
|
||||
filename=$(yq eval ".[$i].files[$j].filename" "$input_yaml")
|
||||
uri=$(yq eval ".[$i].files[$j].uri" "$input_yaml")
|
||||
checksum=$(yq eval ".[$i].files[$j].sha256" "$input_yaml")
|
||||
echo "Checking model $name, file $filename. URI = $uri, Checksum = $checksum"
|
||||
check_and_update_checksum "$name" "$filename" "$uri" "$checksum" "$i"
|
||||
done
|
||||
done
|
||||
297
.github/ci/modelslist.go
vendored
Normal file
297
.github/ci/modelslist.go
vendored
Normal file
@@ -0,0 +1,297 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"html/template"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
var modelPageTemplate string = `
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>LocalAI models</title>
|
||||
<link href="https://cdnjs.cloudflare.com/ajax/libs/flowbite/2.3.0/flowbite.min.css" rel="stylesheet" />
|
||||
<script src="https://cdn.jsdelivr.net/npm/vanilla-lazyload@19.1.3/dist/lazyload.min.js"></script>
|
||||
|
||||
<link
|
||||
rel="stylesheet"
|
||||
href="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/styles/default.min.css"
|
||||
/>
|
||||
<script
|
||||
defer
|
||||
src="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/highlight.min.js"
|
||||
></script>
|
||||
<script
|
||||
defer
|
||||
src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"
|
||||
></script>
|
||||
<script
|
||||
defer
|
||||
src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"
|
||||
></script>
|
||||
<script
|
||||
defer
|
||||
src="https://cdn.jsdelivr.net/npm/dompurify@3.0.6/dist/purify.min.js"
|
||||
></script>
|
||||
|
||||
<link href="/static/general.css" rel="stylesheet" />
|
||||
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
|
||||
<link
|
||||
href="https://fonts.googleapis.com/css?family=Roboto:300,400,500,700,900&display=swap"
|
||||
rel="stylesheet" />
|
||||
<link
|
||||
rel="stylesheet"
|
||||
href="https://cdn.jsdelivr.net/npm/tw-elements/css/tw-elements.min.css" />
|
||||
<script src="https://cdn.tailwindcss.com/3.3.0"></script>
|
||||
<script>
|
||||
tailwind.config = {
|
||||
darkMode: "class",
|
||||
theme: {
|
||||
fontFamily: {
|
||||
sans: ["Roboto", "sans-serif"],
|
||||
body: ["Roboto", "sans-serif"],
|
||||
mono: ["ui-monospace", "monospace"],
|
||||
},
|
||||
},
|
||||
corePlugins: {
|
||||
preflight: false,
|
||||
},
|
||||
};
|
||||
</script>
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.1.1/css/all.min.css">
|
||||
<script src="https://unpkg.com/htmx.org@1.9.12" integrity="sha384-ujb1lZYygJmzgSwoxRggbCHcjc0rB2XoQrxeTUQyRjrOnlCoYta87iKBWq3EsdM2" crossorigin="anonymous"></script>
|
||||
</head>
|
||||
|
||||
<body class="bg-gray-900 text-gray-200">
|
||||
<div class="flex flex-col min-h-screen">
|
||||
|
||||
<nav class="bg-gray-800 shadow-lg">
|
||||
<div class="container mx-auto px-4 py-4">
|
||||
<div class="flex items-center justify-between">
|
||||
<div class="flex items-center">
|
||||
<a href="/" class="text-white text-xl font-bold"><img src="https://github.com/mudler/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
|
||||
<a href="/" class="text-white text-xl font-bold">LocalAI</a>
|
||||
</div>
|
||||
<!-- Menu button for small screens -->
|
||||
<div class="lg:hidden">
|
||||
<button id="menu-toggle" class="text-gray-400 hover:text-white focus:outline-none">
|
||||
<i class="fas fa-bars fa-lg"></i>
|
||||
</button>
|
||||
</div>
|
||||
<!-- Navigation links -->
|
||||
<div class="hidden lg:flex lg:items-center lg:justify-end lg:flex-1 lg:w-0">
|
||||
<a href="https://localai.io" class="text-gray-400 hover:text-white px-3 py-2 rounded" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
|
||||
</div>
|
||||
</div>
|
||||
<!-- Collapsible menu for small screens -->
|
||||
<div class="hidden lg:hidden" id="mobile-menu">
|
||||
<div class="pt-4 pb-3 border-t border-gray-700">
|
||||
|
||||
<a href="https://localai.io" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<style>
|
||||
.is-hidden {
|
||||
display: none;
|
||||
}
|
||||
</style>
|
||||
|
||||
<div class="container mx-auto px-4 flex-grow">
|
||||
|
||||
<div class="models mt-12">
|
||||
<h2 class="text-center text-3xl font-semibold text-gray-100">
|
||||
LocalAI model gallery list </h2><br>
|
||||
|
||||
<h2 class="text-center text-3xl font-semibold text-gray-100">
|
||||
|
||||
🖼️ Available {{.AvailableModels}} models</i> <a href="https://localai.io/models/" target="_blank" >
|
||||
<i class="fas fa-circle-info pr-2"></i>
|
||||
</a></h2>
|
||||
|
||||
<h3>
|
||||
Refer to the Model gallery <a href="https://localai.io/models/" target="_blank" ><i class="fas fa-circle-info pr-2"></i></a> for more information on how to use the models with LocalAI.<br>
|
||||
|
||||
You can install models with the CLI command <code>local-ai models install <model-name></code>. or by using the WebUI.
|
||||
</h3>
|
||||
|
||||
<input class="form-control appearance-none block w-full mt-5 px-3 py-2 text-base font-normal text-gray-300 pb-2 mb-5 bg-gray-800 bg-clip-padding border border-solid border-gray-600 rounded transition ease-in-out m-0 focus:text-gray-300 focus:bg-gray-900 focus:border-blue-500 focus:outline-none" type="search"
|
||||
id="searchbox" placeholder="Live search keyword..">
|
||||
<div class="dark grid grid-cols-1 grid-rows-1 md:grid-cols-3 block rounded-lg shadow-secondary-1 dark:bg-surface-dark">
|
||||
{{ range $_, $model := .Models }}
|
||||
<div class="box me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface pb-2">
|
||||
<div>
|
||||
{{ $icon := "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg" }}
|
||||
{{ if $model.Icon }}
|
||||
{{ $icon = $model.Icon }}
|
||||
{{ end }}
|
||||
<div class="flex justify-center items-center">
|
||||
<img data-src="{{ $icon }}" alt="{{$model.Name}}" class="rounded-t-lg max-h-48 max-w-96 object-cover mt-3 lazy">
|
||||
</div>
|
||||
<div class="p-6 text-surface dark:text-white">
|
||||
<h5 class="mb-2 text-xl font-medium leading-tight">{{$model.Name}}</h5>
|
||||
|
||||
|
||||
<p class="mb-4 text-base truncate">{{ $model.Description }}</p>
|
||||
|
||||
</div>
|
||||
<div class="px-6 pt-4 pb-2">
|
||||
|
||||
<!-- Modal toggle -->
|
||||
<button data-modal-target="{{ $model.Name}}-modal" data-modal-toggle="{{ $model.Name }}-modal" class="block text-white bg-blue-700 hover:bg-blue-800 focus:ring-4 focus:outline-none focus:ring-blue-300 font-medium rounded-lg text-sm px-5 py-2.5 text-center dark:bg-blue-600 dark:hover:bg-blue-700 dark:focus:ring-blue-800" type="button">
|
||||
More info
|
||||
</button>
|
||||
|
||||
<!-- Main modal -->
|
||||
<div id="{{ $model.Name}}-modal" tabindex="-1" aria-hidden="true" class="hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full">
|
||||
<div class="relative p-4 w-full max-w-2xl max-h-full">
|
||||
<!-- Modal content -->
|
||||
<div class="relative bg-white rounded-lg shadow dark:bg-gray-700">
|
||||
<!-- Modal header -->
|
||||
<div class="flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600">
|
||||
<h3 class="text-xl font-semibold text-gray-900 dark:text-white">
|
||||
{{ $model.Name}}
|
||||
</h3>
|
||||
<button type="button" class="text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white" data-modal-hide="{{$model.Name}}-modal">
|
||||
<svg class="w-3 h-3" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 14 14">
|
||||
<path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6"/>
|
||||
</svg>
|
||||
<span class="sr-only">Close modal</span>
|
||||
</button>
|
||||
</div>
|
||||
<!-- Modal body -->
|
||||
<div class="p-4 md:p-5 space-y-4">
|
||||
<div class="flex justify-center items-center">
|
||||
<img data-src="{{ $icon }}" alt="{{$model.Name}}" class="lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3">
|
||||
</div>
|
||||
|
||||
<p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
|
||||
{{ $model.Description }}
|
||||
|
||||
</p>
|
||||
|
||||
<p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
|
||||
To install the model with the CLI, run: <br>
|
||||
<code> local-ai models install {{$model.Name}} </code> <br>
|
||||
|
||||
<hr>
|
||||
See also <a href="https://localai.io/models/" target="_blank" >
|
||||
Installation <i class="fas fa-circle-info pr-2"></i>
|
||||
</a> to see how to install models with the REST API.
|
||||
</p>
|
||||
|
||||
<p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
|
||||
<ul>
|
||||
{{ range $_, $u := $model.URLs }}
|
||||
<li><a href="{{ $u }}" target=_blank><i class="fa-solid fa-link"></i> {{ $u }}</a></li>
|
||||
{{ end }}
|
||||
</ul>
|
||||
</p>
|
||||
</div>
|
||||
<!-- Modal footer -->
|
||||
<div class="flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600">
|
||||
<button data-modal-hide="{{ $model.Name}}-modal" type="button" class="py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{{ end }}
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
var lazyLoadInstance = new LazyLoad({
|
||||
// Your custom settings go here
|
||||
});
|
||||
|
||||
let cards = document.querySelectorAll('.box')
|
||||
|
||||
function liveSearch() {
|
||||
let search_query = document.getElementById("searchbox").value;
|
||||
|
||||
//Use innerText if all contents are visible
|
||||
//Use textContent for including hidden elements
|
||||
for (var i = 0; i < cards.length; i++) {
|
||||
if(cards[i].textContent.toLowerCase()
|
||||
.includes(search_query.toLowerCase())) {
|
||||
cards[i].classList.remove("is-hidden");
|
||||
} else {
|
||||
cards[i].classList.add("is-hidden");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//A little delay
|
||||
let typingTimer;
|
||||
let typeInterval = 500;
|
||||
let searchInput = document.getElementById('searchbox');
|
||||
|
||||
searchInput.addEventListener('keyup', () => {
|
||||
clearTimeout(typingTimer);
|
||||
typingTimer = setTimeout(liveSearch, typeInterval);
|
||||
});
|
||||
</script>
|
||||
|
||||
</div>
|
||||
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/flowbite/2.3.0/flowbite.min.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
`
|
||||
|
||||
type GalleryModel struct {
|
||||
Name string `json:"name" yaml:"name"`
|
||||
URLs []string `json:"urls" yaml:"urls"`
|
||||
Icon string `json:"icon" yaml:"icon"`
|
||||
Description string `json:"description" yaml:"description"`
|
||||
}
|
||||
|
||||
func main() {
|
||||
// read the YAML file which contains the models
|
||||
|
||||
f, err := ioutil.ReadFile(os.Args[1])
|
||||
if err != nil {
|
||||
fmt.Println("Error reading file:", err)
|
||||
return
|
||||
}
|
||||
|
||||
models := []*GalleryModel{}
|
||||
err = yaml.Unmarshal(f, &models)
|
||||
if err != nil {
|
||||
// write to stderr
|
||||
os.Stderr.WriteString("Error unmarshaling YAML: " + err.Error() + "\n")
|
||||
return
|
||||
}
|
||||
|
||||
// render the template
|
||||
data := struct {
|
||||
Models []*GalleryModel
|
||||
AvailableModels int
|
||||
}{
|
||||
Models: models,
|
||||
AvailableModels: len(models),
|
||||
}
|
||||
tmpl := template.Must(template.New("modelPage").Parse(modelPageTemplate))
|
||||
|
||||
err = tmpl.Execute(os.Stdout, data)
|
||||
if err != nil {
|
||||
fmt.Println("Error executing template:", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
7
.github/labeler.yml
vendored
7
.github/labeler.yml
vendored
@@ -8,6 +8,11 @@ kind/documentation:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: '*.md'
|
||||
|
||||
area/ai-model:
|
||||
- any:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: 'gallery/*'
|
||||
|
||||
examples:
|
||||
- any:
|
||||
- changed-files:
|
||||
@@ -16,4 +21,4 @@ examples:
|
||||
ci:
|
||||
- any:
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: '.github/*'
|
||||
- any-glob-to-any-file: '.github/*'
|
||||
|
||||
47
.github/workflows/checksum_checker.yaml
vendored
Normal file
47
.github/workflows/checksum_checker.yaml
vendored
Normal file
@@ -0,0 +1,47 @@
|
||||
name: Check if checksums are up-to-date
|
||||
on:
|
||||
schedule:
|
||||
- cron: 0 20 * * *
|
||||
workflow_dispatch:
|
||||
jobs:
|
||||
checksum_check:
|
||||
runs-on: arc-runner-set
|
||||
steps:
|
||||
- name: Force Install GIT latest
|
||||
run: |
|
||||
sudo apt-get update \
|
||||
&& sudo apt-get install -y software-properties-common \
|
||||
&& sudo apt-get update \
|
||||
&& sudo add-apt-repository -y ppa:git-core/ppa \
|
||||
&& sudo apt-get update \
|
||||
&& sudo apt-get install -y git
|
||||
- uses: actions/checkout@v4
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y pip wget
|
||||
sudo pip install --upgrade pip
|
||||
pip install huggingface_hub
|
||||
- name: 'Setup yq'
|
||||
uses: dcarbone/install-yq-action@v1.1.1
|
||||
with:
|
||||
version: 'v4.43.1'
|
||||
download-compressed: true
|
||||
force: true
|
||||
|
||||
- name: Checksum checker 🔧
|
||||
run: |
|
||||
export HF_HOME=/hf_cache
|
||||
sudo mkdir /hf_cache
|
||||
sudo chmod 777 /hf_cache
|
||||
bash .github/checksum_checker.sh gallery/index.yaml
|
||||
- name: Create Pull Request
|
||||
uses: peter-evans/create-pull-request@v6
|
||||
with:
|
||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||
push-to-fork: ci-forks/LocalAI
|
||||
commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
|
||||
title: 'models(gallery): :arrow_up: update checksum'
|
||||
branch: "update/checksum"
|
||||
body: Updating checksums in gallery/index.yaml
|
||||
signoff: true
|
||||
2
.github/workflows/dependabot_auto.yml
vendored
2
.github/workflows/dependabot_auto.yml
vendored
@@ -14,7 +14,7 @@ jobs:
|
||||
steps:
|
||||
- name: Dependabot metadata
|
||||
id: metadata
|
||||
uses: dependabot/fetch-metadata@v2.0.0
|
||||
uses: dependabot/fetch-metadata@v2.1.0
|
||||
with:
|
||||
github-token: "${{ secrets.GITHUB_TOKEN }}"
|
||||
skip-commit-verification: true
|
||||
|
||||
12
.github/workflows/generate_grpc_cache.yaml
vendored
12
.github/workflows/generate_grpc_cache.yaml
vendored
@@ -1,7 +1,10 @@
|
||||
name: 'generate and publish GRPC docker caches'
|
||||
|
||||
on:
|
||||
- workflow_dispatch
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
|
||||
concurrency:
|
||||
group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||
@@ -14,7 +17,7 @@ jobs:
|
||||
include:
|
||||
- grpc-base-image: ubuntu:22.04
|
||||
runs-on: 'ubuntu-latest'
|
||||
platforms: 'linux/amd64'
|
||||
platforms: 'linux/amd64,linux/arm64'
|
||||
runs-on: ${{matrix.runs-on}}
|
||||
steps:
|
||||
- name: Release space from worker
|
||||
@@ -80,11 +83,12 @@ jobs:
|
||||
# If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
|
||||
build-args: |
|
||||
GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
|
||||
MAKEFLAGS=--jobs=4 --output-sync=target
|
||||
GRPC_VERSION=v1.58.0
|
||||
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
||||
GRPC_VERSION=v1.64.0
|
||||
context: .
|
||||
file: ./Dockerfile
|
||||
cache-to: type=gha,ignore-error=true
|
||||
cache-from: type=gha
|
||||
target: grpc
|
||||
platforms: ${{ matrix.platforms }}
|
||||
push: false
|
||||
59
.github/workflows/generate_intel_image.yaml
vendored
Normal file
59
.github/workflows/generate_intel_image.yaml
vendored
Normal file
@@ -0,0 +1,59 @@
|
||||
name: 'generate and publish intel docker caches'
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
|
||||
concurrency:
|
||||
group: intel-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
generate_caches:
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- base-image: intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04
|
||||
runs-on: 'ubuntu-latest'
|
||||
platforms: 'linux/amd64'
|
||||
runs-on: ${{matrix.runs-on}}
|
||||
steps:
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@master
|
||||
with:
|
||||
platforms: all
|
||||
- name: Login to DockerHub
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
|
||||
- name: Login to quay
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: quay.io
|
||||
username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@master
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Cache Intel images
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
builder: ${{ steps.buildx.outputs.name }}
|
||||
build-args: |
|
||||
BASE_IMAGE=${{ matrix.base-image }}
|
||||
context: .
|
||||
file: ./Dockerfile
|
||||
tags: quay.io/go-skynet/intel-oneapi-base:latest
|
||||
push: true
|
||||
target: intel
|
||||
platforms: ${{ matrix.platforms }}
|
||||
23
.github/workflows/image-pr.yml
vendored
23
.github/workflows/image-pr.yml
vendored
@@ -32,7 +32,7 @@ jobs:
|
||||
strategy:
|
||||
# Pushing with all jobs in parallel
|
||||
# eats the bandwidth of all the nodes
|
||||
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
|
||||
max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }}
|
||||
matrix:
|
||||
include:
|
||||
- build-type: ''
|
||||
@@ -46,7 +46,7 @@ jobs:
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "1"
|
||||
cuda-minor-version: "5"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12-ffmpeg'
|
||||
@@ -61,14 +61,14 @@ jobs:
|
||||
tag-suffix: '-hipblas'
|
||||
ffmpeg: 'false'
|
||||
image-type: 'extras'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'sycl_f16'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
tag-suffix: 'sycl-f16-ffmpeg'
|
||||
ffmpeg: 'true'
|
||||
@@ -110,7 +110,7 @@ jobs:
|
||||
- build-type: 'sycl_f16'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
tag-suffix: 'sycl-f16-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
@@ -119,7 +119,7 @@ jobs:
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "1"
|
||||
cuda-minor-version: "5"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12-ffmpeg-core'
|
||||
@@ -127,4 +127,13 @@ jobs:
|
||||
image-type: 'core'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
- build-type: 'vulkan'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-vulkan-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:22.04"
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
|
||||
63
.github/workflows/image.yml
vendored
63
.github/workflows/image.yml
vendored
@@ -39,7 +39,7 @@ jobs:
|
||||
strategy:
|
||||
# Pushing with all jobs in parallel
|
||||
# eats the bandwidth of all the nodes
|
||||
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
|
||||
max-parallel: ${{ github.event_name != 'pull_request' && 6 || 10 }}
|
||||
matrix:
|
||||
include:
|
||||
# Extra images
|
||||
@@ -64,7 +64,7 @@ jobs:
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "7"
|
||||
cuda-minor-version: "8"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda11'
|
||||
@@ -75,7 +75,7 @@ jobs:
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "1"
|
||||
cuda-minor-version: "5"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12'
|
||||
@@ -86,7 +86,7 @@ jobs:
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "7"
|
||||
cuda-minor-version: "8"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cublas-cuda11-ffmpeg'
|
||||
@@ -100,7 +100,7 @@ jobs:
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "1"
|
||||
cuda-minor-version: "5"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cublas-cuda12-ffmpeg'
|
||||
@@ -129,7 +129,7 @@ jobs:
|
||||
ffmpeg: 'true'
|
||||
image-type: 'extras'
|
||||
aio: "-aio-gpu-hipblas"
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
latest-image: 'latest-gpu-hipblas'
|
||||
latest-image-aio: 'latest-aio-gpu-hipblas'
|
||||
@@ -141,14 +141,14 @@ jobs:
|
||||
tag-suffix: '-hipblas'
|
||||
ffmpeg: 'false'
|
||||
image-type: 'extras'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
- build-type: 'sycl_f16'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
tag-suffix: '-sycl-f16-ffmpeg'
|
||||
ffmpeg: 'true'
|
||||
@@ -161,7 +161,7 @@ jobs:
|
||||
- build-type: 'sycl_f32'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
tag-suffix: '-sycl-f32-ffmpeg'
|
||||
ffmpeg: 'true'
|
||||
@@ -175,7 +175,7 @@ jobs:
|
||||
- build-type: 'sycl_f16'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
tag-suffix: '-sycl-f16-core'
|
||||
ffmpeg: 'false'
|
||||
@@ -185,7 +185,7 @@ jobs:
|
||||
- build-type: 'sycl_f32'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
tag-suffix: '-sycl-f32-core'
|
||||
ffmpeg: 'false'
|
||||
@@ -195,7 +195,7 @@ jobs:
|
||||
- build-type: 'sycl_f16'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
tag-suffix: '-sycl-f16-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
@@ -205,7 +205,7 @@ jobs:
|
||||
- build-type: 'sycl_f32'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
|
||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
tag-suffix: '-sycl-f32-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
@@ -218,7 +218,7 @@ jobs:
|
||||
tag-suffix: '-hipblas-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
@@ -228,11 +228,11 @@ jobs:
|
||||
tag-suffix: '-hipblas-core'
|
||||
ffmpeg: 'false'
|
||||
image-type: 'core'
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
|
||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||
grpc-base-image: "ubuntu:22.04"
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=3 --output-sync=target"
|
||||
|
||||
|
||||
core-image-build:
|
||||
uses: ./.github/workflows/image_build.yml
|
||||
with:
|
||||
@@ -257,61 +257,72 @@ jobs:
|
||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
strategy:
|
||||
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
|
||||
matrix:
|
||||
include:
|
||||
- build-type: ''
|
||||
platforms: 'linux/amd64'
|
||||
platforms: 'linux/amd64,linux/arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
base-image: "ubuntu:22.04"
|
||||
runs-on: 'ubuntu-latest'
|
||||
runs-on: 'arc-runner-set'
|
||||
aio: "-aio-cpu"
|
||||
latest-image: 'latest-cpu'
|
||||
latest-image-aio: 'latest-aio-cpu'
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "7"
|
||||
cuda-minor-version: "8"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda11-core'
|
||||
ffmpeg: ''
|
||||
image-type: 'core'
|
||||
base-image: "ubuntu:22.04"
|
||||
runs-on: 'ubuntu-latest'
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "1"
|
||||
cuda-minor-version: "5"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12-core'
|
||||
ffmpeg: ''
|
||||
image-type: 'core'
|
||||
base-image: "ubuntu:22.04"
|
||||
runs-on: 'ubuntu-latest'
|
||||
runs-on: 'arc-runner-set'
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "11"
|
||||
cuda-minor-version: "7"
|
||||
cuda-minor-version: "8"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda11-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'ubuntu-latest'
|
||||
runs-on: 'arc-runner-set'
|
||||
base-image: "ubuntu:22.04"
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "1"
|
||||
cuda-minor-version: "5"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'ubuntu-latest'
|
||||
runs-on: 'arc-runner-set'
|
||||
base-image: "ubuntu:22.04"
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
- build-type: 'vulkan'
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-vulkan-ffmpeg-core'
|
||||
latest-image: 'latest-vulkan-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'arc-runner-set'
|
||||
base-image: "ubuntu:22.04"
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
|
||||
87
.github/workflows/image_build.yml
vendored
87
.github/workflows/image_build.yml
vendored
@@ -19,11 +19,11 @@ on:
|
||||
type: string
|
||||
cuda-major-version:
|
||||
description: 'CUDA major version'
|
||||
default: "11"
|
||||
default: "12"
|
||||
type: string
|
||||
cuda-minor-version:
|
||||
description: 'CUDA minor version'
|
||||
default: "7"
|
||||
default: "5"
|
||||
type: string
|
||||
platforms:
|
||||
description: 'Platforms'
|
||||
@@ -136,6 +136,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: |
|
||||
@@ -148,7 +149,20 @@ jobs:
|
||||
flavor: |
|
||||
latest=${{ inputs.tag-latest }}
|
||||
suffix=${{ inputs.tag-suffix }}
|
||||
|
||||
- name: Docker meta for PR
|
||||
id: meta_pull_request
|
||||
if: github.event_name == 'pull_request'
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: |
|
||||
ttl.sh/localai-ci-pr-${{ github.event.number }}
|
||||
tags: |
|
||||
type=ref,event=branch
|
||||
type=semver,pattern={{raw}}
|
||||
type=sha
|
||||
flavor: |
|
||||
latest=${{ inputs.tag-latest }}
|
||||
suffix=${{ inputs.tag-suffix }}
|
||||
- name: Docker meta AIO (quay.io)
|
||||
if: inputs.aio != ''
|
||||
id: meta_aio
|
||||
@@ -174,7 +188,6 @@ jobs:
|
||||
type=ref,event=branch
|
||||
type=semver,pattern={{raw}}
|
||||
flavor: |
|
||||
latest=${{ inputs.tag-latest }}
|
||||
suffix=${{ inputs.aio }}
|
||||
|
||||
- name: Set up QEMU
|
||||
@@ -201,30 +214,15 @@ jobs:
|
||||
username: ${{ secrets.quayUsername }}
|
||||
password: ${{ secrets.quayPassword }}
|
||||
|
||||
- name: Cache GRPC
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
if: github.event_name != 'pull_request'
|
||||
with:
|
||||
builder: ${{ steps.buildx.outputs.name }}
|
||||
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
|
||||
# This means that even the MAKEFLAGS have to be an EXACT match.
|
||||
# If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
|
||||
build-args: |
|
||||
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
|
||||
MAKEFLAGS=--jobs=4 --output-sync=target
|
||||
GRPC_VERSION=v1.58.0
|
||||
context: .
|
||||
file: ./Dockerfile
|
||||
cache-from: type=gha
|
||||
target: grpc
|
||||
platforms: ${{ inputs.platforms }}
|
||||
push: false
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
builder: ${{ steps.buildx.outputs.name }}
|
||||
# This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded
|
||||
build-args: |
|
||||
BUILD_TYPE=${{ inputs.build-type }}
|
||||
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
||||
@@ -232,6 +230,9 @@ jobs:
|
||||
FFMPEG=${{ inputs.ffmpeg }}
|
||||
IMAGE_TYPE=${{ inputs.image-type }}
|
||||
BASE_IMAGE=${{ inputs.base-image }}
|
||||
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
|
||||
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
||||
GRPC_VERSION=v1.64.0
|
||||
MAKEFLAGS=${{ inputs.makeflags }}
|
||||
context: .
|
||||
file: ./Dockerfile
|
||||
@@ -240,15 +241,39 @@ jobs:
|
||||
push: ${{ github.event_name != 'pull_request' }}
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
|
||||
- name: Inspect image
|
||||
if: github.event_name != 'pull_request'
|
||||
### Start testing image
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v5
|
||||
if: github.event_name == 'pull_request'
|
||||
with:
|
||||
builder: ${{ steps.buildx.outputs.name }}
|
||||
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
|
||||
# This means that even the MAKEFLAGS have to be an EXACT match.
|
||||
# If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
|
||||
# This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded
|
||||
build-args: |
|
||||
BUILD_TYPE=${{ inputs.build-type }}
|
||||
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
||||
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
|
||||
FFMPEG=${{ inputs.ffmpeg }}
|
||||
IMAGE_TYPE=${{ inputs.image-type }}
|
||||
BASE_IMAGE=${{ inputs.base-image }}
|
||||
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
|
||||
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
||||
GRPC_VERSION=v1.64.0
|
||||
MAKEFLAGS=${{ inputs.makeflags }}
|
||||
context: .
|
||||
file: ./Dockerfile
|
||||
cache-from: type=gha
|
||||
platforms: ${{ inputs.platforms }}
|
||||
push: true
|
||||
tags: ${{ steps.meta_pull_request.outputs.tags }}
|
||||
labels: ${{ steps.meta_pull_request.outputs.labels }}
|
||||
- name: Testing image
|
||||
if: github.event_name == 'pull_request'
|
||||
run: |
|
||||
docker pull localai/localai:${{ steps.meta.outputs.version }}
|
||||
docker image inspect localai/localai:${{ steps.meta.outputs.version }}
|
||||
docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
|
||||
docker image inspect quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
|
||||
|
||||
echo "Image is available at ttl.sh/localai-ci-pr-${{ github.event.number }}:${{ steps.meta_pull_request.outputs.version }}" >> $GITHUB_STEP_SUMMARY
|
||||
## End testing image
|
||||
- name: Build and push AIO image
|
||||
if: inputs.aio != ''
|
||||
uses: docker/build-push-action@v5
|
||||
@@ -299,7 +324,7 @@ jobs:
|
||||
docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
|
||||
docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
|
||||
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
|
||||
|
||||
|
||||
- name: job summary
|
||||
run: |
|
||||
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
290
.github/workflows/release.yaml
vendored
290
.github/workflows/release.yaml
vendored
@@ -1,11 +1,11 @@
|
||||
name: Build and Release
|
||||
|
||||
on:
|
||||
on:
|
||||
- push
|
||||
- pull_request
|
||||
|
||||
env:
|
||||
GRPC_VERSION: v1.58.0
|
||||
GRPC_VERSION: v1.64.0
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
@@ -15,20 +15,8 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
build-linux:
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- build: 'avx2'
|
||||
defines: ''
|
||||
- build: 'avx'
|
||||
defines: '-DLLAMA_AVX2=OFF'
|
||||
- build: 'avx512'
|
||||
defines: '-DLLAMA_AVX512=ON'
|
||||
- build: 'cuda12'
|
||||
defines: ''
|
||||
- build: 'cuda11'
|
||||
defines: ''
|
||||
|
||||
build-linux-arm:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
@@ -39,22 +27,166 @@ jobs:
|
||||
with:
|
||||
go-version: '1.21.x'
|
||||
cache: false
|
||||
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache
|
||||
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
|
||||
- name: Install CUDA Dependencies
|
||||
run: |
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
|
||||
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
|
||||
env:
|
||||
CUDA_VERSION: 12-5
|
||||
- name: Cache grpc
|
||||
id: cache-grpc
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: grpc
|
||||
key: ${{ runner.os }}-arm-grpc-${{ env.GRPC_VERSION }}
|
||||
- name: Build grpc
|
||||
if: steps.cache-grpc.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
|
||||
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && sudo make --jobs 5 --output-sync=target
|
||||
- name: Install gRPC
|
||||
run: |
|
||||
GNU_HOST=aarch64-linux-gnu
|
||||
C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
|
||||
CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
|
||||
|
||||
CROSS_TOOLCHAIN=/usr/$GNU_HOST
|
||||
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
|
||||
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
|
||||
|
||||
# https://cmake.org/cmake/help/v3.13/manual/cmake-toolchains.7.html#cross-compiling-for-linux
|
||||
echo "set(CMAKE_SYSTEM_NAME Linux)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||
echo "set(CMAKE_SYSTEM_PROCESSOR arm)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||
echo "set(CMAKE_STAGING_PREFIX $CROSS_STAGING_PREFIX)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||
echo "set(CMAKE_SYSROOT ${CROSS_TOOLCHAIN}/sysroot)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||
echo "set(CMAKE_C_COMPILER /usr/bin/$C_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||
echo "set(CMAKE_CXX_COMPILER /usr/bin/$CXX_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||
echo "set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||
echo "set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||
echo "set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
||||
echo "set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN
|
||||
GRPC_DIR=$PWD/grpc
|
||||
cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install && \
|
||||
GRPC_CROSS_BUILD_DIR=$GRPC_DIR/cmake/cross_build && \
|
||||
mkdir -p $GRPC_CROSS_BUILD_DIR && \
|
||||
cd $GRPC_CROSS_BUILD_DIR && \
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_INSTALL_PREFIX=$CROSS_TOOLCHAIN/grpc_install \
|
||||
../.. && \
|
||||
sudo make -j`nproc` install
|
||||
- name: Build
|
||||
id: build
|
||||
run: |
|
||||
GNU_HOST=aarch64-linux-gnu
|
||||
C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
|
||||
CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
|
||||
|
||||
CROSS_TOOLCHAIN=/usr/$GNU_HOST
|
||||
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
|
||||
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
|
||||
export PATH=$PATH:$GOPATH/bin
|
||||
export PATH=/usr/local/cuda/bin:$PATH
|
||||
sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
|
||||
sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
|
||||
sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
|
||||
GO_TAGS=p2p \
|
||||
BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
|
||||
GOOS=linux \
|
||||
GOARCH=arm64 \
|
||||
CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: LocalAI-linux-arm64
|
||||
path: release/
|
||||
- name: Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
with:
|
||||
files: |
|
||||
release/*
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.18
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
limit-access-to-actor: true
|
||||
build-linux:
|
||||
runs-on: arc-runner-set
|
||||
steps:
|
||||
- name: Force Install GIT latest
|
||||
run: |
|
||||
sudo apt-get update \
|
||||
&& sudo apt-get install -y software-properties-common \
|
||||
&& sudo apt-get update \
|
||||
&& sudo add-apt-repository -y ppa:git-core/ppa \
|
||||
&& sudo apt-get update \
|
||||
&& sudo apt-get install -y git
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: '1.21.x'
|
||||
cache: false
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg protobuf-compiler
|
||||
- name: Install CUDA Dependencies
|
||||
if: ${{ matrix.build == 'cuda12' || matrix.build == 'cuda11' }}
|
||||
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache cmake
|
||||
- name: Intel Dependencies
|
||||
run: |
|
||||
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
|
||||
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
|
||||
sudo apt update
|
||||
sudo apt install -y intel-basekit
|
||||
- name: Install CUDA Dependencies
|
||||
run: |
|
||||
if [ "${{ matrix.build }}" == "cuda12" ]; then
|
||||
export CUDA_VERSION=12-3
|
||||
else
|
||||
export CUDA_VERSION=11-7
|
||||
fi
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
||||
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
||||
env:
|
||||
CUDA_VERSION: 12-3
|
||||
- name: "Install Hipblas"
|
||||
env:
|
||||
ROCM_VERSION: "6.1"
|
||||
AMDGPU_VERSION: "6.1"
|
||||
run: |
|
||||
set -ex
|
||||
|
||||
sudo apt-get update
|
||||
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
|
||||
|
||||
curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -
|
||||
|
||||
printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | sudo tee /etc/apt/sources.list.d/rocm.list
|
||||
|
||||
printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
|
||||
printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
|
||||
sudo apt-get update
|
||||
|
||||
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
||||
hipblas-dev rocm-dev \
|
||||
rocblas-dev
|
||||
|
||||
sudo apt-get clean
|
||||
sudo rm -rf /var/lib/apt/lists/*
|
||||
sudo ldconfig
|
||||
- name: Cache grpc
|
||||
id: cache-grpc
|
||||
uses: actions/cache@v4
|
||||
@@ -71,25 +203,23 @@ jobs:
|
||||
- name: Install gRPC
|
||||
run: |
|
||||
cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
|
||||
# BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
|
||||
- name: Build
|
||||
id: build
|
||||
env:
|
||||
CMAKE_ARGS: "${{ matrix.defines }}"
|
||||
BUILD_ID: "${{ matrix.build }}"
|
||||
run: |
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
|
||||
export PATH=$PATH:$GOPATH/bin
|
||||
if [ "${{ matrix.build }}" == "cuda12" ] || [ "${{ matrix.build }}" == "cuda11" ]; then
|
||||
export BUILD_TYPE=cublas
|
||||
export PATH=/usr/local/cuda/bin:$PATH
|
||||
make dist
|
||||
else
|
||||
STATIC=true make dist
|
||||
fi
|
||||
export PATH=/usr/local/cuda/bin:$PATH
|
||||
export PATH=/opt/rocm/bin:$PATH
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
|
||||
GO_TAGS=p2p \
|
||||
BACKEND_LIBS="./ld.so /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/libgomp.so.1" \
|
||||
make -j4 dist
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: LocalAI-linux-${{ matrix.build }}
|
||||
name: LocalAI-linux
|
||||
path: release/
|
||||
- name: Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
@@ -97,7 +227,13 @@ jobs:
|
||||
with:
|
||||
files: |
|
||||
release/*
|
||||
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.18
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
limit-access-to-actor: true
|
||||
build-stablediffusion:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
@@ -111,58 +247,21 @@ jobs:
|
||||
cache: false
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
|
||||
- name: Build stablediffusion
|
||||
run: |
|
||||
export PATH=$PATH:$GOPATH/bin
|
||||
make backend-assets/grpc/stablediffusion
|
||||
mkdir -p release && cp backend-assets/grpc/stablediffusion release
|
||||
env:
|
||||
GO_TAGS: stablediffusion
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: stablediffusion
|
||||
path: release/
|
||||
|
||||
build-macOS:
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- build: 'avx2'
|
||||
defines: ''
|
||||
- build: 'avx'
|
||||
defines: '-DLLAMA_AVX2=OFF'
|
||||
- build: 'avx512'
|
||||
defines: '-DLLAMA_AVX512=ON'
|
||||
runs-on: macOS-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: '1.21.x'
|
||||
cache: false
|
||||
- name: Dependencies
|
||||
run: |
|
||||
brew install protobuf grpc
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
|
||||
- name: Build
|
||||
id: build
|
||||
env:
|
||||
CMAKE_ARGS: "${{ matrix.defines }}"
|
||||
BUILD_ID: "${{ matrix.build }}"
|
||||
run: |
|
||||
export C_INCLUDE_PATH=/usr/local/include
|
||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||
export PATH=$PATH:$GOPATH/bin
|
||||
make dist
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: LocalAI-MacOS-${{ matrix.build }}
|
||||
path: release/
|
||||
- name: Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
@@ -170,17 +269,7 @@ jobs:
|
||||
files: |
|
||||
release/*
|
||||
|
||||
|
||||
build-macOS-arm64:
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- build: 'avx2'
|
||||
defines: ''
|
||||
- build: 'avx'
|
||||
defines: '-DLLAMA_AVX2=OFF'
|
||||
- build: 'avx512'
|
||||
defines: '-DLLAMA_AVX512=ON'
|
||||
runs-on: macos-14
|
||||
steps:
|
||||
- name: Clone
|
||||
@@ -194,21 +283,19 @@ jobs:
|
||||
- name: Dependencies
|
||||
run: |
|
||||
brew install protobuf grpc
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
|
||||
- name: Build
|
||||
id: build
|
||||
env:
|
||||
CMAKE_ARGS: "${{ matrix.defines }}"
|
||||
BUILD_ID: "${{ matrix.build }}"
|
||||
run: |
|
||||
export C_INCLUDE_PATH=/usr/local/include
|
||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||
export PATH=$PATH:$GOPATH/bin
|
||||
make dist
|
||||
|
||||
BACKEND_LIBS="$(ls /opt/homebrew/opt/grpc/lib/*.dylib /opt/homebrew/opt/re2/lib/*.dylib /opt/homebrew/opt/openssl@3/lib/*.dylib /opt/homebrew/opt/protobuf/lib/*.dylib /opt/homebrew/opt/abseil/lib/*.dylib | xargs)" GO_TAGS=p2p make dist
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: LocalAI-MacOS-arm64-${{ matrix.build }}
|
||||
name: LocalAI-MacOS-arm64
|
||||
path: release/
|
||||
- name: Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
@@ -216,3 +303,10 @@ jobs:
|
||||
with:
|
||||
files: |
|
||||
release/*
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.18
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
limit-access-to-actor: true
|
||||
|
||||
185
.github/workflows/test-extra.yml
vendored
185
.github/workflows/test-extra.yml
vendored
@@ -25,22 +25,14 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user grpcio-tools
|
||||
pip install --user grpcio-tools==1.64.0
|
||||
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
|
||||
- name: Test transformers
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make --jobs=5 --output-sync=target -C backend/python/transformers
|
||||
make --jobs=5 --output-sync=target -C backend/python/transformers test
|
||||
|
||||
@@ -55,22 +47,14 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user grpcio-tools
|
||||
pip install --user grpcio-tools==1.64.0
|
||||
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
|
||||
- name: Test sentencetransformers
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
|
||||
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
|
||||
|
||||
@@ -86,22 +70,14 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user grpcio-tools
|
||||
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
pip install --user grpcio-tools==1.64.0
|
||||
|
||||
- name: Test rerankers
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make --jobs=5 --output-sync=target -C backend/python/rerankers
|
||||
make --jobs=5 --output-sync=target -C backend/python/rerankers test
|
||||
|
||||
@@ -115,25 +91,16 @@ jobs:
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
sudo apt-get install -y build-essential ffmpeg
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user grpcio-tools
|
||||
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
pip install --user grpcio-tools==1.64.0
|
||||
- name: Test diffusers
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make --jobs=5 --output-sync=target -C backend/python/diffusers
|
||||
make --jobs=5 --output-sync=target -C backend/python/diffusers test
|
||||
make --jobs=5 --output-sync=target -C backend/python/diffusers
|
||||
make --jobs=5 --output-sync=target -C backend/python/diffusers test
|
||||
|
||||
tests-parler-tts:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -146,24 +113,38 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user grpcio-tools
|
||||
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
pip install --user grpcio-tools==1.64.0
|
||||
|
||||
- name: Test parler-tts
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts
|
||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts test
|
||||
|
||||
tests-openvoice:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user grpcio-tools==1.64.0
|
||||
|
||||
- name: Test openvoice
|
||||
run: |
|
||||
make --jobs=5 --output-sync=target -C backend/python/openvoice
|
||||
make --jobs=5 --output-sync=target -C backend/python/openvoice test
|
||||
|
||||
tests-transformers-musicgen:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -176,22 +157,14 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user grpcio-tools
|
||||
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
pip install --user grpcio-tools==1.64.0
|
||||
|
||||
- name: Test transformers-musicgen
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
|
||||
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
|
||||
|
||||
@@ -208,22 +181,14 @@ jobs:
|
||||
# run: |
|
||||
# sudo apt-get update
|
||||
# sudo apt-get install build-essential ffmpeg
|
||||
# curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
# sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
# gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
# sudo apt-get update && \
|
||||
# sudo apt-get install -y conda
|
||||
# # Install UV
|
||||
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
# sudo apt-get install -y libopencv-dev
|
||||
# pip install --user grpcio-tools
|
||||
|
||||
# sudo rm -rfv /usr/bin/conda || true
|
||||
# pip install --user grpcio-tools==1.64.0
|
||||
|
||||
# - name: Test petals
|
||||
# run: |
|
||||
# export PATH=$PATH:/opt/conda/bin
|
||||
# make --jobs=5 --output-sync=target -C backend/python/petals
|
||||
# make --jobs=5 --output-sync=target -C backend/python/petals test
|
||||
|
||||
@@ -280,22 +245,14 @@ jobs:
|
||||
# run: |
|
||||
# sudo apt-get update
|
||||
# sudo apt-get install build-essential ffmpeg
|
||||
# curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
# sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
# gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
# sudo apt-get update && \
|
||||
# sudo apt-get install -y conda
|
||||
# # Install UV
|
||||
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
# sudo apt-get install -y libopencv-dev
|
||||
# pip install --user grpcio-tools
|
||||
|
||||
# sudo rm -rfv /usr/bin/conda || true
|
||||
# pip install --user grpcio-tools==1.64.0
|
||||
|
||||
# - name: Test bark
|
||||
# run: |
|
||||
# export PATH=$PATH:/opt/conda/bin
|
||||
# make --jobs=5 --output-sync=target -C backend/python/bark
|
||||
# make --jobs=5 --output-sync=target -C backend/python/bark test
|
||||
|
||||
@@ -313,20 +270,13 @@ jobs:
|
||||
# run: |
|
||||
# sudo apt-get update
|
||||
# sudo apt-get install build-essential ffmpeg
|
||||
# curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
# sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
# gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
# sudo apt-get update && \
|
||||
# sudo apt-get install -y conda
|
||||
# # Install UV
|
||||
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
# sudo apt-get install -y libopencv-dev
|
||||
# pip install --user grpcio-tools
|
||||
# sudo rm -rfv /usr/bin/conda || true
|
||||
# pip install --user grpcio-tools==1.64.0
|
||||
# - name: Test vllm
|
||||
# run: |
|
||||
# export PATH=$PATH:/opt/conda/bin
|
||||
# make --jobs=5 --output-sync=target -C backend/python/vllm
|
||||
# make --jobs=5 --output-sync=target -C backend/python/vllm test
|
||||
tests-vallex:
|
||||
@@ -340,20 +290,13 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user grpcio-tools
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
pip install --user grpcio-tools==1.64.0
|
||||
- name: Test vall-e-x
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make --jobs=5 --output-sync=target -C backend/python/vall-e-x
|
||||
make --jobs=5 --output-sync=target -C backend/python/vall-e-x test
|
||||
|
||||
@@ -368,19 +311,11 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
|
||||
pip install --user grpcio-tools
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
pip install --user grpcio-tools==1.64.0
|
||||
- name: Test coqui
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make --jobs=5 --output-sync=target -C backend/python/coqui
|
||||
make --jobs=5 --output-sync=target -C backend/python/coqui test
|
||||
make --jobs=5 --output-sync=target -C backend/python/coqui
|
||||
make --jobs=5 --output-sync=target -C backend/python/coqui test
|
||||
28
.github/workflows/test.yml
vendored
28
.github/workflows/test.yml
vendored
@@ -10,7 +10,7 @@ on:
|
||||
- '*'
|
||||
|
||||
env:
|
||||
GRPC_VERSION: v1.58.0
|
||||
GRPC_VERSION: v1.64.0
|
||||
|
||||
concurrency:
|
||||
group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||
@@ -57,7 +57,7 @@ jobs:
|
||||
df -h
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go ${{ matrix.go-version }}
|
||||
uses: actions/setup-go@v5
|
||||
@@ -78,6 +78,8 @@ jobs:
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake patch python3-pip unzip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
|
||||
@@ -85,8 +87,14 @@ jobs:
|
||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||
rm protoc.zip
|
||||
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
||||
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
||||
export CUDACXX=/usr/local/cuda/bin/nvcc
|
||||
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
|
||||
|
||||
# The python3-grpc-tools package in 22.04 is too old
|
||||
pip install --user grpcio-tools
|
||||
@@ -100,6 +108,8 @@ jobs:
|
||||
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
|
||||
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
|
||||
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||
env:
|
||||
CUDA_VERSION: 12-3
|
||||
- name: Cache grpc
|
||||
id: cache-grpc
|
||||
uses: actions/cache@v4
|
||||
@@ -164,11 +174,11 @@ jobs:
|
||||
df -h
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
with:
|
||||
submodules: true
|
||||
- name: Build images
|
||||
run: |
|
||||
docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=core --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
|
||||
docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
|
||||
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
|
||||
- name: Test
|
||||
run: |
|
||||
@@ -190,7 +200,7 @@ jobs:
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go ${{ matrix.go-version }}
|
||||
uses: actions/setup-go@v5
|
||||
@@ -203,14 +213,14 @@ jobs:
|
||||
- name: Dependencies
|
||||
run: |
|
||||
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
|
||||
pip install --user grpcio-tools
|
||||
pip install --user grpcio-tools==1.64.0
|
||||
- name: Test
|
||||
run: |
|
||||
export C_INCLUDE_PATH=/usr/local/include
|
||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||
# Used to run the newer GNUMake version from brew that supports --output-sync
|
||||
export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
|
||||
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
|
||||
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.18
|
||||
|
||||
9
.gitignore
vendored
9
.gitignore
vendored
@@ -6,6 +6,9 @@ get-sources
|
||||
prepare-sources
|
||||
/backend/cpp/llama/grpc-server
|
||||
/backend/cpp/llama/llama.cpp
|
||||
/backend/cpp/llama-*
|
||||
|
||||
*.log
|
||||
|
||||
go-ggml-transformers
|
||||
go-gpt2
|
||||
@@ -39,6 +42,7 @@ backend-assets/*
|
||||
!backend-assets/.keep
|
||||
prepare
|
||||
/ggml-metal.metal
|
||||
docs/static/gallery.html
|
||||
|
||||
# Protobuf generated files
|
||||
*.pb.go
|
||||
@@ -46,4 +50,7 @@ prepare
|
||||
*pb2_grpc.py
|
||||
|
||||
# SonarQube
|
||||
.scannerwork
|
||||
.scannerwork
|
||||
|
||||
# backend virtual environments
|
||||
**/venv
|
||||
|
||||
481
Dockerfile
481
Dockerfile
@@ -1,44 +1,46 @@
|
||||
ARG IMAGE_TYPE=extras
|
||||
ARG BASE_IMAGE=ubuntu:22.04
|
||||
ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
|
||||
ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
|
||||
|
||||
# extras or core
|
||||
# The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it.
|
||||
FROM ${BASE_IMAGE} AS requirements-core
|
||||
# TODO(mudler): install all accellerators here
|
||||
# and use make dist instead of build.
|
||||
# TODO(mudler): modify make dist to build also go-piper and stablediffusion
|
||||
# This way the same binary can work for everything(!)
|
||||
# TODO(mudler): also make sure that we bundle all the required libs in the backend-assets/lib
|
||||
# For the GPU-accell we are going to generate a tar file instead that will be extracted by the bash installer, and the libs will also be installed in the final docker image, so no need to pull ALL the dependencies
|
||||
|
||||
USER root
|
||||
|
||||
ARG GO_VERSION=1.21.7
|
||||
ARG BUILD_TYPE
|
||||
ARG CUDA_MAJOR_VERSION=11
|
||||
ARG CUDA_MINOR_VERSION=7
|
||||
ARG GO_VERSION=1.22.4
|
||||
ARG TARGETARCH
|
||||
ARG TARGETVARIANT
|
||||
|
||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
|
||||
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
|
||||
|
||||
ARG GO_TAGS="stablediffusion tinydream tts"
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y ca-certificates curl python3-pip unzip && apt-get clean
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
ccache \
|
||||
ca-certificates \
|
||||
cmake \
|
||||
curl \
|
||||
git \
|
||||
unzip && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Go
|
||||
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
|
||||
ENV PATH $PATH:/usr/local/go/bin
|
||||
ENV PATH $PATH:/root/go/bin:/usr/local/go/bin
|
||||
|
||||
# Install grpc compilers
|
||||
ENV PATH $PATH:/root/go/bin
|
||||
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@latest && \
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
|
||||
|
||||
# Install protobuf (the version in 22.04 is too old)
|
||||
RUN curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
|
||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||
rm protoc.zip
|
||||
|
||||
# Install grpcio-tools (the version in 22.04 is too old)
|
||||
RUN pip install --user grpcio-tools
|
||||
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
|
||||
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
|
||||
RUN update-ca-certificates
|
||||
@@ -47,27 +49,21 @@ RUN update-ca-certificates
|
||||
RUN echo "Target Architecture: $TARGETARCH"
|
||||
RUN echo "Target Variant: $TARGETVARIANT"
|
||||
|
||||
# CuBLAS requirements
|
||||
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
|
||||
apt-get install -y software-properties-common && \
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
|
||||
dpkg -i cuda-keyring_1.1-1_all.deb && \
|
||||
rm -f cuda-keyring_1.1-1_all.deb && \
|
||||
apt-get update && \
|
||||
apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
|
||||
; fi
|
||||
|
||||
# Cuda
|
||||
ENV PATH /usr/local/cuda/bin:${PATH}
|
||||
|
||||
# HipBLAS requirements
|
||||
ENV PATH /opt/rocm/bin:${PATH}
|
||||
|
||||
# OpenBLAS requirements and stable diffusion
|
||||
RUN apt-get install -y \
|
||||
libopenblas-dev \
|
||||
libopencv-dev \
|
||||
&& apt-get clean
|
||||
# OpenBLAS requirements and stable diffusion, tts (espeak)
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
libopenblas-dev \
|
||||
espeak-ng \
|
||||
espeak \
|
||||
libopencv-dev && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Set up OpenCV
|
||||
RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||
@@ -80,58 +76,251 @@ RUN test -n "$TARGETARCH" \
|
||||
###################################
|
||||
###################################
|
||||
|
||||
# The requirements-extras target is for any builds with IMAGE_TYPE=extras. It should not be placed in this target unless every IMAGE_TYPE=extras build will use it
|
||||
FROM requirements-core AS requirements-extras
|
||||
|
||||
RUN apt install -y gpg && \
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \
|
||||
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list && \
|
||||
apt-get update && \
|
||||
apt-get install -y conda && apt-get clean
|
||||
|
||||
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
RUN apt-get install -y python3-pip && apt-get clean
|
||||
RUN pip install --upgrade pip
|
||||
|
||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||
RUN apt-get install -y espeak-ng espeak && apt-get clean
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
python3-pip \
|
||||
python-is-python3 \
|
||||
python3-dev \
|
||||
python3-venv && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
pip install --upgrade pip
|
||||
|
||||
RUN if [ ! -e /usr/bin/python ]; then \
|
||||
ln -s /usr/bin/python3 /usr/bin/python \
|
||||
# Install grpcio-tools (the version in 22.04 is too old)
|
||||
RUN pip install --user grpcio-tools
|
||||
|
||||
###################################
|
||||
###################################
|
||||
|
||||
# Base image for the build-type.
|
||||
FROM requirements-${IMAGE_TYPE} AS run-requirements-drivers
|
||||
|
||||
ARG BUILD_TYPE
|
||||
ARG CUDA_MAJOR_VERSION=12
|
||||
ARG CUDA_MINOR_VERSION=5
|
||||
|
||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||
|
||||
# Vulkan requirements
|
||||
RUN <<EOT bash
|
||||
if [ "${BUILD_TYPE}" = "vulkan" ]; then
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
software-properties-common pciutils wget gpg-agent && \
|
||||
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
||||
apt-get update && \
|
||||
apt-get install -y \
|
||||
vulkan-sdk && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
fi
|
||||
EOT
|
||||
|
||||
# CuBLAS requirements
|
||||
RUN <<EOT bash
|
||||
if [ "${BUILD_TYPE}" = "cublas" ]; then
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
software-properties-common pciutils
|
||||
if [ "amd64" = "$TARGETARCH" ]; then
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
||||
fi
|
||||
if [ "arm64" = "$TARGETARCH" ]; then
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
|
||||
fi
|
||||
dpkg -i cuda-keyring_1.1-1_all.deb && \
|
||||
rm -f cuda-keyring_1.1-1_all.deb && \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
fi
|
||||
EOT
|
||||
|
||||
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
software-properties-common pciutils && \
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
|
||||
dpkg -i cuda-keyring_1.1-1_all.deb && \
|
||||
rm -f cuda-keyring_1.1-1_all.deb && \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/* \
|
||||
; fi
|
||||
|
||||
# If we are building with clblas support, we need the libraries for the builds
|
||||
RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
libclblast-dev && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/* \
|
||||
; fi
|
||||
|
||||
RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
hipblas-dev \
|
||||
rocblas-dev && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
|
||||
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
|
||||
ldconfig \
|
||||
; fi
|
||||
|
||||
# The build-requirements-drivers target is for BUILD_TYPE specific items. If you need to install something specific to CUDA, or specific to ROCM, it goes here.
|
||||
# This target will be built on top of requirements-core or requirements-extras as retermined by the IMAGE_TYPE build-arg
|
||||
FROM requirements-${IMAGE_TYPE} AS build-requirements-drivers
|
||||
|
||||
ARG BUILD_TYPE
|
||||
ARG CUDA_MAJOR_VERSION=12
|
||||
ARG CUDA_MINOR_VERSION=5
|
||||
|
||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||
|
||||
# Vulkan requirements
|
||||
RUN <<EOT bash
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
software-properties-common pciutils wget gpg-agent && \
|
||||
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
||||
apt-get update && \
|
||||
apt-get install -y \
|
||||
vulkan-sdk && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
EOT
|
||||
|
||||
# CuBLAS requirements
|
||||
RUN <<EOT bash
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
software-properties-common pciutils
|
||||
if [ "amd64" = "$TARGETARCH" ]; then
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
||||
fi
|
||||
if [ "arm64" = "$TARGETARCH" ]; then
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
|
||||
fi
|
||||
dpkg -i cuda-keyring_1.1-1_all.deb && \
|
||||
rm -f cuda-keyring_1.1-1_all.deb && \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
EOT
|
||||
|
||||
# clblas
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
libclblast-dev && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# intel
|
||||
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && apt update && apt install -y intel-basekit && apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# hipblas
|
||||
RUN wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
|
||||
gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null && apt-get update && \
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/6.1.2/ubuntu jammy main" \
|
||||
| tee /etc/apt/sources.list.d/amdgpu.list && \
|
||||
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.1.2 jammy main" | tee --append /etc/apt/sources.list.d/rocm.list && printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | tee /etc/apt/preferences.d/rocm-pin-600 && \
|
||||
apt update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
hipblas-dev rocm-dev \
|
||||
rocblas-dev && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
|
||||
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
|
||||
ldconfig
|
||||
|
||||
###################################
|
||||
###################################
|
||||
|
||||
# Temporary workaround for Intel's repository to work correctly
|
||||
# https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/APT-Repository-not-working-signatures-invalid/m-p/1599436/highlight/true#M36143
|
||||
# This is a temporary workaround until Intel fixes their repository
|
||||
FROM ${INTEL_BASE_IMAGE} AS intel
|
||||
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
|
||||
gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
|
||||
RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list
|
||||
|
||||
###################################
|
||||
###################################
|
||||
|
||||
# The grpc target does one thing, it builds and installs GRPC. This is in it's own layer so that it can be effectively cached by CI.
|
||||
# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
|
||||
FROM ${GRPC_BASE_IMAGE} AS grpc
|
||||
|
||||
ARG MAKEFLAGS
|
||||
ARG GRPC_VERSION=v1.58.0
|
||||
# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
|
||||
ARG GRPC_MAKEFLAGS="-j4 -Otarget"
|
||||
ARG GRPC_VERSION=v1.64.2
|
||||
|
||||
ENV MAKEFLAGS=${MAKEFLAGS}
|
||||
ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential cmake git && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
build-essential \
|
||||
cmake \
|
||||
git && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc
|
||||
|
||||
WORKDIR /build/grpc/cmake/build
|
||||
|
||||
RUN cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \
|
||||
make
|
||||
# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
|
||||
# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
|
||||
# and running make install in the target container
|
||||
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
mkdir -p /build/grpc/cmake/build && \
|
||||
cd /build/grpc/cmake/build && \
|
||||
cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
|
||||
make && \
|
||||
make install && \
|
||||
rm -rf /build
|
||||
|
||||
###################################
|
||||
###################################
|
||||
|
||||
FROM requirements-${IMAGE_TYPE} AS builder
|
||||
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
|
||||
# Adjustments to the build process should likely be made here.
|
||||
FROM build-requirements-drivers AS builder
|
||||
|
||||
ARG GO_TAGS="stablediffusion tts"
|
||||
ARG GO_TAGS="stablediffusion tts p2p"
|
||||
ARG GRPC_BACKENDS
|
||||
ARG MAKEFLAGS
|
||||
|
||||
@@ -148,46 +337,52 @@ COPY . .
|
||||
COPY .git .
|
||||
RUN echo "GO_TAGS: $GO_TAGS"
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential cmake git && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN make prepare
|
||||
|
||||
# If we are building with clblas support, we need the libraries for the builds
|
||||
RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
|
||||
apt-get update && \
|
||||
apt-get install -y libclblast-dev && \
|
||||
apt-get clean \
|
||||
; fi
|
||||
# We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below
|
||||
# but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
|
||||
# here so that we can generate the grpc code for the stablediffusion build
|
||||
RUN <<EOT bash
|
||||
if [ "amd64" = "$TARGETARCH" ]; then
|
||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
|
||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||
rm protoc.zip
|
||||
fi
|
||||
if [ "arm64" = "$TARGETARCH" ]; then
|
||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-aarch_64.zip -o protoc.zip && \
|
||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||
rm protoc.zip
|
||||
fi
|
||||
EOT
|
||||
|
||||
# stablediffusion does not tolerate a newer version of abseil, build it first
|
||||
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||
|
||||
COPY --from=grpc /build/grpc ./grpc/
|
||||
|
||||
WORKDIR /build/grpc/cmake/build
|
||||
RUN make install
|
||||
# Install the pre-built GRPC
|
||||
COPY --from=grpc /opt/grpc /usr/local
|
||||
|
||||
# Rebuild with defaults backends
|
||||
WORKDIR /build
|
||||
RUN make build
|
||||
# Need to build tts and stablediffusion separately first (?)
|
||||
RUN make dist && rm release/*.sha256 && mv release/* local-ai
|
||||
|
||||
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
||||
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
|
||||
touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \
|
||||
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
|
||||
touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \
|
||||
; fi
|
||||
|
||||
###################################
|
||||
###################################
|
||||
|
||||
FROM requirements-${IMAGE_TYPE}
|
||||
# This is the final target. The result of this target will be the image uploaded to the registry.
|
||||
# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
|
||||
FROM run-requirements-drivers
|
||||
|
||||
ARG FFMPEG
|
||||
ARG BUILD_TYPE
|
||||
ARG TARGETARCH
|
||||
ARG IMAGE_TYPE=extras
|
||||
ARG EXTRA_BACKENDS
|
||||
ARG MAKEFLAGS
|
||||
|
||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||
@@ -195,29 +390,20 @@ ENV REBUILD=false
|
||||
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
|
||||
ENV MAKEFLAGS=${MAKEFLAGS}
|
||||
|
||||
ARG CUDA_MAJOR_VERSION=11
|
||||
ARG CUDA_MAJOR_VERSION=12
|
||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||
ENV PIP_CACHE_PURGE=true
|
||||
|
||||
# Add FFmpeg
|
||||
RUN if [ "${FFMPEG}" = "true" ]; then \
|
||||
apt-get install -y ffmpeg && apt-get clean \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
ffmpeg && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/* \
|
||||
; fi
|
||||
|
||||
# Add OpenCL
|
||||
RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
|
||||
apt-get update && \
|
||||
apt-get install -y libclblast1 && \
|
||||
apt-get clean \
|
||||
; fi
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y cmake git && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
# we start fresh & re-copy all assets because `make build` does not clean up nicely after itself
|
||||
@@ -227,64 +413,75 @@ WORKDIR /build
|
||||
COPY . .
|
||||
|
||||
COPY --from=builder /build/sources ./sources/
|
||||
COPY --from=grpc /build/grpc ./grpc/
|
||||
COPY --from=grpc /opt/grpc /usr/local
|
||||
|
||||
RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf /build/grpc
|
||||
RUN make prepare-sources
|
||||
|
||||
# Copy the binary
|
||||
COPY --from=builder /build/local-ai ./
|
||||
|
||||
# Copy shared libraries for piper
|
||||
# TODO(mudler): bundle these libs in backend-assets/lib/ (like we do for llama.cpp deps)
|
||||
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
|
||||
|
||||
# do not let stablediffusion rebuild (requires an older version of absl)
|
||||
COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
|
||||
|
||||
## Duplicated from Makefile to avoid having a big layer that's hard to push
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/autogptq \
|
||||
# Change the shell to bash so we can use [[ tests below
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
# We try to strike a balance between individual layer size (as that affects total push time) and total image size
|
||||
# Splitting the backends into more groups with fewer items results in a larger image, but a smaller size for the largest layer
|
||||
# Splitting the backends into fewer groups with more items results in a smaller image, but a larger size for the largest layer
|
||||
|
||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/coqui \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "parler-tts" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/parler-tts \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/diffusers \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/transformers-musicgen \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama1" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/exllama \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/bark \
|
||||
|
||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/vall-e-x \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/openvoice \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "petals" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/petals \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/sentencetransformers \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/exllama2 \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/transformers \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/diffusers \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/vllm \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/mamba \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/sentencetransformers \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/rerankers \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/transformers \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/vall-e-x \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/exllama \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/exllama2 \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/petals \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/transformers-musicgen \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/parler-tts \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/coqui \
|
||||
|
||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/vllm \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "autogptq" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/autogptq \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "bark" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/bark \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/rerankers \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "mamba" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/mamba \
|
||||
; fi
|
||||
|
||||
# Make sure the models directory exists
|
||||
@@ -293,7 +490,7 @@ RUN mkdir -p /build/models
|
||||
# Define the health check command
|
||||
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
|
||||
CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1
|
||||
|
||||
|
||||
VOLUME /build/models
|
||||
EXPOSE 8080
|
||||
ENTRYPOINT [ "/build/entrypoint.sh" ]
|
||||
|
||||
223
Makefile
223
Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
|
||||
|
||||
# llama.cpp versions
|
||||
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||
CPPLLAMA_VERSION?=46e12c4692a37bdd31a0432fc5153d7d22bc7f72
|
||||
CPPLLAMA_VERSION?=9ef07800622e4c371605f9419864d15667c3558f
|
||||
|
||||
# gpt4all version
|
||||
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
||||
@@ -16,19 +16,19 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_CPP_VERSION?=858452d58dba3acdc3431c9bced2bb8cfd9bf418
|
||||
WHISPER_CPP_VERSION?=b29b3b29240aac8b71ce8e5a4360c1f1562ad66f
|
||||
|
||||
# bert.cpp version
|
||||
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
|
||||
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
|
||||
|
||||
# go-piper version
|
||||
PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759
|
||||
|
||||
# stablediffusion version
|
||||
STABLEDIFFUSION_VERSION?=433ea6d9b64d9d08067324a757ef07040ea29568
|
||||
STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
|
||||
|
||||
# tinydream version
|
||||
TINYDREAM_VERSION?=22a12a4bc0ac5455856f28f3b771331a551a4293
|
||||
TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
|
||||
|
||||
export BUILD_TYPE?=
|
||||
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
|
||||
@@ -38,7 +38,7 @@ CGO_LDFLAGS?=
|
||||
CGO_LDFLAGS_WHISPER?=
|
||||
CUDA_LIBPATH?=/usr/local/cuda/lib64/
|
||||
GO_TAGS?=
|
||||
BUILD_ID?=git
|
||||
BUILD_ID?=
|
||||
|
||||
TEST_DIR=/tmp/test
|
||||
|
||||
@@ -54,7 +54,7 @@ override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell gi
|
||||
|
||||
OPTIONAL_TARGETS?=
|
||||
|
||||
OS := $(shell uname -s)
|
||||
export OS := $(shell uname -s)
|
||||
ARCH := $(shell uname -m)
|
||||
GREEN := $(shell tput -Txterm setaf 2)
|
||||
YELLOW := $(shell tput -Txterm setaf 3)
|
||||
@@ -70,7 +70,7 @@ UNAME_S := $(shell uname -s)
|
||||
endif
|
||||
|
||||
ifeq ($(OS),Darwin)
|
||||
|
||||
|
||||
ifeq ($(OSX_SIGNING_IDENTITY),)
|
||||
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
|
||||
endif
|
||||
@@ -80,8 +80,8 @@ ifeq ($(OS),Darwin)
|
||||
BUILD_TYPE=metal
|
||||
# disable metal if on Darwin and any other value is explicitly passed.
|
||||
else ifneq ($(BUILD_TYPE),metal)
|
||||
CMAKE_ARGS+=-DLLAMA_METAL=OFF
|
||||
export LLAMA_NO_ACCELERATE=1
|
||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||
export GGML_NO_ACCELERATE=1
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),metal)
|
||||
@@ -98,9 +98,13 @@ endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),cublas)
|
||||
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
|
||||
export LLAMA_CUBLAS=1
|
||||
export WHISPER_CUBLAS=1
|
||||
CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda
|
||||
export GGML_CUDA=1
|
||||
export WHISPER_CUDA=1
|
||||
CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),vulkan)
|
||||
CMAKE_ARGS+=-DGGML_VULKAN=1
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),hipblas)
|
||||
@@ -112,15 +116,15 @@ ifeq ($(BUILD_TYPE),hipblas)
|
||||
# llama-ggml has no hipblas support, so override it here.
|
||||
export STABLE_BUILD_TYPE=
|
||||
export WHISPER_HIPBLAS=1
|
||||
GPU_TARGETS ?= gfx900,gfx90a,gfx1030,gfx1031,gfx1100
|
||||
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
|
||||
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
||||
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
||||
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
||||
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),metal)
|
||||
CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
|
||||
export LLAMA_METAL=1
|
||||
export GGML_METAL=1
|
||||
export WHISPER_METAL=1
|
||||
endif
|
||||
|
||||
@@ -152,10 +156,14 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
||||
OPTIONAL_GRPC+=backend-assets/grpc/piper
|
||||
endif
|
||||
|
||||
ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface
|
||||
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
||||
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
|
||||
@@ -240,7 +248,7 @@ sources/whisper.cpp:
|
||||
cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||
cd sources/whisper.cpp && make libwhisper.a
|
||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a
|
||||
|
||||
get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream
|
||||
|
||||
@@ -293,6 +301,7 @@ clean: ## Remove build related file
|
||||
rm -rf backend-assets/*
|
||||
$(MAKE) -C backend/cpp/grpc clean
|
||||
$(MAKE) -C backend/cpp/llama clean
|
||||
rm -rf backend/cpp/llama-* || true
|
||||
$(MAKE) dropreplace
|
||||
$(MAKE) protogen-clean
|
||||
rmdir pkg/grpc/proto || true
|
||||
@@ -308,17 +317,56 @@ build: prepare backend-assets grpcs ## Build the project
|
||||
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
|
||||
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
|
||||
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
|
||||
ifneq ($(BACKEND_LIBS),)
|
||||
$(MAKE) backend-assets/lib
|
||||
cp $(BACKEND_LIBS) backend-assets/lib/
|
||||
endif
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
|
||||
|
||||
build-minimal:
|
||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS=backend-assets/grpc/llama-cpp GO_TAGS=none $(MAKE) build
|
||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=none $(MAKE) build
|
||||
|
||||
build-api:
|
||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
|
||||
|
||||
dist: build
|
||||
backend-assets/lib:
|
||||
mkdir -p backend-assets/lib
|
||||
|
||||
dist:
|
||||
$(MAKE) backend-assets/grpc/llama-cpp-avx2
|
||||
ifeq ($(OS),Darwin)
|
||||
$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
|
||||
else
|
||||
ifneq ($(ARCH),arm64)
|
||||
$(MAKE) backend-assets/grpc/llama-cpp-cuda
|
||||
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
|
||||
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f16
|
||||
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f32
|
||||
endif
|
||||
endif
|
||||
STATIC=true $(MAKE) build
|
||||
mkdir -p release
|
||||
# if BUILD_ID is empty, then we don't append it to the binary name
|
||||
ifeq ($(BUILD_ID),)
|
||||
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(OS)-$(ARCH)
|
||||
shasum -a 256 release/$(BINARY_NAME)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(OS)-$(ARCH).sha256
|
||||
else
|
||||
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)
|
||||
shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH).sha256
|
||||
endif
|
||||
|
||||
dist-cross-linux-arm64:
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
|
||||
STATIC=true $(MAKE) build
|
||||
mkdir -p release
|
||||
# if BUILD_ID is empty, then we don't append it to the binary name
|
||||
ifeq ($(BUILD_ID),)
|
||||
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(OS)-arm64
|
||||
shasum -a 256 release/$(BINARY_NAME)-$(OS)-arm64 > release/$(BINARY_NAME)-$(OS)-arm64.sha256
|
||||
else
|
||||
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64
|
||||
shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64 > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64.sha256
|
||||
endif
|
||||
|
||||
osx-signed: build
|
||||
codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"
|
||||
@@ -358,7 +406,7 @@ prepare-e2e:
|
||||
mkdir -p $(TEST_DIR)
|
||||
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
|
||||
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
|
||||
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
|
||||
docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=5 --build-arg FFMPEG=true -t localai-tests .
|
||||
|
||||
run-e2e-image:
|
||||
ls -liah $(abspath ./tests/e2e-fixtures)
|
||||
@@ -428,7 +476,7 @@ protogen-clean: protogen-go-clean protogen-python-clean
|
||||
.PHONY: protogen-go
|
||||
protogen-go:
|
||||
mkdir -p pkg/grpc/proto
|
||||
protoc -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
|
||||
protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
|
||||
backend/backend.proto
|
||||
|
||||
.PHONY: protogen-go-clean
|
||||
@@ -437,10 +485,10 @@ protogen-go-clean:
|
||||
$(RM) bin/*
|
||||
|
||||
.PHONY: protogen-python
|
||||
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen
|
||||
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
|
||||
|
||||
.PHONY: protogen-python-clean
|
||||
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean
|
||||
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
|
||||
|
||||
.PHONY: autogptq-protogen
|
||||
autogptq-protogen:
|
||||
@@ -554,6 +602,14 @@ vall-e-x-protogen:
|
||||
vall-e-x-protogen-clean:
|
||||
$(MAKE) -C backend/python/vall-e-x protogen-clean
|
||||
|
||||
.PHONY: openvoice-protogen
|
||||
openvoice-protogen:
|
||||
$(MAKE) -C backend/python/openvoice protogen
|
||||
|
||||
.PHONY: openvoice-protogen-clean
|
||||
openvoice-protogen-clean:
|
||||
$(MAKE) -C backend/python/openvoice protogen-clean
|
||||
|
||||
.PHONY: vllm-protogen
|
||||
vllm-protogen:
|
||||
$(MAKE) -C backend/python/vllm protogen
|
||||
@@ -577,6 +633,7 @@ prepare-extra-conda-environments: protogen-python
|
||||
$(MAKE) -C backend/python/transformers-musicgen
|
||||
$(MAKE) -C backend/python/parler-tts
|
||||
$(MAKE) -C backend/python/vall-e-x
|
||||
$(MAKE) -C backend/python/openvoice
|
||||
$(MAKE) -C backend/python/exllama
|
||||
$(MAKE) -C backend/python/petals
|
||||
$(MAKE) -C backend/python/exllama2
|
||||
@@ -616,8 +673,8 @@ backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/go
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
|
||||
|
||||
backend-assets/grpc/langchain-huggingface: backend-assets/grpc
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./backend/go/llm/langchain/
|
||||
backend-assets/grpc/huggingface: backend-assets/grpc
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
|
||||
|
||||
backend/cpp/llama/llama.cpp:
|
||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
|
||||
@@ -629,7 +686,7 @@ ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
|
||||
-Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
|
||||
-DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
|
||||
-DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
|
||||
backend/cpp/llama/grpc-server:
|
||||
build-llama-cpp-grpc-server:
|
||||
# Conditionally build grpc for the llama backend to use if needed
|
||||
ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
|
||||
$(MAKE) -C backend/cpp/grpc build
|
||||
@@ -638,19 +695,84 @@ ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
|
||||
PATH="${INSTALLED_PACKAGES}/bin:${PATH}" \
|
||||
CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" \
|
||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) \
|
||||
$(MAKE) -C backend/cpp/llama grpc-server
|
||||
$(MAKE) -C backend/cpp/${VARIANT} grpc-server
|
||||
else
|
||||
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
|
||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
|
||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
|
||||
endif
|
||||
|
||||
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server
|
||||
cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp
|
||||
# This target is for manually building a variant with-auto detected flags
|
||||
backend-assets/grpc/llama-cpp: backend-assets/grpc
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-cpp
|
||||
$(MAKE) -C backend/cpp/llama-cpp purge
|
||||
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
|
||||
$(MAKE) VARIANT="llama-cpp" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-cpp/grpc-server backend-assets/grpc/llama-cpp
|
||||
|
||||
backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-avx2
|
||||
$(MAKE) -C backend/cpp/llama-avx2 purge
|
||||
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
|
||||
|
||||
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-avx
|
||||
$(MAKE) -C backend/cpp/llama-avx purge
|
||||
$(info ${GREEN}I llama-cpp build info:avx${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
|
||||
|
||||
backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-fallback
|
||||
$(MAKE) -C backend/cpp/llama-fallback purge
|
||||
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
|
||||
# TODO: every binary should have its own folder instead, so can have different metal implementations
|
||||
ifeq ($(BUILD_TYPE),metal)
|
||||
cp backend/cpp/llama/llama.cpp/build/bin/default.metallib backend-assets/grpc/
|
||||
cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
|
||||
endif
|
||||
|
||||
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-cuda
|
||||
$(MAKE) -C backend/cpp/llama-cuda purge
|
||||
$(info ${GREEN}I llama-cpp build info:cuda${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
|
||||
|
||||
backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
|
||||
$(MAKE) -C backend/cpp/llama-hipblas purge
|
||||
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
|
||||
BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
|
||||
|
||||
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f16
|
||||
$(MAKE) -C backend/cpp/llama-sycl_f16 purge
|
||||
$(info ${GREEN}I llama-cpp build info:sycl_f16${RESET})
|
||||
BUILD_TYPE="sycl_f16" $(MAKE) VARIANT="llama-sycl_f16" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-sycl_f16/grpc-server backend-assets/grpc/llama-cpp-sycl_f16
|
||||
|
||||
backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f32
|
||||
$(MAKE) -C backend/cpp/llama-sycl_f32 purge
|
||||
$(info ${GREEN}I llama-cpp build info:sycl_f32${RESET})
|
||||
BUILD_TYPE="sycl_f32" $(MAKE) VARIANT="llama-sycl_f32" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-sycl_f32/grpc-server backend-assets/grpc/llama-cpp-sycl_f32
|
||||
|
||||
backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-grpc
|
||||
$(MAKE) -C backend/cpp/llama-grpc purge
|
||||
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc
|
||||
|
||||
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
|
||||
mkdir -p backend-assets/util/
|
||||
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
|
||||
|
||||
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
|
||||
@@ -693,7 +815,18 @@ docker:
|
||||
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
||||
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
|
||||
-t $(DOCKER_IMAGE) .
|
||||
|
||||
|
||||
docker-cuda11:
|
||||
docker build \
|
||||
--build-arg CUDA_MAJOR_VERSION=11 \
|
||||
--build-arg CUDA_MINOR_VERSION=8 \
|
||||
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
|
||||
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
||||
--build-arg GO_TAGS="$(GO_TAGS)" \
|
||||
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
||||
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
|
||||
-t $(DOCKER_IMAGE)-cuda11 .
|
||||
|
||||
docker-aio:
|
||||
@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
|
||||
docker build \
|
||||
@@ -724,3 +857,25 @@ docker-image-intel-xpu:
|
||||
.PHONY: swagger
|
||||
swagger:
|
||||
swag init -g core/http/app.go --output swagger
|
||||
|
||||
.PHONY: gen-assets
|
||||
gen-assets:
|
||||
$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets
|
||||
|
||||
## Documentation
|
||||
docs/layouts/_default:
|
||||
mkdir -p docs/layouts/_default
|
||||
|
||||
docs/static/gallery.html: docs/layouts/_default
|
||||
$(GOCMD) run ./.github/ci/modelslist.go ./gallery/index.yaml > docs/static/gallery.html
|
||||
|
||||
docs/public: docs/layouts/_default docs/static/gallery.html
|
||||
cd docs && hugo --minify
|
||||
|
||||
docs-clean:
|
||||
rm -rf docs/public
|
||||
rm -rf docs/static/gallery.html
|
||||
|
||||
.PHONY: docs
|
||||
docs: docs/static/gallery.html
|
||||
cd docs && hugo serve
|
||||
85
README.md
85
README.md
@@ -46,19 +46,43 @@
|
||||
|
||||
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
|
||||
|
||||

|
||||
|
||||
Run the installer script:
|
||||
|
||||
```bash
|
||||
curl https://localai.io/install.sh | sh
|
||||
```
|
||||
|
||||
Or run with docker:
|
||||
```bash
|
||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
||||
# Alternative images:
|
||||
# - if you have an Nvidia GPU:
|
||||
# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
|
||||
# - without preconfigured models
|
||||
# docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
|
||||
# - without preconfigured models for Nvidia GPUs
|
||||
# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
|
||||
```
|
||||
|
||||
[💻 Getting started](https://localai.io/basics/getting_started/index.html)
|
||||
|
||||
## 🔥🔥 Hot topics / Roadmap
|
||||
|
||||
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||
|
||||
- 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
|
||||
- 🔥🔥 Decentralized llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
|
||||
- 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
|
||||
- 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
|
||||
- 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
|
||||
- Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
|
||||
- Reranker API: https://github.com/mudler/LocalAI/pull/2121
|
||||
- Gallery WebUI: https://github.com/mudler/LocalAI/pull/2104
|
||||
- llama3: https://github.com/mudler/LocalAI/discussions/2076
|
||||
- Parler-TTS: https://github.com/mudler/LocalAI/pull/2027
|
||||
- Openvino support: https://github.com/mudler/LocalAI/pull/1892
|
||||
- Vector store: https://github.com/mudler/LocalAI/pull/1795
|
||||
- All-in-one container image: https://github.com/mudler/LocalAI/issues/1855
|
||||
|
||||
Hot topics (looking for contributors):
|
||||
|
||||
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
|
||||
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
||||
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
|
||||
- Assistant API: https://github.com/mudler/LocalAI/issues/1273
|
||||
@@ -67,29 +91,19 @@ Hot topics (looking for contributors):
|
||||
|
||||
If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22
|
||||
|
||||
## 💻 [Getting started](https://localai.io/basics/getting_started/index.html)
|
||||
|
||||
For a detailed step-by-step introduction, refer to the [Getting Started](https://localai.io/basics/getting_started/index.html) guide.
|
||||
|
||||
For those in a hurry, here's a straightforward one-liner to launch a LocalAI AIO(All-in-one) Image using `docker`:
|
||||
|
||||
```bash
|
||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
||||
# or, if you have an Nvidia GPU:
|
||||
# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
|
||||
```
|
||||
|
||||
## 🚀 [Features](https://localai.io/features/)
|
||||
|
||||
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
|
||||
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
|
||||
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
|
||||
- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
|
||||
- 🔥 [OpenAI functions](https://localai.io/features/openai-functions/) 🆕
|
||||
- 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/)
|
||||
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
|
||||
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
|
||||
- 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
|
||||
- 🆕 [Vision API](https://localai.io/features/gpt-vision/)
|
||||
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
|
||||
- 📈 [Reranker API](https://localai.io/features/reranker/)
|
||||
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
|
||||
|
||||
## 💻 Usage
|
||||
|
||||
@@ -103,6 +117,7 @@ Build and deploy custom containers:
|
||||
WebUIs:
|
||||
- https://github.com/Jirubizu/localai-admin
|
||||
- https://github.com/go-skynet/LocalAI-frontend
|
||||
- QA-Pilot(An interactive chat project that leverages LocalAI LLMs for rapid understanding and navigation of GitHub code repository) https://github.com/reid41/QA-Pilot
|
||||
|
||||
Model galleries
|
||||
- https://github.com/go-skynet/model-gallery
|
||||
@@ -110,17 +125,19 @@ Model galleries
|
||||
Other:
|
||||
- Helm chart https://github.com/go-skynet/helm-charts
|
||||
- VSCode extension https://github.com/badgooooor/localai-vscode-plugin
|
||||
- Terminal utility https://github.com/djcopley/ShellOracle
|
||||
- Local Smart assistant https://github.com/mudler/LocalAGI
|
||||
- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation
|
||||
- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation / https://github.com/valentinfrlch/ha-gpt4vision
|
||||
- Discord bot https://github.com/mudler/LocalAGI/tree/main/examples/discord
|
||||
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
|
||||
- Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
|
||||
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
|
||||
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/
|
||||
|
||||
|
||||
### 🔗 Resources
|
||||
|
||||
- 🆕 New! [LLM finetuning guide](https://localai.io/docs/advanced/fine-tuning/)
|
||||
- [LLM finetuning guide](https://localai.io/docs/advanced/fine-tuning/)
|
||||
- [How to build locally](https://localai.io/basics/build/index.html)
|
||||
- [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes)
|
||||
- [Projects integrating LocalAI](https://localai.io/docs/integrations/)
|
||||
@@ -128,7 +145,8 @@ Other:
|
||||
|
||||
## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
|
||||
|
||||
- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/ai/answers/tiZMDoZzZV6TLxgDXNBnFE/deploying-helm-charts-on-aws-eks)
|
||||
- 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/)
|
||||
- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
|
||||
- [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)
|
||||
- [Create a slackbot for teams and OSS projects that answer to documentation](https://mudler.pm/posts/smart-slackbot-for-teams/)
|
||||
- [LocalAI meets k8sgpt](https://www.youtube.com/watch?v=PKrDNuJ_dfE)
|
||||
@@ -155,17 +173,16 @@ If you utilize this repository, data in a downstream project, please consider ci
|
||||
|
||||
Support the project by becoming [a backer or sponsor](https://github.com/sponsors/mudler). Your logo will show up here with a link to your website.
|
||||
|
||||
A huge thank you to our generous sponsors who support this project:
|
||||
A huge thank you to our generous sponsors who support this project covering CI expenses, and our [Sponsor list](https://github.com/sponsors/mudler):
|
||||
|
||||
|  |
|
||||
|:-----------------------------------------------:|
|
||||
| [Spectro Cloud](https://www.spectrocloud.com/) |
|
||||
| Spectro Cloud kindly supports LocalAI by providing GPU and computing resources to run tests on lamdalabs! |
|
||||
|
||||
And a huge shout-out to individuals sponsoring the project by donating hardware or backing the project.
|
||||
|
||||
- [Sponsor list](https://github.com/sponsors/mudler)
|
||||
- JDAM00 (donating HW for the CI)
|
||||
<p align="center">
|
||||
<a href="https://www.spectrocloud.com/" target="blank">
|
||||
<img height="200" src="https://github.com/go-skynet/LocalAI/assets/2420543/68a6f3cb-8a65-4a4d-99b5-6417a8905512">
|
||||
</a>
|
||||
<a href="https://www.premai.io/" target="blank">
|
||||
<img height="200" src="https://github.com/mudler/LocalAI/assets/2420543/42e4ca83-661e-4f79-8e46-ae43689683d6"> <br>
|
||||
</a>
|
||||
</p>
|
||||
|
||||
## 🌟 Star history
|
||||
|
||||
@@ -175,7 +192,7 @@ And a huge shout-out to individuals sponsoring the project by donating hardware
|
||||
|
||||
LocalAI is a community-driven project created by [Ettore Di Giacinto](https://github.com/mudler/).
|
||||
|
||||
MIT - Author Ettore Di Giacinto
|
||||
MIT - Author Ettore Di Giacinto <mudler@localai.io>
|
||||
|
||||
## 🙇 Acknowledgements
|
||||
|
||||
|
||||
@@ -1,9 +1,64 @@
|
||||
name: gpt-4
|
||||
mmap: true
|
||||
parameters:
|
||||
model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q2_K.gguf
|
||||
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
|
||||
context_size: 8192
|
||||
|
||||
stopwords:
|
||||
- "<|im_end|>"
|
||||
- "<dummy32000>"
|
||||
- "</tool_call>"
|
||||
- "<|eot_id|>"
|
||||
- "<|end_of_text|>"
|
||||
|
||||
function:
|
||||
# disable injecting the "answer" tool
|
||||
disable_no_action: true
|
||||
|
||||
grammar:
|
||||
# This allows the grammar to also return messages
|
||||
mixed_mode: true
|
||||
# Suffix to add to the grammar
|
||||
#prefix: '<tool_call>\n'
|
||||
# Force parallel calls in the grammar
|
||||
# parallel_calls: true
|
||||
|
||||
return_name_in_function_response: true
|
||||
# Without grammar uncomment the lines below
|
||||
# Warning: this is relying only on the capability of the
|
||||
# LLM model to generate the correct function call.
|
||||
json_regex_match:
|
||||
- "(?s)<tool_call>(.*?)</tool_call>"
|
||||
- "(?s)<tool_call>(.*?)"
|
||||
replace_llm_results:
|
||||
# Drop the scratchpad content from responses
|
||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
||||
value: ""
|
||||
replace_function_results:
|
||||
# Replace everything that is not JSON array or object
|
||||
#
|
||||
- key: '(?s)^[^{\[]*'
|
||||
value: ""
|
||||
- key: '(?s)[^}\]]*$'
|
||||
value: ""
|
||||
- key: "'([^']*?)'"
|
||||
value: "_DQUOTE_${1}_DQUOTE_"
|
||||
- key: '\\"'
|
||||
value: "__TEMP_QUOTE__"
|
||||
- key: "\'"
|
||||
value: "'"
|
||||
- key: "_DQUOTE_"
|
||||
value: '"'
|
||||
- key: "__TEMP_QUOTE__"
|
||||
value: '"'
|
||||
# Drop the scratchpad content from responses
|
||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
||||
value: ""
|
||||
|
||||
template:
|
||||
chat: |
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
chat_message: |
|
||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
||||
{{- if .FunctionCall }}
|
||||
@@ -22,38 +77,25 @@ template:
|
||||
{{- else if eq .RoleName "tool" }}
|
||||
</tool_response>
|
||||
{{- end }}<|im_end|>
|
||||
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
|
||||
function: |
|
||||
completion: |
|
||||
{{.Input}}
|
||||
function: |-
|
||||
<|im_start|>system
|
||||
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||
You are a function calling AI model.
|
||||
Here are the available tools:
|
||||
<tools>
|
||||
{{range .Functions}}
|
||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||
{{end}}
|
||||
</tools>
|
||||
Use the following pydantic model json schema for each tool call you will make:
|
||||
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
|
||||
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
|
||||
You should call the tools provided to you sequentially
|
||||
Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
|
||||
<scratchpad>
|
||||
{step-by-step reasoning and plan in bullet points}
|
||||
</scratchpad>
|
||||
For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
|
||||
<tool_call>
|
||||
{'arguments': <args-dict>, 'name': <function-name>}
|
||||
{"arguments": <args-dict>, "name": <function-name>}
|
||||
</tool_call><|im_end|>
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
<tool_call>
|
||||
chat: |
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
completion: |
|
||||
{{.Input}}
|
||||
context_size: 4096
|
||||
f16: true
|
||||
stopwords:
|
||||
- <|im_end|>
|
||||
- <dummy32000>
|
||||
- "\n</tool_call>"
|
||||
- "\n\n\n"
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "gpt-4",
|
||||
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
|
||||
}'
|
||||
|
||||
@@ -1,9 +1,64 @@
|
||||
name: gpt-4
|
||||
mmap: true
|
||||
parameters:
|
||||
model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
|
||||
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
|
||||
context_size: 8192
|
||||
|
||||
stopwords:
|
||||
- "<|im_end|>"
|
||||
- "<dummy32000>"
|
||||
- "</tool_call>"
|
||||
- "<|eot_id|>"
|
||||
- "<|end_of_text|>"
|
||||
|
||||
function:
|
||||
# disable injecting the "answer" tool
|
||||
disable_no_action: true
|
||||
|
||||
grammar:
|
||||
# This allows the grammar to also return messages
|
||||
mixed_mode: true
|
||||
# Suffix to add to the grammar
|
||||
#prefix: '<tool_call>\n'
|
||||
# Force parallel calls in the grammar
|
||||
# parallel_calls: true
|
||||
|
||||
return_name_in_function_response: true
|
||||
# Without grammar uncomment the lines below
|
||||
# Warning: this is relying only on the capability of the
|
||||
# LLM model to generate the correct function call.
|
||||
json_regex_match:
|
||||
- "(?s)<tool_call>(.*?)</tool_call>"
|
||||
- "(?s)<tool_call>(.*?)"
|
||||
replace_llm_results:
|
||||
# Drop the scratchpad content from responses
|
||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
||||
value: ""
|
||||
replace_function_results:
|
||||
# Replace everything that is not JSON array or object
|
||||
#
|
||||
- key: '(?s)^[^{\[]*'
|
||||
value: ""
|
||||
- key: '(?s)[^}\]]*$'
|
||||
value: ""
|
||||
- key: "'([^']*?)'"
|
||||
value: "_DQUOTE_${1}_DQUOTE_"
|
||||
- key: '\\"'
|
||||
value: "__TEMP_QUOTE__"
|
||||
- key: "\'"
|
||||
value: "'"
|
||||
- key: "_DQUOTE_"
|
||||
value: '"'
|
||||
- key: "__TEMP_QUOTE__"
|
||||
value: '"'
|
||||
# Drop the scratchpad content from responses
|
||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
||||
value: ""
|
||||
|
||||
template:
|
||||
chat: |
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
chat_message: |
|
||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
||||
{{- if .FunctionCall }}
|
||||
@@ -22,38 +77,25 @@ template:
|
||||
{{- else if eq .RoleName "tool" }}
|
||||
</tool_response>
|
||||
{{- end }}<|im_end|>
|
||||
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
|
||||
function: |
|
||||
completion: |
|
||||
{{.Input}}
|
||||
function: |-
|
||||
<|im_start|>system
|
||||
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||
You are a function calling AI model.
|
||||
Here are the available tools:
|
||||
<tools>
|
||||
{{range .Functions}}
|
||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||
{{end}}
|
||||
</tools>
|
||||
Use the following pydantic model json schema for each tool call you will make:
|
||||
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
|
||||
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
|
||||
You should call the tools provided to you sequentially
|
||||
Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
|
||||
<scratchpad>
|
||||
{step-by-step reasoning and plan in bullet points}
|
||||
</scratchpad>
|
||||
For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
|
||||
<tool_call>
|
||||
{'arguments': <args-dict>, 'name': <function-name>}
|
||||
{"arguments": <args-dict>, "name": <function-name>}
|
||||
</tool_call><|im_end|>
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
<tool_call>
|
||||
chat: |
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
completion: |
|
||||
{{.Input}}
|
||||
context_size: 4096
|
||||
f16: true
|
||||
stopwords:
|
||||
- <|im_end|>
|
||||
- <dummy32000>
|
||||
- "\n</tool_call>"
|
||||
- "\n\n\n"
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "gpt-4",
|
||||
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
|
||||
}'
|
||||
<|im_start|>assistant
|
||||
@@ -1,10 +1,66 @@
|
||||
name: gpt-4
|
||||
mmap: false
|
||||
context_size: 8192
|
||||
|
||||
f16: false
|
||||
parameters:
|
||||
model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
|
||||
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
|
||||
|
||||
stopwords:
|
||||
- "<|im_end|>"
|
||||
- "<dummy32000>"
|
||||
- "</tool_call>"
|
||||
- "<|eot_id|>"
|
||||
- "<|end_of_text|>"
|
||||
|
||||
function:
|
||||
# disable injecting the "answer" tool
|
||||
disable_no_action: true
|
||||
|
||||
grammar:
|
||||
# This allows the grammar to also return messages
|
||||
mixed_mode: true
|
||||
# Suffix to add to the grammar
|
||||
#prefix: '<tool_call>\n'
|
||||
# Force parallel calls in the grammar
|
||||
# parallel_calls: true
|
||||
|
||||
return_name_in_function_response: true
|
||||
# Without grammar uncomment the lines below
|
||||
# Warning: this is relying only on the capability of the
|
||||
# LLM model to generate the correct function call.
|
||||
json_regex_match:
|
||||
- "(?s)<tool_call>(.*?)</tool_call>"
|
||||
- "(?s)<tool_call>(.*?)"
|
||||
replace_llm_results:
|
||||
# Drop the scratchpad content from responses
|
||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
||||
value: ""
|
||||
replace_function_results:
|
||||
# Replace everything that is not JSON array or object
|
||||
#
|
||||
- key: '(?s)^[^{\[]*'
|
||||
value: ""
|
||||
- key: '(?s)[^}\]]*$'
|
||||
value: ""
|
||||
- key: "'([^']*?)'"
|
||||
value: "_DQUOTE_${1}_DQUOTE_"
|
||||
- key: '\\"'
|
||||
value: "__TEMP_QUOTE__"
|
||||
- key: "\'"
|
||||
value: "'"
|
||||
- key: "_DQUOTE_"
|
||||
value: '"'
|
||||
- key: "__TEMP_QUOTE__"
|
||||
value: '"'
|
||||
# Drop the scratchpad content from responses
|
||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
||||
value: ""
|
||||
|
||||
template:
|
||||
chat: |
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
chat_message: |
|
||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
||||
{{- if .FunctionCall }}
|
||||
@@ -23,37 +79,25 @@ template:
|
||||
{{- else if eq .RoleName "tool" }}
|
||||
</tool_response>
|
||||
{{- end }}<|im_end|>
|
||||
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
|
||||
function: |
|
||||
completion: |
|
||||
{{.Input}}
|
||||
function: |-
|
||||
<|im_start|>system
|
||||
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||
You are a function calling AI model.
|
||||
Here are the available tools:
|
||||
<tools>
|
||||
{{range .Functions}}
|
||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||
{{end}}
|
||||
</tools>
|
||||
Use the following pydantic model json schema for each tool call you will make:
|
||||
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
|
||||
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
|
||||
You should call the tools provided to you sequentially
|
||||
Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
|
||||
<scratchpad>
|
||||
{step-by-step reasoning and plan in bullet points}
|
||||
</scratchpad>
|
||||
For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
|
||||
<tool_call>
|
||||
{'arguments': <args-dict>, 'name': <function-name>}
|
||||
{"arguments": <args-dict>, "name": <function-name>}
|
||||
</tool_call><|im_end|>
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
<tool_call>
|
||||
chat: |
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
completion: |
|
||||
{{.Input}}
|
||||
context_size: 4096
|
||||
stopwords:
|
||||
- <|im_end|>
|
||||
- "\n</tool_call>"
|
||||
- <dummy32000>
|
||||
- "\n\n\n"
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "gpt-4",
|
||||
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
|
||||
}'
|
||||
|
||||
@@ -212,6 +212,9 @@ message ModelOptions {
|
||||
float YarnBetaSlow = 47;
|
||||
|
||||
string Type = 49;
|
||||
|
||||
bool FlashAttention = 56;
|
||||
bool NoKVOffload = 57;
|
||||
}
|
||||
|
||||
message Result {
|
||||
@@ -227,6 +230,7 @@ message TranscriptRequest {
|
||||
string dst = 2;
|
||||
string language = 3;
|
||||
uint32 threads = 4;
|
||||
bool translate = 5;
|
||||
}
|
||||
|
||||
message TranscriptResult {
|
||||
@@ -263,6 +267,7 @@ message TTSRequest {
|
||||
string model = 2;
|
||||
string dst = 3;
|
||||
string voice = 4;
|
||||
optional string language = 5;
|
||||
}
|
||||
|
||||
message TokenizationResponse {
|
||||
|
||||
@@ -4,34 +4,44 @@ LLAMA_VERSION?=
|
||||
CMAKE_ARGS?=
|
||||
BUILD_TYPE?=
|
||||
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
||||
TARGET?=--target grpc-server
|
||||
|
||||
# If build type is cublas, then we set -DLLAMA_CUBLAS=ON to CMAKE_ARGS automatically
|
||||
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||
|
||||
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
|
||||
ifeq ($(BUILD_TYPE),cublas)
|
||||
CMAKE_ARGS+=-DLLAMA_CUBLAS=ON
|
||||
# If build type is openblas then we set -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
|
||||
CMAKE_ARGS+=-DGGML_CUDA=ON
|
||||
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||
# to CMAKE_ARGS automatically
|
||||
else ifeq ($(BUILD_TYPE),openblas)
|
||||
CMAKE_ARGS+=-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
|
||||
# If build type is clblas (openCL) we set -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
else ifeq ($(BUILD_TYPE),clblas)
|
||||
CMAKE_ARGS+=-DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
||||
else ifeq ($(BUILD_TYPE),hipblas)
|
||||
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
|
||||
# If it's OSX, DO NOT embed the metal library - -DLLAMA_METAL_EMBED_LIBRARY=ON requires further investigation
|
||||
CMAKE_ARGS+=-DGGML_HIPBLAS=ON
|
||||
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
|
||||
# But if it's OSX without metal, disable it here
|
||||
else ifeq ($(OS),darwin)
|
||||
else ifeq ($(OS),Darwin)
|
||||
ifneq ($(BUILD_TYPE),metal)
|
||||
CMAKE_ARGS+=-DLLAMA_METAL=OFF
|
||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||
else
|
||||
CMAKE_ARGS+=-DGGML_METAL=ON
|
||||
# Until this is tested properly, we disable embedded metal file
|
||||
# as we already embed it as part of the LocalAI assets
|
||||
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=OFF
|
||||
TARGET+=--target ggml-metal
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),sycl_f16)
|
||||
CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
|
||||
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE),sycl_f32)
|
||||
CMAKE_ARGS+=-DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||
endif
|
||||
|
||||
llama.cpp:
|
||||
@@ -43,35 +53,27 @@ llama.cpp:
|
||||
|
||||
llama.cpp/examples/grpc-server: llama.cpp
|
||||
mkdir -p llama.cpp/examples/grpc-server
|
||||
cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
|
||||
cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
|
||||
cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
|
||||
cp -rfv $(abspath ./)/utils.hpp llama.cpp/examples/grpc-server/
|
||||
echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
|
||||
## XXX: In some versions of CMake clip wasn't being built before llama.
|
||||
## This is an hack for now, but it should be fixed in the future.
|
||||
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
|
||||
cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
|
||||
echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
|
||||
cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
|
||||
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
|
||||
bash prepare.sh
|
||||
|
||||
rebuild:
|
||||
cp -rfv $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
|
||||
cp -rfv $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
|
||||
cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
|
||||
bash prepare.sh
|
||||
rm -rf grpc-server
|
||||
$(MAKE) grpc-server
|
||||
|
||||
clean:
|
||||
rm -rf llama.cpp
|
||||
purge:
|
||||
rm -rf llama.cpp/build
|
||||
rm -rf llama.cpp/examples/grpc-server
|
||||
rm -rf grpc-server
|
||||
|
||||
clean: purge
|
||||
rm -rf llama.cpp
|
||||
|
||||
grpc-server: llama.cpp llama.cpp/examples/grpc-server
|
||||
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
|
||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||
bash -c "source $(ONEAPI_VARS); \
|
||||
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release"
|
||||
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
|
||||
else
|
||||
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release
|
||||
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
|
||||
endif
|
||||
cp llama.cpp/build/bin/grpc-server .
|
||||
@@ -791,7 +791,7 @@ struct llama_server_context
|
||||
sampler_names.emplace_back(sampler_name);
|
||||
}
|
||||
}
|
||||
slot->sparams.samplers_sequence = sampler_types_from_names(sampler_names, false);
|
||||
slot->sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -886,6 +886,8 @@ struct llama_server_context
|
||||
{"task_id", slot->task_id},
|
||||
});
|
||||
|
||||
LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1146,7 +1148,7 @@ struct llama_server_context
|
||||
std::vector<std::string> samplers_sequence;
|
||||
for (const auto &sampler_type : slot.sparams.samplers_sequence)
|
||||
{
|
||||
samplers_sequence.emplace_back(sampler_type_to_name_string(sampler_type));
|
||||
samplers_sequence.emplace_back(llama_sampling_type_to_str(sampler_type));
|
||||
}
|
||||
|
||||
return json {
|
||||
@@ -2217,6 +2219,12 @@ static void params_parse(const backend::ModelOptions* request,
|
||||
} else {
|
||||
params.n_parallel = 1;
|
||||
}
|
||||
|
||||
const char *llama_grpc_servers = std::getenv("LLAMACPP_GRPC_SERVERS");
|
||||
if (llama_grpc_servers != NULL) {
|
||||
params.rpc_servers = std::string(llama_grpc_servers);
|
||||
}
|
||||
|
||||
// TODO: Add yarn
|
||||
|
||||
if (!request->tensorsplit().empty()) {
|
||||
@@ -2254,6 +2262,9 @@ static void params_parse(const backend::ModelOptions* request,
|
||||
}
|
||||
params.use_mlock = request->mlock();
|
||||
params.use_mmap = request->mmap();
|
||||
params.flash_attn = request->flashattention();
|
||||
params.no_kv_offload = request->nokvoffload();
|
||||
|
||||
params.embedding = request->embeddings();
|
||||
|
||||
if (request->ropescaling() == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; }
|
||||
|
||||
20
backend/cpp/llama/prepare.sh
Normal file
20
backend/cpp/llama/prepare.sh
Normal file
@@ -0,0 +1,20 @@
|
||||
#!/bin/bash
|
||||
|
||||
cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
|
||||
cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
|
||||
cp -rfv json.hpp llama.cpp/examples/grpc-server/
|
||||
cp -rfv utils.hpp llama.cpp/examples/grpc-server/
|
||||
|
||||
if grep -q "grpc-server" llama.cpp/examples/CMakeLists.txt; then
|
||||
echo "grpc-server already added"
|
||||
else
|
||||
echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
|
||||
fi
|
||||
|
||||
## XXX: In some versions of CMake clip wasn't being built before llama.
|
||||
## This is an hack for now, but it should be fixed in the future.
|
||||
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
|
||||
cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
|
||||
echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
|
||||
cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
|
||||
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
|
||||
@@ -5,7 +5,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
|
||||
@@ -3,9 +3,9 @@ package main
|
||||
// This is a wrapper to statisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
"github.com/go-skynet/LocalAI/pkg/stablediffusion"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/stablediffusion"
|
||||
)
|
||||
|
||||
type Image struct {
|
||||
|
||||
@@ -5,7 +5,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
|
||||
@@ -3,9 +3,9 @@ package main
|
||||
// This is a wrapper to statisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
"github.com/go-skynet/LocalAI/pkg/tinydream"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/tinydream"
|
||||
)
|
||||
|
||||
type Image struct {
|
||||
|
||||
@@ -5,8 +5,8 @@ package main
|
||||
import (
|
||||
bert "github.com/go-skynet/go-bert.cpp"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
)
|
||||
|
||||
type Embeddings struct {
|
||||
|
||||
@@ -5,7 +5,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
|
||||
@@ -5,8 +5,8 @@ package main
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
|
||||
)
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
|
||||
@@ -4,10 +4,11 @@ package main
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
"github.com/go-skynet/LocalAI/pkg/langchain"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/langchain"
|
||||
)
|
||||
|
||||
type LLM struct {
|
||||
@@ -18,9 +19,14 @@ type LLM struct {
|
||||
}
|
||||
|
||||
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||
llm.langchain, _ = langchain.NewHuggingFace(opts.Model)
|
||||
var err error
|
||||
hfToken := os.Getenv("HUGGINGFACEHUB_API_TOKEN")
|
||||
if hfToken == "" {
|
||||
return fmt.Errorf("no huggingface token provided")
|
||||
}
|
||||
llm.langchain, err = langchain.NewHuggingFace(opts.Model, hfToken)
|
||||
llm.model = opts.Model
|
||||
return nil
|
||||
return err
|
||||
}
|
||||
|
||||
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
|
||||
@@ -5,7 +5,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
|
||||
@@ -5,9 +5,9 @@ package main
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
"github.com/go-skynet/go-llama.cpp"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
)
|
||||
|
||||
type LLM struct {
|
||||
|
||||
@@ -3,7 +3,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
|
||||
@@ -6,9 +6,9 @@ import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
"github.com/go-skynet/go-llama.cpp"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
)
|
||||
|
||||
type LLM struct {
|
||||
|
||||
@@ -7,7 +7,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
|
||||
@@ -5,7 +5,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
|
||||
@@ -7,8 +7,8 @@ import (
|
||||
"path/filepath"
|
||||
|
||||
"github.com/donomii/go-rwkv.cpp"
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
)
|
||||
|
||||
const tokenizerSuffix = ".tokenizer.json"
|
||||
@@ -31,7 +31,7 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||
model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads()))
|
||||
|
||||
if model == nil {
|
||||
return fmt.Errorf("could not load model")
|
||||
return fmt.Errorf("rwkv could not load model")
|
||||
}
|
||||
llm.rwkv = model
|
||||
return nil
|
||||
|
||||
@@ -6,7 +6,7 @@ import (
|
||||
"flag"
|
||||
"os"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
@@ -8,8 +8,8 @@ import (
|
||||
"math"
|
||||
"slices"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
@@ -5,7 +5,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
|
||||
@@ -8,7 +8,7 @@ import (
|
||||
|
||||
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||
"github.com/go-audio/wav"
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
)
|
||||
|
||||
func ffmpegCommand(args []string) (string, error) {
|
||||
@@ -29,8 +29,8 @@ func audioToWav(src, dst string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.Result, error) {
|
||||
res := schema.Result{}
|
||||
func Transcript(model whisper.Model, audiopath, language string, translate bool, threads uint) (schema.TranscriptionResult, error) {
|
||||
res := schema.TranscriptionResult{}
|
||||
|
||||
dir, err := os.MkdirTemp("", "whisper")
|
||||
if err != nil {
|
||||
@@ -75,6 +75,10 @@ func Transcript(model whisper.Model, audiopath, language string, threads uint) (
|
||||
context.SetLanguage("auto")
|
||||
}
|
||||
|
||||
if translate {
|
||||
context.SetTranslate(true)
|
||||
}
|
||||
|
||||
if err := context.Process(data, nil, nil); err != nil {
|
||||
return res, err
|
||||
}
|
||||
|
||||
@@ -4,9 +4,9 @@ package main
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||
"github.com/go-skynet/LocalAI/core/schema"
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
)
|
||||
|
||||
type Whisper struct {
|
||||
@@ -21,6 +21,6 @@ func (sd *Whisper) Load(opts *pb.ModelOptions) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.Result, error) {
|
||||
return Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads))
|
||||
func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) {
|
||||
return Transcript(sd.whisper, opts.Dst, opts.Language, opts.Translate, uint(opts.Threads))
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
|
||||
@@ -7,8 +7,8 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
piper "github.com/mudler/go-piper"
|
||||
)
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
.PHONY: autogptq
|
||||
autogptq: protogen
|
||||
$(MAKE) -C ../common-env/transformers
|
||||
bash install.sh
|
||||
|
||||
.PHONY: protogen
|
||||
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||
@@ -10,4 +10,8 @@ protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
rm -rf venv __pycache__
|
||||
@@ -1,93 +0,0 @@
|
||||
####
|
||||
# Attention! This file is abandoned.
|
||||
# Please use the ../common-env/transformers/transformers.yml file to manage dependencies.
|
||||
###
|
||||
name: autogptq
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- accelerate==0.27.0
|
||||
- aiohttp==3.8.5
|
||||
- aiosignal==1.3.1
|
||||
- async-timeout==4.0.3
|
||||
- attrs==23.1.0
|
||||
- auto-gptq==0.7.1
|
||||
- certifi==2023.7.22
|
||||
- charset-normalizer==3.3.0
|
||||
- datasets==2.14.5
|
||||
- dill==0.3.7
|
||||
- filelock==3.12.4
|
||||
- frozenlist==1.4.0
|
||||
- fsspec==2023.6.0
|
||||
- grpcio==1.59.0
|
||||
- huggingface-hub==0.16.4
|
||||
- idna==3.4
|
||||
- jinja2==3.1.2
|
||||
- markupsafe==2.1.3
|
||||
- mpmath==1.3.0
|
||||
- multidict==6.0.4
|
||||
- multiprocess==0.70.15
|
||||
- networkx==3.1
|
||||
- numpy==1.26.0
|
||||
- nvidia-cublas-cu12==12.1.3.1
|
||||
- nvidia-cuda-cupti-cu12==12.1.105
|
||||
- nvidia-cuda-nvrtc-cu12==12.1.105
|
||||
- nvidia-cuda-runtime-cu12==12.1.105
|
||||
- nvidia-cudnn-cu12==8.9.2.26
|
||||
- nvidia-cufft-cu12==11.0.2.54
|
||||
- nvidia-curand-cu12==10.3.2.106
|
||||
- nvidia-cusolver-cu12==11.4.5.107
|
||||
- nvidia-cusparse-cu12==12.1.0.106
|
||||
- nvidia-nccl-cu12==2.18.1
|
||||
- nvidia-nvjitlink-cu12==12.2.140
|
||||
- nvidia-nvtx-cu12==12.1.105
|
||||
- optimum==1.17.1
|
||||
- packaging==23.2
|
||||
- pandas==2.1.1
|
||||
- peft==0.5.0
|
||||
- protobuf==4.24.4
|
||||
- psutil==5.9.5
|
||||
- pyarrow==13.0.0
|
||||
- python-dateutil==2.8.2
|
||||
- pytz==2023.3.post1
|
||||
- pyyaml==6.0.1
|
||||
- regex==2023.10.3
|
||||
- requests==2.31.0
|
||||
- rouge==1.0.1
|
||||
- safetensors>=0.3.3
|
||||
- six==1.16.0
|
||||
- sympy==1.12
|
||||
- tokenizers==0.14.0
|
||||
- tqdm==4.66.1
|
||||
- torch==2.2.1
|
||||
- torchvision==0.17.1
|
||||
- transformers==4.34.0
|
||||
- transformers_stream_generator==0.0.5
|
||||
- triton==2.1.0
|
||||
- typing-extensions==4.8.0
|
||||
- tzdata==2023.3
|
||||
- urllib3==2.0.6
|
||||
- xxhash==3.4.1
|
||||
- yarl==1.9.2
|
||||
14
backend/python/autogptq/install.sh
Executable file
14
backend/python/autogptq/install.sh
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
|
||||
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
|
||||
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
|
||||
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
|
||||
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
|
||||
installRequirements
|
||||
2
backend/python/autogptq/requirements-hipblas.txt
Normal file
2
backend/python/autogptq/requirements-hipblas.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||
torch
|
||||
5
backend/python/autogptq/requirements-intel.txt
Normal file
5
backend/python/autogptq/requirements-intel.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
optimum[openvino]
|
||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||
7
backend/python/autogptq/requirements.txt
Normal file
7
backend/python/autogptq/requirements.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
accelerate
|
||||
auto-gptq==0.7.1
|
||||
grpcio==1.64.0
|
||||
protobuf
|
||||
torch
|
||||
certifi
|
||||
transformers
|
||||
@@ -1,14 +1,4 @@
|
||||
#!/bin/bash
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the autogptq server with conda
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python $DIR/autogptq.py $@
|
||||
startBackend $@
|
||||
6
backend/python/autogptq/test.sh
Executable file
6
backend/python/autogptq/test.sh
Executable file
@@ -0,0 +1,6 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
runUnittests
|
||||
@@ -1,6 +1,6 @@
|
||||
.PHONY: ttsbark
|
||||
ttsbark: protogen
|
||||
$(MAKE) -C ../common-env/transformers
|
||||
bash install.sh
|
||||
|
||||
.PHONY: run
|
||||
run: protogen
|
||||
@@ -22,4 +22,8 @@ protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
rm -rf venv __pycache__
|
||||
14
backend/python/bark/install.sh
Executable file
14
backend/python/bark/install.sh
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
|
||||
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
|
||||
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
|
||||
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
|
||||
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
|
||||
installRequirements
|
||||
3
backend/python/bark/requirements-hipblas.txt
Normal file
3
backend/python/bark/requirements-hipblas.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||
torch
|
||||
torchaudio
|
||||
6
backend/python/bark/requirements-intel.txt
Normal file
6
backend/python/bark/requirements-intel.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
torchaudio
|
||||
optimum[openvino]
|
||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||
6
backend/python/bark/requirements.txt
Normal file
6
backend/python/bark/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
accelerate
|
||||
bark==0.1.5
|
||||
grpcio==1.64.0
|
||||
protobuf
|
||||
certifi
|
||||
transformers
|
||||
@@ -1,14 +1,4 @@
|
||||
#!/bin/bash
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the ttsbark server with conda
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python $DIR/ttsbark.py $@
|
||||
startBackend $@
|
||||
@@ -18,7 +18,7 @@ class TestBackendServicer(unittest.TestCase):
|
||||
"""
|
||||
This method sets up the gRPC service by starting the server
|
||||
"""
|
||||
self.service = subprocess.Popen(["python3", "ttsbark.py", "--addr", "localhost:50051"])
|
||||
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
|
||||
time.sleep(10)
|
||||
|
||||
def tearDown(self) -> None:
|
||||
|
||||
11
backend/python/bark/test.sh
Normal file → Executable file
11
backend/python/bark/test.sh
Normal file → Executable file
@@ -1,11 +1,6 @@
|
||||
#!/bin/bash
|
||||
##
|
||||
## A bash script wrapper that runs the bark server with conda
|
||||
set -e
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python -m unittest $DIR/test.py
|
||||
runUnittests
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
CONDA_ENV_PATH = "transformers.yml"
|
||||
|
||||
ifeq ($(BUILD_TYPE), cublas)
|
||||
CONDA_ENV_PATH = "transformers-nvidia.yml"
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE), hipblas)
|
||||
CONDA_ENV_PATH = "transformers-rocm.yml"
|
||||
endif
|
||||
|
||||
# Intel GPU are supposed to have dependencies installed in the main python
|
||||
# environment, so we skip conda installation for SYCL builds.
|
||||
# https://github.com/intel/intel-extension-for-pytorch/issues/538
|
||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||
export SKIP_CONDA=1
|
||||
endif
|
||||
|
||||
.PHONY: transformers
|
||||
transformers:
|
||||
@echo "Installing $(CONDA_ENV_PATH)..."
|
||||
bash install.sh $(CONDA_ENV_PATH)
|
||||
@@ -1,44 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
|
||||
SKIP_CONDA=${SKIP_CONDA:-0}
|
||||
REQUIREMENTS_FILE=$1
|
||||
|
||||
# Check if environment exist
|
||||
conda_env_exists(){
|
||||
! conda list --name "${@}" >/dev/null 2>/dev/null
|
||||
}
|
||||
|
||||
if [ $SKIP_CONDA -eq 1 ]; then
|
||||
echo "Skipping conda environment installation"
|
||||
else
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
if conda_env_exists "transformers" ; then
|
||||
echo "Creating virtual environment..."
|
||||
conda env create --name transformers --file $REQUIREMENTS_FILE
|
||||
echo "Virtual environment created."
|
||||
else
|
||||
echo "Virtual environment already exists."
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -d "/opt/intel" ]; then
|
||||
# Intel GPU: If the directory exists, we assume we are using the intel image
|
||||
# (no conda env)
|
||||
# https://github.com/intel/intel-extension-for-pytorch/issues/538
|
||||
pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed optimum[openvino]
|
||||
fi
|
||||
|
||||
# If we didn't skip conda, activate the environment
|
||||
# to install FlashAttention
|
||||
if [ $SKIP_CONDA -eq 0 ]; then
|
||||
source activate transformers
|
||||
fi
|
||||
if [[ $REQUIREMENTS_FILE =~ -nvidia.yml$ ]]; then
|
||||
#TODO: FlashAttention is supported on nvidia and ROCm, but ROCm install can't be done this easily
|
||||
pip install flash-attn --no-build-isolation
|
||||
fi
|
||||
|
||||
if [ "$PIP_CACHE_PURGE" = true ] ; then
|
||||
pip cache purge
|
||||
fi
|
||||
@@ -1,125 +0,0 @@
|
||||
name: transformers
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- accelerate==0.27.0
|
||||
- aiohttp==3.8.5
|
||||
- aiosignal==1.3.1
|
||||
- async-timeout==4.0.3
|
||||
- auto-gptq==0.7.1
|
||||
- attrs==23.1.0
|
||||
- bark==0.1.5
|
||||
- bitsandbytes==0.43.0
|
||||
- boto3==1.28.61
|
||||
- botocore==1.31.61
|
||||
- certifi==2023.7.22
|
||||
- TTS==0.22.0
|
||||
- charset-normalizer==3.3.0
|
||||
- datasets==2.14.5
|
||||
- sentence-transformers==2.5.1 # Updated Version
|
||||
- sentencepiece==0.1.99
|
||||
- dill==0.3.7
|
||||
- einops==0.7.0
|
||||
- encodec==0.1.1
|
||||
- filelock==3.12.4
|
||||
- frozenlist==1.4.0
|
||||
- fsspec==2023.6.0
|
||||
- funcy==2.0
|
||||
- grpcio==1.59.0
|
||||
- huggingface-hub
|
||||
- idna==3.4
|
||||
- jinja2==3.1.2
|
||||
- jmespath==1.0.1
|
||||
- markupsafe==2.1.3
|
||||
- mpmath==1.3.0
|
||||
- multidict==6.0.4
|
||||
- multiprocess==0.70.15
|
||||
- networkx
|
||||
- numpy==1.26.0
|
||||
- nvidia-cublas-cu12==12.1.3.1
|
||||
- nvidia-cuda-cupti-cu12==12.1.105
|
||||
- nvidia-cuda-nvrtc-cu12==12.1.105
|
||||
- nvidia-cuda-runtime-cu12==12.1.105
|
||||
- nvidia-cudnn-cu12==8.9.2.26
|
||||
- nvidia-cufft-cu12==11.0.2.54
|
||||
- nvidia-curand-cu12==10.3.2.106
|
||||
- nvidia-cusolver-cu12==11.4.5.107
|
||||
- nvidia-cusparse-cu12==12.1.0.106
|
||||
- nvidia-nccl-cu12==2.18.1
|
||||
- nvidia-nvjitlink-cu12==12.2.140
|
||||
- nvidia-nvtx-cu12==12.1.105
|
||||
- optimum==1.17.1
|
||||
- packaging==23.2
|
||||
- pandas
|
||||
- peft==0.5.0
|
||||
- protobuf==4.24.4
|
||||
- psutil==5.9.5
|
||||
- pyarrow==13.0.0
|
||||
- python-dateutil==2.8.2
|
||||
- pytz==2023.3.post1
|
||||
- pyyaml==6.0.1
|
||||
- regex==2023.10.3
|
||||
- requests==2.31.0
|
||||
- rouge==1.0.1
|
||||
- s3transfer==0.7.0
|
||||
- safetensors>=0.4.1
|
||||
- scipy==1.12.0 # Updated Version
|
||||
- six==1.16.0
|
||||
- sympy==1.12
|
||||
- tokenizers
|
||||
- torch==2.1.2
|
||||
- torchvision==0.16.2
|
||||
- torchaudio==2.1.2
|
||||
- tqdm==4.66.1
|
||||
- triton==2.1.0
|
||||
- typing-extensions==4.8.0
|
||||
- tzdata==2023.3
|
||||
- urllib3==1.26.17
|
||||
- xxhash==3.4.1
|
||||
- yarl==1.9.2
|
||||
- soundfile
|
||||
- langid
|
||||
- wget
|
||||
- unidecode
|
||||
- pyopenjtalk-prebuilt
|
||||
- pypinyin
|
||||
- inflect
|
||||
- cn2an
|
||||
- jieba
|
||||
- eng_to_ipa
|
||||
- openai-whisper
|
||||
- matplotlib
|
||||
- gradio==3.41.2
|
||||
- nltk
|
||||
- sudachipy
|
||||
- sudachidict_core
|
||||
- vocos
|
||||
- vllm>=0.4.0
|
||||
- transformers>=4.38.2 # Updated Version
|
||||
- transformers_stream_generator==0.0.5
|
||||
- xformers==0.0.23.post1
|
||||
- rerankers[transformers]
|
||||
- pydantic
|
||||
prefix: /opt/conda/envs/transformers
|
||||
@@ -1,113 +0,0 @@
|
||||
name: transformers
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- --pre
|
||||
- --extra-index-url https://download.pytorch.org/whl/nightly/
|
||||
- accelerate==0.27.0
|
||||
- auto-gptq==0.7.1
|
||||
- aiohttp==3.8.5
|
||||
- aiosignal==1.3.1
|
||||
- async-timeout==4.0.3
|
||||
- attrs==23.1.0
|
||||
- bark==0.1.5
|
||||
- boto3==1.28.61
|
||||
- botocore==1.31.61
|
||||
- certifi==2023.7.22
|
||||
- TTS==0.22.0
|
||||
- charset-normalizer==3.3.0
|
||||
- datasets==2.14.5
|
||||
- sentence-transformers==2.5.1 # Updated Version
|
||||
- sentencepiece==0.1.99
|
||||
- dill==0.3.7
|
||||
- einops==0.7.0
|
||||
- encodec==0.1.1
|
||||
- filelock==3.12.4
|
||||
- frozenlist==1.4.0
|
||||
- fsspec==2023.6.0
|
||||
- funcy==2.0
|
||||
- grpcio==1.59.0
|
||||
- huggingface-hub
|
||||
- idna==3.4
|
||||
- jinja2==3.1.2
|
||||
- jmespath==1.0.1
|
||||
- markupsafe==2.1.3
|
||||
- mpmath==1.3.0
|
||||
- multidict==6.0.4
|
||||
- multiprocess==0.70.15
|
||||
- networkx
|
||||
- numpy==1.26.0
|
||||
- packaging==23.2
|
||||
- pandas
|
||||
- peft==0.5.0
|
||||
- protobuf==4.24.4
|
||||
- psutil==5.9.5
|
||||
- pyarrow==13.0.0
|
||||
- python-dateutil==2.8.2
|
||||
- pytz==2023.3.post1
|
||||
- pyyaml==6.0.1
|
||||
- regex==2023.10.3
|
||||
- requests==2.31.0
|
||||
- rouge==1.0.1
|
||||
- s3transfer==0.7.0
|
||||
- safetensors>=0.4.1
|
||||
- scipy==1.12.0 # Updated Version
|
||||
- six==1.16.0
|
||||
- sympy==1.12
|
||||
- tokenizers
|
||||
- torch
|
||||
- torchaudio
|
||||
- tqdm==4.66.1
|
||||
- triton==2.1.0
|
||||
- typing-extensions==4.8.0
|
||||
- tzdata==2023.3
|
||||
- urllib3==1.26.17
|
||||
- xxhash==3.4.1
|
||||
- yarl==1.9.2
|
||||
- soundfile
|
||||
- langid
|
||||
- wget
|
||||
- unidecode
|
||||
- optimum==1.17.1
|
||||
- pyopenjtalk-prebuilt
|
||||
- pypinyin
|
||||
- inflect
|
||||
- cn2an
|
||||
- jieba
|
||||
- eng_to_ipa
|
||||
- openai-whisper
|
||||
- matplotlib
|
||||
- gradio==3.41.2
|
||||
- nltk
|
||||
- sudachipy
|
||||
- sudachidict_core
|
||||
- vocos
|
||||
- vllm>=0.4.0
|
||||
- transformers>=4.38.2 # Updated Version
|
||||
- transformers_stream_generator==0.0.5
|
||||
- xformers==0.0.23.post1
|
||||
- rerankers[transformers]
|
||||
- pydantic
|
||||
prefix: /opt/conda/envs/transformers
|
||||
@@ -1,118 +0,0 @@
|
||||
name: transformers
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- accelerate==0.27.0
|
||||
- aiohttp==3.8.5
|
||||
- aiosignal==1.3.1
|
||||
- auto-gptq==0.7.1
|
||||
- async-timeout==4.0.3
|
||||
- attrs==23.1.0
|
||||
- bark==0.1.5
|
||||
- boto3==1.28.61
|
||||
- botocore==1.31.61
|
||||
- certifi==2023.7.22
|
||||
- coloredlogs==15.0.1
|
||||
- TTS==0.22.0
|
||||
- charset-normalizer==3.3.0
|
||||
- datasets==2.14.5
|
||||
- sentence-transformers==2.5.1 # Updated Version
|
||||
- sentencepiece==0.1.99
|
||||
- dill==0.3.7
|
||||
- einops==0.7.0
|
||||
- encodec==0.1.1
|
||||
- filelock==3.12.4
|
||||
- frozenlist==1.4.0
|
||||
- fsspec==2023.6.0
|
||||
- funcy==2.0
|
||||
- grpcio==1.59.0
|
||||
- huggingface-hub
|
||||
- humanfriendly==10.0
|
||||
- idna==3.4
|
||||
- jinja2==3.1.2
|
||||
- jmespath==1.0.1
|
||||
- markupsafe==2.1.3
|
||||
- mpmath==1.3.0
|
||||
- multidict==6.0.4
|
||||
- multiprocess==0.70.15
|
||||
- networkx
|
||||
- numpy==1.26.0
|
||||
- onnx==1.15.0
|
||||
- openvino==2024.1.0
|
||||
- openvino-telemetry==2024.1.0
|
||||
- optimum[openvino]==1.19.1
|
||||
- optimum-intel==1.16.1
|
||||
- packaging==23.2
|
||||
- pandas
|
||||
- peft==0.5.0
|
||||
- protobuf==4.24.4
|
||||
- psutil==5.9.5
|
||||
- pyarrow==13.0.0
|
||||
- python-dateutil==2.8.2
|
||||
- pytz==2023.3.post1
|
||||
- pyyaml==6.0.1
|
||||
- regex==2023.10.3
|
||||
- requests==2.31.0
|
||||
- rouge==1.0.1
|
||||
- s3transfer==0.7.0
|
||||
- safetensors>=0.4.1
|
||||
- scipy==1.12.0 # Updated Version
|
||||
- six==1.16.0
|
||||
- sympy==1.12
|
||||
- tokenizers
|
||||
- torch==2.1.2
|
||||
- torchvision==0.16.2
|
||||
- torchaudio==2.1.2
|
||||
- tqdm==4.66.1
|
||||
- triton==2.1.0
|
||||
- typing-extensions==4.8.0
|
||||
- tzdata==2023.3
|
||||
- urllib3==1.26.17
|
||||
- xxhash==3.4.1
|
||||
- yarl==1.9.2
|
||||
- soundfile
|
||||
- langid
|
||||
- wget
|
||||
- unidecode
|
||||
- pyopenjtalk-prebuilt
|
||||
- pypinyin
|
||||
- inflect
|
||||
- cn2an
|
||||
- jieba
|
||||
- eng_to_ipa
|
||||
- openai-whisper
|
||||
- matplotlib
|
||||
- gradio==3.41.2
|
||||
- nltk
|
||||
- sudachipy
|
||||
- sudachidict_core
|
||||
- vocos
|
||||
- vllm>=0.4.0
|
||||
- transformers>=4.38.2 # Updated Version
|
||||
- transformers_stream_generator==0.0.5
|
||||
- xformers==0.0.23.post1
|
||||
- rerankers[transformers]
|
||||
- pydantic
|
||||
prefix: /opt/conda/envs/transformers
|
||||
213
backend/python/common/libbackend.sh
Normal file
213
backend/python/common/libbackend.sh
Normal file
@@ -0,0 +1,213 @@
|
||||
|
||||
|
||||
# init handles the setup of the library
|
||||
#
|
||||
# use the library by adding the following line to a script:
|
||||
# source $(dirname $0)/../common/libbackend.sh
|
||||
#
|
||||
# If you want to limit what targets a backend can be used on, set the variable LIMIT_TARGETS to a
|
||||
# space separated list of valid targets BEFORE sourcing the library, for example to only allow a backend
|
||||
# to be used on CUDA and CPU backends:
|
||||
#
|
||||
# LIMIT_TARGETS="cublas cpu"
|
||||
# source $(dirname $0)/../common/libbackend.sh
|
||||
#
|
||||
# You can use any valid BUILD_TYPE or BUILD_PROFILE, if you need to limit a backend to CUDA 12 only:
|
||||
#
|
||||
# LIMIT_TARGETS="cublas12"
|
||||
# source $(dirname $0)/../common/libbackend.sh
|
||||
#
|
||||
function init() {
|
||||
BACKEND_NAME=${PWD##*/}
|
||||
MY_DIR=$(realpath `dirname $0`)
|
||||
BUILD_PROFILE=$(getBuildProfile)
|
||||
|
||||
# If a backend has defined a list of valid build profiles...
|
||||
if [ ! -z "${LIMIT_TARGETS}" ]; then
|
||||
isValidTarget=$(checkTargets ${LIMIT_TARGETS})
|
||||
if [ ${isValidTarget} != true ]; then
|
||||
echo "${BACKEND_NAME} can only be used on the following targets: ${LIMIT_TARGETS}"
|
||||
exit 0
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "Initializing libbackend for ${BACKEND_NAME}"
|
||||
}
|
||||
|
||||
# getBuildProfile will inspect the system to determine which build profile is appropriate:
|
||||
# returns one of the following:
|
||||
# - cublas11
|
||||
# - cublas12
|
||||
# - hipblas
|
||||
# - intel
|
||||
function getBuildProfile() {
|
||||
# First check if we are a cublas build, and if so report the correct build profile
|
||||
if [ x"${BUILD_TYPE}" == "xcublas" ]; then
|
||||
if [ ! -z ${CUDA_MAJOR_VERSION} ]; then
|
||||
# If we have been given a CUDA version, we trust it
|
||||
echo ${BUILD_TYPE}${CUDA_MAJOR_VERSION}
|
||||
else
|
||||
# We don't know what version of cuda we are, so we report ourselves as a generic cublas
|
||||
echo ${BUILD_TYPE}
|
||||
fi
|
||||
return 0
|
||||
fi
|
||||
|
||||
# If /opt/intel exists, then we are doing an intel/ARC build
|
||||
if [ -d "/opt/intel" ]; then
|
||||
echo "intel"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# If for any other values of BUILD_TYPE, we don't need any special handling/discovery
|
||||
if [ ! -z ${BUILD_TYPE} ]; then
|
||||
echo ${BUILD_TYPE}
|
||||
return 0
|
||||
fi
|
||||
|
||||
# If there is no BUILD_TYPE set at all, set a build-profile value of CPU, we aren't building for any GPU targets
|
||||
echo "cpu"
|
||||
}
|
||||
|
||||
# ensureVenv makes sure that the venv for the backend both exists, and is activated.
|
||||
#
|
||||
# This function is idempotent, so you can call it as many times as you want and it will
|
||||
# always result in an activated virtual environment
|
||||
function ensureVenv() {
|
||||
if [ ! -d "${MY_DIR}/venv" ]; then
|
||||
uv venv ${MY_DIR}/venv
|
||||
echo "virtualenv created"
|
||||
fi
|
||||
|
||||
if [ "x${VIRTUAL_ENV}" != "x${MY_DIR}/venv" ]; then
|
||||
source ${MY_DIR}/venv/bin/activate
|
||||
echo "virtualenv activated"
|
||||
fi
|
||||
|
||||
echo "activated virtualenv has been ensured"
|
||||
}
|
||||
|
||||
# installRequirements looks for several requirements files and if they exist runs the install for them in order
|
||||
#
|
||||
# - requirements-install.txt
|
||||
# - requirements.txt
|
||||
# - requirements-${BUILD_TYPE}.txt
|
||||
# - requirements-${BUILD_PROFILE}.txt
|
||||
#
|
||||
# BUILD_PROFILE is a pore specific version of BUILD_TYPE, ex: cuda11 or cuda12
|
||||
# it can also include some options that we do not have BUILD_TYPES for, ex: intel
|
||||
#
|
||||
# NOTE: for BUILD_PROFILE==intel, this function does NOT automatically use the Intel python package index.
|
||||
# you may want to add the following line to a requirements-intel.txt if you use one:
|
||||
#
|
||||
# --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
#
|
||||
# If you need to add extra flags into the pip install command you can do so by setting the variable EXTRA_PIP_INSTALL_FLAGS
|
||||
# before calling installRequirements. For example:
|
||||
#
|
||||
# source $(dirname $0)/../common/libbackend.sh
|
||||
# EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"
|
||||
# installRequirements
|
||||
function installRequirements() {
|
||||
ensureVenv
|
||||
|
||||
# These are the requirements files we will attempt to install, in order
|
||||
declare -a requirementFiles=(
|
||||
"${MY_DIR}/requirements-install.txt"
|
||||
"${MY_DIR}/requirements.txt"
|
||||
"${MY_DIR}/requirements-${BUILD_TYPE}.txt"
|
||||
)
|
||||
|
||||
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
|
||||
requirementFiles+=("${MY_DIR}/requirements-${BUILD_PROFILE}.txt")
|
||||
fi
|
||||
|
||||
for reqFile in ${requirementFiles[@]}; do
|
||||
if [ -f ${reqFile} ]; then
|
||||
echo "starting requirements install for ${reqFile}"
|
||||
uv pip install ${EXTRA_PIP_INSTALL_FLAGS} --requirement ${reqFile}
|
||||
echo "finished requirements install for ${reqFile}"
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
# startBackend discovers and runs the backend GRPC server
|
||||
#
|
||||
# You can specify a specific backend file to execute by setting BACKEND_FILE before calling startBackend.
|
||||
# example:
|
||||
#
|
||||
# source ../common/libbackend.sh
|
||||
# BACKEND_FILE="${MY_DIR}/source/backend.py"
|
||||
# startBackend $@
|
||||
#
|
||||
# valid filenames for autodiscovered backend servers are:
|
||||
# - server.py
|
||||
# - backend.py
|
||||
# - ${BACKEND_NAME}.py
|
||||
function startBackend() {
|
||||
ensureVenv
|
||||
|
||||
if [ ! -z ${BACKEND_FILE} ]; then
|
||||
python ${BACKEND_FILE} $@
|
||||
elif [ -e "${MY_DIR}/server.py" ]; then
|
||||
python ${MY_DIR}/server.py $@
|
||||
elif [ -e "${MY_DIR}/backend.py" ]; then
|
||||
python ${MY_DIR}/backend.py $@
|
||||
elif [ -e "${MY_DIR}/${BACKEND_NAME}.py" ]; then
|
||||
python ${MY_DIR}/${BACKEND_NAME}.py $@
|
||||
fi
|
||||
}
|
||||
|
||||
# runUnittests discovers and runs python unittests
|
||||
#
|
||||
# You can specify a specific test file to use by setting TEST_FILE before calling runUnittests.
|
||||
# example:
|
||||
#
|
||||
# source ../common/libbackend.sh
|
||||
# TEST_FILE="${MY_DIR}/source/test.py"
|
||||
# runUnittests $@
|
||||
#
|
||||
# be default a file named test.py in the backends directory will be used
|
||||
function runUnittests() {
|
||||
ensureVenv
|
||||
|
||||
if [ ! -z ${TEST_FILE} ]; then
|
||||
testDir=$(dirname `realpath ${TEST_FILE}`)
|
||||
testFile=$(basename ${TEST_FILE})
|
||||
pushd ${testDir}
|
||||
python -m unittest ${testFile}
|
||||
popd
|
||||
elif [ -f "${MY_DIR}/test.py" ]; then
|
||||
pushd ${MY_DIR}
|
||||
python -m unittest test.py
|
||||
popd
|
||||
else
|
||||
echo "no tests defined for ${BACKEND_NAME}"
|
||||
fi
|
||||
}
|
||||
|
||||
##################################################################################
|
||||
# Below here are helper functions not intended to be used outside of the library #
|
||||
##################################################################################
|
||||
|
||||
# checkTargets determines if the current BUILD_TYPE or BUILD_PROFILE is in a list of valid targets
|
||||
function checkTargets() {
|
||||
# Collect all provided targets into a variable and...
|
||||
targets=$@
|
||||
# ...convert it into an array
|
||||
declare -a targets=($targets)
|
||||
|
||||
for target in ${targets[@]}; do
|
||||
if [ "x${BUILD_TYPE}" == "x${target}" ]; then
|
||||
echo true
|
||||
return 0
|
||||
fi
|
||||
if [ "x${BUILD_PROFILE}" == "x${target}" ]; then
|
||||
echo true
|
||||
return 0
|
||||
fi
|
||||
done
|
||||
echo false
|
||||
}
|
||||
|
||||
init
|
||||
19
backend/python/common/template/Makefile
Normal file
19
backend/python/common/template/Makefile
Normal file
@@ -0,0 +1,19 @@
|
||||
.DEFAULT_GOAL := install
|
||||
|
||||
.PHONY: install
|
||||
install: protogen
|
||||
bash install.sh
|
||||
|
||||
.PHONY: protogen
|
||||
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
.PHONY: protogen-clean
|
||||
protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
rm -rf venv __pycache__
|
||||
4
backend/python/common/template/backend.py
Executable file
4
backend/python/common/template/backend.py
Executable file
@@ -0,0 +1,4 @@
|
||||
#!/usr/bin/env python3
|
||||
import grpc
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
14
backend/python/common/template/install.sh
Executable file
14
backend/python/common/template/install.sh
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
|
||||
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
|
||||
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
|
||||
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
|
||||
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
|
||||
installRequirements
|
||||
2
backend/python/common/template/requirements-hipblas.txt
Normal file
2
backend/python/common/template/requirements-hipblas.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||
torch
|
||||
4
backend/python/common/template/requirements-intel.txt
Normal file
4
backend/python/common/template/requirements-intel.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
optimum[openvino]
|
||||
2
backend/python/common/template/requirements.txt
Normal file
2
backend/python/common/template/requirements.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
grpcio==1.64.0
|
||||
protobuf
|
||||
4
backend/python/common/template/run.sh
Executable file
4
backend/python/common/template/run.sh
Executable file
@@ -0,0 +1,4 @@
|
||||
#!/bin/bash
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
startBackend $@
|
||||
6
backend/python/common/template/test.sh
Executable file
6
backend/python/common/template/test.sh
Executable file
@@ -0,0 +1,6 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
runUnittests
|
||||
@@ -1,6 +1,6 @@
|
||||
.PHONY: coqui
|
||||
coqui: protogen
|
||||
$(MAKE) -C ../common-env/transformers
|
||||
bash install.sh
|
||||
|
||||
.PHONY: run
|
||||
run: protogen
|
||||
@@ -22,4 +22,8 @@ protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
rm -rf venv __pycache__
|
||||
@@ -66,7 +66,21 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
|
||||
def TTS(self, request, context):
|
||||
try:
|
||||
self.tts.tts_to_file(text=request.text, speaker_wav=self.AudioPath, language=COQUI_LANGUAGE, file_path=request.dst)
|
||||
# if model is multilangual add language from request or env as fallback
|
||||
lang = request.language or COQUI_LANGUAGE
|
||||
if lang == "":
|
||||
lang = None
|
||||
if self.tts.is_multi_lingual and lang is None:
|
||||
return backend_pb2.Result(success=False, message=f"Model is multi-lingual, but no language was provided")
|
||||
|
||||
# if model is multi-speaker, use speaker_wav or the speaker_id from request.voice
|
||||
if self.tts.is_multi_speaker and self.AudioPath is None and request.voice is None:
|
||||
return backend_pb2.Result(success=False, message=f"Model is multi-speaker, but no speaker was provided")
|
||||
|
||||
if self.tts.is_multi_speaker and request.voice is not None:
|
||||
self.tts.tts_to_file(text=request.text, speaker=request.voice, language=lang, file_path=request.dst)
|
||||
else:
|
||||
self.tts.tts_to_file(text=request.text, speaker_wav=self.AudioPath, language=lang, file_path=request.dst)
|
||||
except Exception as err:
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
return backend_pb2.Result(success=True)
|
||||
14
backend/python/coqui/install.sh
Executable file
14
backend/python/coqui/install.sh
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
|
||||
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
|
||||
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
|
||||
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
|
||||
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
|
||||
installRequirements
|
||||
3
backend/python/coqui/requirements-hipblas.txt
Normal file
3
backend/python/coqui/requirements-hipblas.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||
torch
|
||||
torchaudio
|
||||
6
backend/python/coqui/requirements-intel.txt
Normal file
6
backend/python/coqui/requirements-intel.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
torchaudio
|
||||
optimum[openvino]
|
||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||
6
backend/python/coqui/requirements.txt
Normal file
6
backend/python/coqui/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
accelerate
|
||||
TTS==0.22.0
|
||||
grpcio==1.64.0
|
||||
protobuf
|
||||
certifi
|
||||
transformers
|
||||
@@ -1,14 +1,4 @@
|
||||
#!/bin/bash
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the ttsbark server with conda
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python $DIR/coqui_server.py $@
|
||||
startBackend $@
|
||||
@@ -18,7 +18,7 @@ class TestBackendServicer(unittest.TestCase):
|
||||
"""
|
||||
This method sets up the gRPC service by starting the server
|
||||
"""
|
||||
self.service = subprocess.Popen(["python3", "coqui_server.py", "--addr", "localhost:50051"])
|
||||
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
|
||||
time.sleep(10)
|
||||
|
||||
def tearDown(self) -> None:
|
||||
|
||||
11
backend/python/coqui/test.sh
Normal file → Executable file
11
backend/python/coqui/test.sh
Normal file → Executable file
@@ -1,11 +1,6 @@
|
||||
#!/bin/bash
|
||||
##
|
||||
## A bash script wrapper that runs the bark server with conda
|
||||
set -e
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python -m unittest $DIR/test.py
|
||||
runUnittests
|
||||
|
||||
@@ -13,8 +13,7 @@ endif
|
||||
|
||||
.PHONY: diffusers
|
||||
diffusers: protogen
|
||||
@echo "Installing $(CONDA_ENV_PATH)..."
|
||||
bash install.sh $(CONDA_ENV_PATH)
|
||||
bash install.sh
|
||||
|
||||
.PHONY: run
|
||||
run: protogen
|
||||
@@ -33,4 +32,8 @@ protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
rm -rf venv __pycache__
|
||||
@@ -17,7 +17,7 @@ import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
|
||||
from diffusers import StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
|
||||
from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
|
||||
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
|
||||
from diffusers.pipelines.stable_diffusion import safety_checker
|
||||
from diffusers.utils import load_image,export_to_video
|
||||
@@ -225,6 +225,17 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
torch_dtype=torchType,
|
||||
use_safetensors=True,
|
||||
variant=variant)
|
||||
elif request.PipelineType == "StableDiffusion3Pipeline":
|
||||
if fromSingleFile:
|
||||
self.pipe = StableDiffusion3Pipeline.from_single_file(modelFile,
|
||||
torch_dtype=torchType,
|
||||
use_safetensors=True)
|
||||
else:
|
||||
self.pipe = StableDiffusion3Pipeline.from_pretrained(
|
||||
request.Model,
|
||||
torch_dtype=torchType,
|
||||
use_safetensors=True,
|
||||
variant=variant)
|
||||
|
||||
if CLIPSKIP and request.CLIPSkip != 0:
|
||||
self.clip_skip = request.CLIPSkip
|
||||
@@ -1,65 +0,0 @@
|
||||
name: diffusers
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- tzdata=2023c=h04d1e81_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- --pre
|
||||
- --extra-index-url https://download.pytorch.org/whl/nightly/
|
||||
- accelerate>=0.11.0
|
||||
- certifi==2023.7.22
|
||||
- charset-normalizer==3.3.0
|
||||
- compel==2.0.2
|
||||
- diffusers==0.24.0
|
||||
- filelock==3.12.4
|
||||
- fsspec==2023.9.2
|
||||
- grpcio==1.59.0
|
||||
- huggingface-hub>=0.19.4
|
||||
- idna==3.4
|
||||
- importlib-metadata==6.8.0
|
||||
- jinja2==3.1.2
|
||||
- markupsafe==2.1.3
|
||||
- mpmath==1.3.0
|
||||
- networkx==3.1
|
||||
- numpy==1.26.0
|
||||
- omegaconf
|
||||
- packaging==23.2
|
||||
- pillow==10.0.1
|
||||
- protobuf==4.24.4
|
||||
- psutil==5.9.5
|
||||
- pyparsing==3.1.1
|
||||
- pyyaml==6.0.1
|
||||
- regex==2023.10.3
|
||||
- requests==2.31.0
|
||||
- safetensors==0.4.0
|
||||
- sympy==1.12
|
||||
- tqdm==4.66.1
|
||||
- transformers>=4.25.1
|
||||
- triton==2.1.0
|
||||
- typing-extensions==4.8.0
|
||||
- urllib3==2.0.6
|
||||
- zipp==3.17.0
|
||||
- torch
|
||||
- opencv-python
|
||||
prefix: /opt/conda/envs/diffusers
|
||||
@@ -1,75 +0,0 @@
|
||||
name: diffusers
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- tzdata=2023c=h04d1e81_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- accelerate>=0.11.0
|
||||
- certifi==2023.7.22
|
||||
- charset-normalizer==3.3.0
|
||||
- compel==2.0.2
|
||||
- diffusers==0.24.0
|
||||
- filelock==3.12.4
|
||||
- fsspec==2023.9.2
|
||||
- grpcio==1.59.0
|
||||
- huggingface-hub>=0.19.4
|
||||
- idna==3.4
|
||||
- importlib-metadata==6.8.0
|
||||
- jinja2==3.1.2
|
||||
- markupsafe==2.1.3
|
||||
- mpmath==1.3.0
|
||||
- networkx==3.1
|
||||
- numpy==1.26.0
|
||||
- nvidia-cublas-cu12==12.1.3.1
|
||||
- nvidia-cuda-cupti-cu12==12.1.105
|
||||
- nvidia-cuda-nvrtc-cu12==12.1.105
|
||||
- nvidia-cuda-runtime-cu12==12.1.105
|
||||
- nvidia-cudnn-cu12==8.9.2.26
|
||||
- nvidia-cufft-cu12==11.0.2.54
|
||||
- nvidia-curand-cu12==10.3.2.106
|
||||
- nvidia-cusolver-cu12==11.4.5.107
|
||||
- nvidia-cusparse-cu12==12.1.0.106
|
||||
- nvidia-nccl-cu12==2.18.1
|
||||
- nvidia-nvjitlink-cu12==12.2.140
|
||||
- nvidia-nvtx-cu12==12.1.105
|
||||
- omegaconf
|
||||
- packaging==23.2
|
||||
- pillow==10.0.1
|
||||
- protobuf==4.24.4
|
||||
- psutil==5.9.5
|
||||
- pyparsing==3.1.1
|
||||
- pyyaml==6.0.1
|
||||
- regex==2023.10.3
|
||||
- requests==2.31.0
|
||||
- safetensors==0.4.0
|
||||
- sympy==1.12
|
||||
- torch==2.1.0
|
||||
- tqdm==4.66.1
|
||||
- transformers>=4.25.1
|
||||
- triton==2.1.0
|
||||
- typing-extensions==4.8.0
|
||||
- urllib3==2.0.6
|
||||
- zipp==3.17.0
|
||||
- opencv-python
|
||||
prefix: /opt/conda/envs/diffusers
|
||||
@@ -1,50 +1,14 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
set -e
|
||||
|
||||
SKIP_CONDA=${SKIP_CONDA:-0}
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# Check if environment exist
|
||||
conda_env_exists(){
|
||||
! conda list --name "${@}" >/dev/null 2>/dev/null
|
||||
}
|
||||
|
||||
if [ $SKIP_CONDA -eq 1 ]; then
|
||||
echo "Skipping conda environment installation"
|
||||
else
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
if conda_env_exists "diffusers" ; then
|
||||
echo "Creating virtual environment..."
|
||||
conda env create --name diffusers --file $1
|
||||
echo "Virtual environment created."
|
||||
else
|
||||
echo "Virtual environment already exists."
|
||||
fi
|
||||
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
|
||||
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
|
||||
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
|
||||
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
|
||||
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
|
||||
if [ -d "/opt/intel" ]; then
|
||||
# Intel GPU: If the directory exists, we assume we are using the Intel image
|
||||
# https://github.com/intel/intel-extension-for-pytorch/issues/538
|
||||
pip install torch==2.1.0a0 \
|
||||
torchvision==0.16.0a0 \
|
||||
torchaudio==2.1.0a0 \
|
||||
intel-extension-for-pytorch==2.1.10+xpu \
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
|
||||
pip install google-api-python-client \
|
||||
grpcio \
|
||||
grpcio-tools \
|
||||
diffusers==0.24.0 \
|
||||
transformers>=4.25.1 \
|
||||
accelerate \
|
||||
compel==2.0.2 \
|
||||
Pillow
|
||||
fi
|
||||
|
||||
if [ "$PIP_CACHE_PURGE" = true ] ; then
|
||||
if [ $SKIP_CONDA -ne 1 ]; then
|
||||
# Activate conda environment
|
||||
source activate diffusers
|
||||
fi
|
||||
|
||||
pip cache purge
|
||||
fi
|
||||
installRequirements
|
||||
|
||||
3
backend/python/diffusers/requirements-hipblas.txt
Normal file
3
backend/python/diffusers/requirements-hipblas.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||
torch
|
||||
torchvision
|
||||
6
backend/python/diffusers/requirements-intel.txt
Normal file
6
backend/python/diffusers/requirements-intel.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
torchvision
|
||||
optimum[openvino]
|
||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||
11
backend/python/diffusers/requirements.txt
Normal file
11
backend/python/diffusers/requirements.txt
Normal file
@@ -0,0 +1,11 @@
|
||||
accelerate
|
||||
compel
|
||||
diffusers
|
||||
grpcio==1.64.0
|
||||
opencv-python
|
||||
pillow
|
||||
protobuf
|
||||
sentencepiece
|
||||
torch
|
||||
transformers
|
||||
certifi
|
||||
@@ -1,19 +1,4 @@
|
||||
#!/bin/bash
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the diffusers server with conda
|
||||
|
||||
if [ -d "/opt/intel" ]; then
|
||||
# Assumes we are using the Intel oneAPI container image
|
||||
# https://github.com/intel/intel-extension-for-pytorch/issues/538
|
||||
export XPU=1
|
||||
else
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
# Activate conda environment
|
||||
source activate diffusers
|
||||
fi
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python $DIR/backend_diffusers.py $@
|
||||
startBackend $@
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user