mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-24 00:26:34 -04:00
Compare commits
472 Commits
v3.1.1
...
chore/vulk
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2aed98d14b | ||
|
|
b3a1b3d63f | ||
|
|
e25dd2fe26 | ||
|
|
002f75ac79 | ||
|
|
1ce8f35834 | ||
|
|
be8a314496 | ||
|
|
406d62d6aa | ||
|
|
9e9f953eec | ||
|
|
84ebf2a2c9 | ||
|
|
ce5662ba90 | ||
|
|
9878f27813 | ||
|
|
f2b9452ec4 | ||
|
|
585da99c52 | ||
|
|
fd4f432079 | ||
|
|
238c68c57b | ||
|
|
04fbf5cb82 | ||
|
|
c85d559919 | ||
|
|
b5efc4f89e | ||
|
|
3f9c09a4c5 | ||
|
|
4a84660475 | ||
|
|
737248256e | ||
|
|
0ae334fc62 | ||
|
|
36c373b7c9 | ||
|
|
6afcb932b7 | ||
|
|
357bf571a3 | ||
|
|
e74ade9ebb | ||
|
|
f7f26b8efa | ||
|
|
75eb98f8bd | ||
|
|
c337e7baf7 | ||
|
|
660bd45be8 | ||
|
|
c27da0a0f6 | ||
|
|
ac043ed9ba | ||
|
|
2e0d66a1c8 | ||
|
|
41a0f361eb | ||
|
|
d3c5c02837 | ||
|
|
ae3d8fb0c4 | ||
|
|
902e47f0b0 | ||
|
|
50bb78fd24 | ||
|
|
542f07ab2d | ||
|
|
77c5acb9db | ||
|
|
44bbf4d778 | ||
|
|
633c12f93d | ||
|
|
6f24135f1d | ||
|
|
b72aa7b4fa | ||
|
|
e94e725479 | ||
|
|
e4ac7b14a3 | ||
|
|
ddb39c73f2 | ||
|
|
264b09fb1e | ||
|
|
36dd45df51 | ||
|
|
e5599f87b8 | ||
|
|
e89b5cc0e3 | ||
|
|
10bf1084cc | ||
|
|
b08ae559b3 | ||
|
|
aa7cb7e18c | ||
|
|
eadd3d4e46 | ||
|
|
2a18206033 | ||
|
|
39798d734e | ||
|
|
d0e99562af | ||
|
|
6410c99bf2 | ||
|
|
55766d269b | ||
|
|
ffa0ad1eac | ||
|
|
623789a29e | ||
|
|
2b9a3d32c9 | ||
|
|
f8b71dc5d0 | ||
|
|
1d3331b5cb | ||
|
|
2c0b9c6349 | ||
|
|
3c6c976755 | ||
|
|
ebbcba342a | ||
|
|
0de75519dc | ||
|
|
37f5e4f5c1 | ||
|
|
ffa934b959 | ||
|
|
59311d8b1e | ||
|
|
d9e25af7b5 | ||
|
|
e4f8b63b40 | ||
|
|
1364ae9be6 | ||
|
|
cfd6a9150d | ||
|
|
cd352d0c5f | ||
|
|
8d47309695 | ||
|
|
5f6fc02a55 | ||
|
|
0b528458d8 | ||
|
|
caab380c5d | ||
|
|
8a3a362504 | ||
|
|
07238eb743 | ||
|
|
e905e90dd7 | ||
|
|
08432d49e5 | ||
|
|
e51e2aacb9 | ||
|
|
9c3d85fc28 | ||
|
|
007ca647a7 | ||
|
|
59af928379 | ||
|
|
dbc2bb561b | ||
|
|
c72c85dcac | ||
|
|
ef984901e6 | ||
|
|
9911ec84a3 | ||
|
|
1956681d4c | ||
|
|
326f6e5ccb | ||
|
|
302958efd6 | ||
|
|
3dc86b247d | ||
|
|
5ec724af06 | ||
|
|
1f1e156bf0 | ||
|
|
df625e366a | ||
|
|
9e6685ac9c | ||
|
|
90c818aa71 | ||
|
|
034b9b691b | ||
|
|
ba52822e5c | ||
|
|
eb30f6c090 | ||
|
|
caba098959 | ||
|
|
3c75ea1e0e | ||
|
|
c5f911812f | ||
|
|
d82922786a | ||
|
|
d9e9bb4c0e | ||
|
|
657027bec6 | ||
|
|
2f5635308d | ||
|
|
63b5338dbd | ||
|
|
3150174962 | ||
|
|
4330fdce33 | ||
|
|
fef8583144 | ||
|
|
d4d6a56a4f | ||
|
|
2900a601a0 | ||
|
|
43e0437db6 | ||
|
|
976c159fdb | ||
|
|
969922ffec | ||
|
|
739573e41b | ||
|
|
dbdf2908ad | ||
|
|
317f8641dc | ||
|
|
54ff70e451 | ||
|
|
723f01c87e | ||
|
|
79a41a5e07 | ||
|
|
d0b6aa3f7d | ||
|
|
ad99399c6e | ||
|
|
e6ebfd3ba1 | ||
|
|
ead00a28b9 | ||
|
|
9621edb4c5 | ||
|
|
7ce92f0646 | ||
|
|
6a4ab3c1e0 | ||
|
|
83b85494c1 | ||
|
|
df6a80b38d | ||
|
|
21faa4114b | ||
|
|
e35ad56602 | ||
|
|
3be8b2d8e1 | ||
|
|
900745bb4d | ||
|
|
15a7fc7e9a | ||
|
|
03dddec538 | ||
|
|
3d34386712 | ||
|
|
1b3f66018b | ||
|
|
4381e892b8 | ||
|
|
3c3f477854 | ||
|
|
f8a8cf3e95 | ||
|
|
0fc88b3cdf | ||
|
|
4993df81c3 | ||
|
|
599bc88c6c | ||
|
|
1a0d06f3db | ||
|
|
5e1a8b3621 | ||
|
|
960e51e527 | ||
|
|
195aa22e77 | ||
|
|
be132fe816 | ||
|
|
ff5d2dc8be | ||
|
|
c1cfa08226 | ||
|
|
fec8a36b36 | ||
|
|
5d4f5d2355 | ||
|
|
057248008f | ||
|
|
9f2c9cd691 | ||
|
|
6971f71a6c | ||
|
|
1ba66d00f5 | ||
|
|
259383cf5e | ||
|
|
209c0694f5 | ||
|
|
0fd395d6ec | ||
|
|
d04bd47116 | ||
|
|
1d830ce7dd | ||
|
|
6dccfb09f8 | ||
|
|
e4d9cf8349 | ||
|
|
c899e90277 | ||
|
|
8193d18c7c | ||
|
|
2e4dc6456f | ||
|
|
4594430a3e | ||
|
|
9c7f92c81f | ||
|
|
060037bcd4 | ||
|
|
d9da4676b4 | ||
|
|
5ef4c2e471 | ||
|
|
27ce570844 | ||
|
|
42c7859ab1 | ||
|
|
e7e83d0fa6 | ||
|
|
c6dc1d86f1 | ||
|
|
6fd2e1964d | ||
|
|
49ae41b716 | ||
|
|
b3f0ed62fd | ||
|
|
4b9afc418b | ||
|
|
e44ff8514b | ||
|
|
2b6be10b6b | ||
|
|
1361d844a1 | ||
|
|
fcc521cae5 | ||
|
|
8cad7138be | ||
|
|
ebd1db2f09 | ||
|
|
7920d75805 | ||
|
|
1d0e24a865 | ||
|
|
9eed5ef872 | ||
|
|
39ab80442a | ||
|
|
1b101df2c0 | ||
|
|
784bd5db33 | ||
|
|
b8b1ca782c | ||
|
|
1149fb66d3 | ||
|
|
243e86176e | ||
|
|
8da38a0d10 | ||
|
|
60786fc876 | ||
|
|
9486b88a25 | ||
|
|
bef4c10629 | ||
|
|
80f15851c5 | ||
|
|
22067e3384 | ||
|
|
4fbd639463 | ||
|
|
70f7d0c25f | ||
|
|
576e821298 | ||
|
|
7293f26fcf | ||
|
|
79973a28ad | ||
|
|
8ab51509cc | ||
|
|
b3384e5428 | ||
|
|
7050c9f69d | ||
|
|
089efe05fd | ||
|
|
253b7537dc | ||
|
|
19c92c70c5 | ||
|
|
b52bfaf1b3 | ||
|
|
bf60ca5bf0 | ||
|
|
2b44467bd1 | ||
|
|
8c1f4a131e | ||
|
|
10a3f0bd92 | ||
|
|
72f4d541d0 | ||
|
|
9f812fdb84 | ||
|
|
b70ee45fff | ||
|
|
9d9c853541 | ||
|
|
18fcd8557c | ||
|
|
d8e27c38d7 | ||
|
|
3b0dc87932 | ||
|
|
2374485222 | ||
|
|
0ca1765c17 | ||
|
|
90b5ed9a1e | ||
|
|
d438b769da | ||
|
|
2e4bd1e33d | ||
|
|
ff73800970 | ||
|
|
94cb20ae7f | ||
|
|
47c20f9adb | ||
|
|
a7fe153630 | ||
|
|
27519d2233 | ||
|
|
8cab0f880b | ||
|
|
8c48b250c4 | ||
|
|
ba802c2ee4 | ||
|
|
429bb7a88c | ||
|
|
b2e8b6d1aa | ||
|
|
fba5b557a1 | ||
|
|
6db19c5cb9 | ||
|
|
5428678209 | ||
|
|
06129139eb | ||
|
|
05757e2738 | ||
|
|
240b790f29 | ||
|
|
5f221f5946 | ||
|
|
def7cdc0bf | ||
|
|
ea9bf3dba2 | ||
|
|
b8eca530b6 | ||
|
|
47034ddacd | ||
|
|
9a41331855 | ||
|
|
facc0181df | ||
|
|
4733adb983 | ||
|
|
326fda3223 | ||
|
|
abf61e5b42 | ||
|
|
2ae45e7635 | ||
|
|
7d41551e10 | ||
|
|
6fbd720515 | ||
|
|
4e40a8d1ed | ||
|
|
003b9292fe | ||
|
|
09457b9221 | ||
|
|
41aa7e107f | ||
|
|
bda875f962 | ||
|
|
224063f0f7 | ||
|
|
89978c8b57 | ||
|
|
987b5dcac1 | ||
|
|
ec1276e5a9 | ||
|
|
61ba98d43d | ||
|
|
b9a25b16e6 | ||
|
|
6a8149e1fd | ||
|
|
9c2840ac38 | ||
|
|
20a70e1244 | ||
|
|
3295a298f4 | ||
|
|
da6f37f000 | ||
|
|
c092633cd7 | ||
|
|
7e2a522229 | ||
|
|
03e8592450 | ||
|
|
f207bd1427 | ||
|
|
a5c0fe31c3 | ||
|
|
c68907ac65 | ||
|
|
9087ddc4de | ||
|
|
33bebd5114 | ||
|
|
2913676157 | ||
|
|
e83652489c | ||
|
|
d6274eaf4a | ||
|
|
4d90971424 | ||
|
|
90f5639639 | ||
|
|
a35a701052 | ||
|
|
3d8ec72dbf | ||
|
|
2a9d675d62 | ||
|
|
c782e8abf1 | ||
|
|
a1e1942d83 | ||
|
|
787302b204 | ||
|
|
0b085089b9 | ||
|
|
624f3b1fc8 | ||
|
|
c07bc55fee | ||
|
|
173e0774c0 | ||
|
|
8ece26ab7c | ||
|
|
d704cc7970 | ||
|
|
ab17baaae1 | ||
|
|
ca358fcdca | ||
|
|
9aadfd485f | ||
|
|
da3b0850de | ||
|
|
8b1e8b4cda | ||
|
|
3d22bfc27c | ||
|
|
4438b4361e | ||
|
|
04bad9a2da | ||
|
|
8235e53602 | ||
|
|
eb5c3670f1 | ||
|
|
89e61fca90 | ||
|
|
9d6efe8842 | ||
|
|
60726d16f2 | ||
|
|
9d7ec09ec0 | ||
|
|
36179ffbed | ||
|
|
d25145e641 | ||
|
|
949e5b9be8 | ||
|
|
73ecb7f90b | ||
|
|
053bed6e5f | ||
|
|
932360bf7e | ||
|
|
6d0b52843f | ||
|
|
078c22f485 | ||
|
|
6ef3852de5 | ||
|
|
a8057b952c | ||
|
|
fd5c1d916f | ||
|
|
5ce982b9c9 | ||
|
|
47ccfccf7a | ||
|
|
a760f7ff39 | ||
|
|
facf7625f3 | ||
|
|
b3600b3c50 | ||
|
|
f0b47cfe6a | ||
|
|
ee625fc34e | ||
|
|
693aa0b5de | ||
|
|
3973e6e5da | ||
|
|
fb6ec68090 | ||
|
|
0301fc7c46 | ||
|
|
813cb4296d | ||
|
|
deda3a4972 | ||
|
|
a28f27604a | ||
|
|
8fe9fa98f2 | ||
|
|
4db1b80278 | ||
|
|
b3c2a3c257 | ||
|
|
61c2304638 | ||
|
|
92c5ab97e2 | ||
|
|
76e471441c | ||
|
|
9cecf5e7ac | ||
|
|
b7b3164736 | ||
|
|
5f7ece3e94 | ||
|
|
c717b8d800 | ||
|
|
f1d35c4149 | ||
|
|
ee7e77b6c1 | ||
|
|
324fecbb75 | ||
|
|
a79bfcf0a7 | ||
|
|
82495e7fb6 | ||
|
|
6030b12283 | ||
|
|
b5be867e28 | ||
|
|
9b806250d4 | ||
|
|
5f066e702f | ||
|
|
47bb3a3db2 | ||
|
|
51230a801e | ||
|
|
754bedc3ea | ||
|
|
98e5291afc | ||
|
|
e29b2c3aff | ||
|
|
8dc574f3c4 | ||
|
|
05bf2493a5 | ||
|
|
eae4ca08da | ||
|
|
fa284f7445 | ||
|
|
8f69b80520 | ||
|
|
b1fc5acd4a | ||
|
|
fab41c29dd | ||
|
|
fb0ec96396 | ||
|
|
7659461036 | ||
|
|
580687da46 | ||
|
|
1929eb2894 | ||
|
|
b29544d747 | ||
|
|
7c30e82647 | ||
|
|
a1d061c835 | ||
|
|
851c67019c | ||
|
|
53ed5ef189 | ||
|
|
294f7022f3 | ||
|
|
932f6b01a6 | ||
|
|
e96452c5d4 | ||
|
|
5fc8d5bb78 | ||
|
|
121937ed6f | ||
|
|
2e38f2a054 | ||
|
|
2a6187bc01 | ||
|
|
584c48df5a | ||
|
|
8dd67748a1 | ||
|
|
3fd0bf3c88 | ||
|
|
4062a6c404 | ||
|
|
354c0b763e | ||
|
|
40f9065367 | ||
|
|
fc02bc0aba | ||
|
|
45badb75e8 | ||
|
|
d7e1922582 | ||
|
|
642a39afa0 | ||
|
|
34d9deaf39 | ||
|
|
ef37a73e1b | ||
|
|
37de945ae8 | ||
|
|
468f1f4539 | ||
|
|
0640451368 | ||
|
|
99058511cc | ||
|
|
ec293b3b59 | ||
|
|
9b1b6df8e9 | ||
|
|
cd7fbafcd2 | ||
|
|
e5125216cf | ||
|
|
2105f82433 | ||
|
|
49c0c7881a | ||
|
|
f8829376d8 | ||
|
|
0475f63675 | ||
|
|
ec206cc67c | ||
|
|
34171fcf94 | ||
|
|
238c334aa7 | ||
|
|
d2df0a1769 | ||
|
|
d58647ac31 | ||
|
|
c1d3ce9a93 | ||
|
|
c1dd4ff5d5 | ||
|
|
48118b9582 | ||
|
|
ceda2e69db | ||
|
|
cea1703acc | ||
|
|
33fc9b9922 | ||
|
|
b783997c52 | ||
|
|
f6ec06d21c | ||
|
|
7e1f2657d5 | ||
|
|
9589097252 | ||
|
|
cb87d331a9 | ||
|
|
6dfc96249a | ||
|
|
a2564ed654 | ||
|
|
6c747caa34 | ||
|
|
8ae5e0feb9 | ||
|
|
c35dd0a7b8 | ||
|
|
2f5af6b246 | ||
|
|
00cf2e0e0a | ||
|
|
c7a1d9c089 | ||
|
|
ad7ba52166 | ||
|
|
c5b9f45166 | ||
|
|
61b64a65ab | ||
|
|
8276952920 | ||
|
|
b7cd5bfaec | ||
|
|
da4312e4d3 | ||
|
|
7d507c54ed | ||
|
|
df7ed49889 | ||
|
|
bfdc29d316 | ||
|
|
7fdc006071 | ||
|
|
615830245b | ||
|
|
61376c0fa7 | ||
|
|
d0fb23514f | ||
|
|
780d034ac9 | ||
|
|
ec2a044c7e | ||
|
|
ad6fdd21fd | ||
|
|
cd94e6b352 | ||
|
|
b37cef3718 | ||
|
|
9f957d547d | ||
|
|
f0d9f0c5d8 | ||
|
|
d33e1c72a3 | ||
|
|
33f9ee06c9 | ||
|
|
c54677402d | ||
|
|
3fe3a7b23d | ||
|
|
f8ff6fa1fd | ||
|
|
dfadc3696e | ||
|
|
dbcf5fb4fc | ||
|
|
2633137a17 | ||
|
|
d9c17dd23b | ||
|
|
d8b7bd4860 | ||
|
|
a611cbc0f4 | ||
|
|
850b525159 | ||
|
|
35b3426a2a |
@@ -2,9 +2,6 @@
|
|||||||
|
|
||||||
cd /workspace
|
cd /workspace
|
||||||
|
|
||||||
# Grab the pre-stashed backend assets to avoid build issues
|
|
||||||
cp -r /build/backend-assets /workspace/backend-assets
|
|
||||||
|
|
||||||
# Ensures generated source files are present upon load
|
# Ensures generated source files are present upon load
|
||||||
make prepare
|
make prepare
|
||||||
|
|
||||||
|
|||||||
@@ -4,9 +4,6 @@ services:
|
|||||||
context: ..
|
context: ..
|
||||||
dockerfile: Dockerfile
|
dockerfile: Dockerfile
|
||||||
target: devcontainer
|
target: devcontainer
|
||||||
args:
|
|
||||||
- FFMPEG=true
|
|
||||||
- GO_TAGS=p2p tts
|
|
||||||
env_file:
|
env_file:
|
||||||
- ../.env
|
- ../.env
|
||||||
ports:
|
ports:
|
||||||
|
|||||||
@@ -3,7 +3,13 @@
|
|||||||
.vscode
|
.vscode
|
||||||
.devcontainer
|
.devcontainer
|
||||||
models
|
models
|
||||||
|
backends
|
||||||
examples/chatbot-ui/models
|
examples/chatbot-ui/models
|
||||||
|
backend/go/image/stablediffusion-ggml/build/
|
||||||
|
backend/go/*/build
|
||||||
|
backend/go/*/.cache
|
||||||
|
backend/go/*/sources
|
||||||
|
backend/go/*/package
|
||||||
examples/rwkv/models
|
examples/rwkv/models
|
||||||
examples/**/models
|
examples/**/models
|
||||||
Dockerfile*
|
Dockerfile*
|
||||||
@@ -14,4 +20,4 @@ __pycache__
|
|||||||
|
|
||||||
# backend virtual environments
|
# backend virtual environments
|
||||||
**/venv
|
**/venv
|
||||||
backend/python/**/source
|
backend/python/**/source
|
||||||
|
|||||||
7
.env
7
.env
@@ -41,13 +41,6 @@
|
|||||||
## Uncomment and set to true to enable rebuilding from source
|
## Uncomment and set to true to enable rebuilding from source
|
||||||
# REBUILD=true
|
# REBUILD=true
|
||||||
|
|
||||||
## Enable go tags, available: p2p, tts
|
|
||||||
## p2p: enable distributed inferencing
|
|
||||||
## tts: enables text-to-speech with go-piper
|
|
||||||
## (requires REBUILD=true)
|
|
||||||
#
|
|
||||||
# GO_TAGS=p2p
|
|
||||||
|
|
||||||
## Path where to store generated images
|
## Path where to store generated images
|
||||||
# LOCALAI_IMAGE_PATH=/tmp/generated/images
|
# LOCALAI_IMAGE_PATH=/tmp/generated/images
|
||||||
|
|
||||||
|
|||||||
9
.github/bump_deps.sh
vendored
9
.github/bump_deps.sh
vendored
@@ -3,15 +3,20 @@ set -xe
|
|||||||
REPO=$1
|
REPO=$1
|
||||||
BRANCH=$2
|
BRANCH=$2
|
||||||
VAR=$3
|
VAR=$3
|
||||||
|
FILE=$4
|
||||||
|
|
||||||
|
if [ -z "$FILE" ]; then
|
||||||
|
FILE="Makefile"
|
||||||
|
fi
|
||||||
|
|
||||||
LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
|
LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
|
||||||
|
|
||||||
# Read $VAR from Makefile (only first match)
|
# Read $VAR from Makefile (only first match)
|
||||||
set +e
|
set +e
|
||||||
CURRENT_COMMIT="$(grep -m1 "^$VAR?=" Makefile | cut -d'=' -f2)"
|
CURRENT_COMMIT="$(grep -m1 "^$VAR?=" $FILE | cut -d'=' -f2)"
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
|
sed -i $FILE -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
|
||||||
|
|
||||||
if [ -z "$CURRENT_COMMIT" ]; then
|
if [ -z "$CURRENT_COMMIT" ]; then
|
||||||
echo "Could not find $VAR in Makefile."
|
echo "Could not find $VAR in Makefile."
|
||||||
|
|||||||
1064
.github/workflows/backend.yml
vendored
1064
.github/workflows/backend.yml
vendored
File diff suppressed because it is too large
Load Diff
30
.github/workflows/backend_build.yml
vendored
30
.github/workflows/backend_build.yml
vendored
@@ -49,19 +49,25 @@ on:
|
|||||||
description: 'Build Dockerfile'
|
description: 'Build Dockerfile'
|
||||||
required: true
|
required: true
|
||||||
type: string
|
type: string
|
||||||
|
skip-drivers:
|
||||||
|
description: 'Skip drivers'
|
||||||
|
default: 'false'
|
||||||
|
type: string
|
||||||
secrets:
|
secrets:
|
||||||
dockerUsername:
|
dockerUsername:
|
||||||
required: true
|
required: false
|
||||||
dockerPassword:
|
dockerPassword:
|
||||||
required: true
|
required: false
|
||||||
quayUsername:
|
quayUsername:
|
||||||
required: true
|
required: true
|
||||||
quayPassword:
|
quayPassword:
|
||||||
required: true
|
required: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
reusable_python_backend-build:
|
backend-build:
|
||||||
runs-on: ${{ inputs.runs-on }}
|
runs-on: ${{ inputs.runs-on }}
|
||||||
|
env:
|
||||||
|
quay_username: ${{ secrets.quayUsername }}
|
||||||
steps:
|
steps:
|
||||||
|
|
||||||
|
|
||||||
@@ -91,7 +97,7 @@ jobs:
|
|||||||
&& sudo apt-get install -y git
|
&& sudo apt-get install -y git
|
||||||
|
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v5
|
||||||
|
|
||||||
- name: Release space from worker
|
- name: Release space from worker
|
||||||
if: inputs.runs-on == 'ubuntu-latest'
|
if: inputs.runs-on == 'ubuntu-latest'
|
||||||
@@ -183,7 +189,7 @@ jobs:
|
|||||||
password: ${{ secrets.dockerPassword }}
|
password: ${{ secrets.dockerPassword }}
|
||||||
|
|
||||||
- name: Login to Quay.io
|
- name: Login to Quay.io
|
||||||
# if: github.event_name != 'pull_request'
|
if: ${{ env.quay_username != '' }}
|
||||||
uses: docker/login-action@v3
|
uses: docker/login-action@v3
|
||||||
with:
|
with:
|
||||||
registry: quay.io
|
registry: quay.io
|
||||||
@@ -197,12 +203,13 @@ jobs:
|
|||||||
builder: ${{ steps.buildx.outputs.name }}
|
builder: ${{ steps.buildx.outputs.name }}
|
||||||
build-args: |
|
build-args: |
|
||||||
BUILD_TYPE=${{ inputs.build-type }}
|
BUILD_TYPE=${{ inputs.build-type }}
|
||||||
|
SKIP_DRIVERS=${{ inputs.skip-drivers }}
|
||||||
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
||||||
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
|
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
|
||||||
BASE_IMAGE=${{ inputs.base-image }}
|
BASE_IMAGE=${{ inputs.base-image }}
|
||||||
BACKEND=${{ inputs.backend }}
|
BACKEND=${{ inputs.backend }}
|
||||||
context: ./backend
|
context: ${{ inputs.context }}
|
||||||
file: ./backend/Dockerfile.python
|
file: ${{ inputs.dockerfile }}
|
||||||
cache-from: type=gha
|
cache-from: type=gha
|
||||||
platforms: ${{ inputs.platforms }}
|
platforms: ${{ inputs.platforms }}
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
push: ${{ github.event_name != 'pull_request' }}
|
||||||
@@ -216,15 +223,16 @@ jobs:
|
|||||||
builder: ${{ steps.buildx.outputs.name }}
|
builder: ${{ steps.buildx.outputs.name }}
|
||||||
build-args: |
|
build-args: |
|
||||||
BUILD_TYPE=${{ inputs.build-type }}
|
BUILD_TYPE=${{ inputs.build-type }}
|
||||||
|
SKIP_DRIVERS=${{ inputs.skip-drivers }}
|
||||||
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
||||||
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
|
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
|
||||||
BASE_IMAGE=${{ inputs.base-image }}
|
BASE_IMAGE=${{ inputs.base-image }}
|
||||||
BACKEND=${{ inputs.backend }}
|
BACKEND=${{ inputs.backend }}
|
||||||
context: ./backend
|
context: ${{ inputs.context }}
|
||||||
file: ./backend/Dockerfile.python
|
file: ${{ inputs.dockerfile }}
|
||||||
cache-from: type=gha
|
cache-from: type=gha
|
||||||
platforms: ${{ inputs.platforms }}
|
platforms: ${{ inputs.platforms }}
|
||||||
push: true
|
push: ${{ env.quay_username != '' }}
|
||||||
tags: ${{ steps.meta_pull_request.outputs.tags }}
|
tags: ${{ steps.meta_pull_request.outputs.tags }}
|
||||||
labels: ${{ steps.meta_pull_request.outputs.labels }}
|
labels: ${{ steps.meta_pull_request.outputs.labels }}
|
||||||
|
|
||||||
@@ -232,4 +240,4 @@ jobs:
|
|||||||
|
|
||||||
- name: job summary
|
- name: job summary
|
||||||
run: |
|
run: |
|
||||||
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
|
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
|
||||||
|
|||||||
144
.github/workflows/backend_build_darwin.yml
vendored
Normal file
144
.github/workflows/backend_build_darwin.yml
vendored
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
---
|
||||||
|
name: 'build darwin python backend container images (reusable)'
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_call:
|
||||||
|
inputs:
|
||||||
|
backend:
|
||||||
|
description: 'Backend to build'
|
||||||
|
required: true
|
||||||
|
type: string
|
||||||
|
build-type:
|
||||||
|
description: 'Build type (e.g., mps)'
|
||||||
|
default: ''
|
||||||
|
type: string
|
||||||
|
use-pip:
|
||||||
|
description: 'Use pip to install dependencies'
|
||||||
|
default: false
|
||||||
|
type: boolean
|
||||||
|
lang:
|
||||||
|
description: 'Programming language (e.g. go)'
|
||||||
|
default: 'python'
|
||||||
|
type: string
|
||||||
|
go-version:
|
||||||
|
description: 'Go version to use'
|
||||||
|
default: '1.24.x'
|
||||||
|
type: string
|
||||||
|
tag-suffix:
|
||||||
|
description: 'Tag suffix for the built image'
|
||||||
|
required: true
|
||||||
|
type: string
|
||||||
|
runs-on:
|
||||||
|
description: 'Runner to use'
|
||||||
|
default: 'macOS-14'
|
||||||
|
type: string
|
||||||
|
secrets:
|
||||||
|
dockerUsername:
|
||||||
|
required: false
|
||||||
|
dockerPassword:
|
||||||
|
required: false
|
||||||
|
quayUsername:
|
||||||
|
required: true
|
||||||
|
quayPassword:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
darwin-backend-build:
|
||||||
|
runs-on: ${{ inputs.runs-on }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
go-version: ['${{ inputs.go-version }}']
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v5
|
||||||
|
with:
|
||||||
|
submodules: true
|
||||||
|
|
||||||
|
- name: Setup Go ${{ matrix.go-version }}
|
||||||
|
uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version: ${{ matrix.go-version }}
|
||||||
|
cache: false
|
||||||
|
|
||||||
|
# You can test your matrix by printing the current Go version
|
||||||
|
- name: Display Go version
|
||||||
|
run: go version
|
||||||
|
|
||||||
|
- name: Dependencies
|
||||||
|
run: |
|
||||||
|
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
|
||||||
|
|
||||||
|
- name: Build ${{ inputs.backend }}-darwin
|
||||||
|
run: |
|
||||||
|
make protogen-go
|
||||||
|
BACKEND=${{ inputs.backend }} BUILD_TYPE=${{ inputs.build-type }} USE_PIP=${{ inputs.use-pip }} make build-darwin-${{ inputs.lang }}-backend
|
||||||
|
|
||||||
|
- name: Upload ${{ inputs.backend }}.tar
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: ${{ inputs.backend }}-tar
|
||||||
|
path: backend-images/${{ inputs.backend }}.tar
|
||||||
|
|
||||||
|
darwin-backend-publish:
|
||||||
|
needs: darwin-backend-build
|
||||||
|
if: github.event_name != 'pull_request'
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Download ${{ inputs.backend }}.tar
|
||||||
|
uses: actions/download-artifact@v5
|
||||||
|
with:
|
||||||
|
name: ${{ inputs.backend }}-tar
|
||||||
|
path: .
|
||||||
|
|
||||||
|
- name: Install crane
|
||||||
|
run: |
|
||||||
|
curl -L https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz
|
||||||
|
sudo mv crane /usr/local/bin/
|
||||||
|
|
||||||
|
- name: Log in to DockerHub
|
||||||
|
run: |
|
||||||
|
echo "${{ secrets.dockerPassword }}" | crane auth login docker.io -u "${{ secrets.dockerUsername }}" --password-stdin
|
||||||
|
|
||||||
|
- name: Log in to quay.io
|
||||||
|
run: |
|
||||||
|
echo "${{ secrets.quayPassword }}" | crane auth login quay.io -u "${{ secrets.quayUsername }}" --password-stdin
|
||||||
|
|
||||||
|
- name: Docker meta
|
||||||
|
id: meta
|
||||||
|
uses: docker/metadata-action@v5
|
||||||
|
with:
|
||||||
|
images: |
|
||||||
|
localai/localai-backends
|
||||||
|
tags: |
|
||||||
|
type=ref,event=branch
|
||||||
|
type=semver,pattern={{raw}}
|
||||||
|
type=sha
|
||||||
|
flavor: |
|
||||||
|
latest=auto
|
||||||
|
suffix=${{ inputs.tag-suffix }},onlatest=true
|
||||||
|
|
||||||
|
- name: Docker meta
|
||||||
|
id: quaymeta
|
||||||
|
uses: docker/metadata-action@v5
|
||||||
|
with:
|
||||||
|
images: |
|
||||||
|
quay.io/go-skynet/local-ai-backends
|
||||||
|
tags: |
|
||||||
|
type=ref,event=branch
|
||||||
|
type=semver,pattern={{raw}}
|
||||||
|
type=sha
|
||||||
|
flavor: |
|
||||||
|
latest=auto
|
||||||
|
suffix=${{ inputs.tag-suffix }},onlatest=true
|
||||||
|
|
||||||
|
- name: Push Docker image (DockerHub)
|
||||||
|
run: |
|
||||||
|
for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr ',' '\n'); do
|
||||||
|
crane push ${{ inputs.backend }}.tar $tag
|
||||||
|
done
|
||||||
|
|
||||||
|
- name: Push Docker image (Quay)
|
||||||
|
run: |
|
||||||
|
for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do
|
||||||
|
crane push ${{ inputs.backend }}.tar $tag
|
||||||
|
done
|
||||||
78
.github/workflows/backend_pr.yml
vendored
Normal file
78
.github/workflows/backend_pr.yml
vendored
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
name: 'build backend container images (PR-filtered)'
|
||||||
|
|
||||||
|
on:
|
||||||
|
pull_request:
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ci-backends-pr-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
generate-matrix:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
outputs:
|
||||||
|
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||||
|
matrix-darwin: ${{ steps.set-matrix.outputs.matrix-darwin }}
|
||||||
|
has-backends: ${{ steps.set-matrix.outputs.has-backends }}
|
||||||
|
has-backends-darwin: ${{ steps.set-matrix.outputs.has-backends-darwin }}
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v5
|
||||||
|
|
||||||
|
- name: Setup Bun
|
||||||
|
uses: oven-sh/setup-bun@v2
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
bun add js-yaml
|
||||||
|
bun add @octokit/core
|
||||||
|
|
||||||
|
# filters the matrix in backend.yml
|
||||||
|
- name: Filter matrix for changed backends
|
||||||
|
id: set-matrix
|
||||||
|
env:
|
||||||
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
GITHUB_EVENT_PATH: ${{ github.event_path }}
|
||||||
|
run: bun run scripts/changed-backends.js
|
||||||
|
|
||||||
|
backend-jobs:
|
||||||
|
needs: generate-matrix
|
||||||
|
uses: ./.github/workflows/backend_build.yml
|
||||||
|
if: needs.generate-matrix.outputs.has-backends == 'true'
|
||||||
|
with:
|
||||||
|
tag-latest: ${{ matrix.tag-latest }}
|
||||||
|
tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
|
build-type: ${{ matrix.build-type }}
|
||||||
|
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||||
|
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
|
platforms: ${{ matrix.platforms }}
|
||||||
|
runs-on: ${{ matrix.runs-on }}
|
||||||
|
base-image: ${{ matrix.base-image }}
|
||||||
|
backend: ${{ matrix.backend }}
|
||||||
|
dockerfile: ${{ matrix.dockerfile }}
|
||||||
|
skip-drivers: ${{ matrix.skip-drivers }}
|
||||||
|
context: ${{ matrix.context }}
|
||||||
|
secrets:
|
||||||
|
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||||
|
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||||
|
strategy:
|
||||||
|
fail-fast: true
|
||||||
|
matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}
|
||||||
|
backend-jobs-darwin:
|
||||||
|
needs: generate-matrix
|
||||||
|
uses: ./.github/workflows/backend_build_darwin.yml
|
||||||
|
if: needs.generate-matrix.outputs.has-backends-darwin == 'true'
|
||||||
|
with:
|
||||||
|
backend: ${{ matrix.backend }}
|
||||||
|
build-type: ${{ matrix.build-type }}
|
||||||
|
go-version: "1.24.x"
|
||||||
|
tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
|
lang: ${{ matrix.lang || 'python' }}
|
||||||
|
use-pip: ${{ matrix.backend == 'diffusers' }}
|
||||||
|
runs-on: "macOS-14"
|
||||||
|
secrets:
|
||||||
|
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||||
|
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||||
|
strategy:
|
||||||
|
fail-fast: true
|
||||||
|
matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix-darwin) }}
|
||||||
67
.github/workflows/build-test.yaml
vendored
Normal file
67
.github/workflows/build-test.yaml
vendored
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
name: Build test
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
pull_request:
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-test:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v5
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
- name: Set up Go
|
||||||
|
uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version: 1.23
|
||||||
|
- name: Run GoReleaser
|
||||||
|
run: |
|
||||||
|
make dev-dist
|
||||||
|
launcher-build-darwin:
|
||||||
|
runs-on: macos-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v5
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
- name: Set up Go
|
||||||
|
uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version: 1.23
|
||||||
|
- name: Build launcher for macOS ARM64
|
||||||
|
run: |
|
||||||
|
make build-launcher-darwin
|
||||||
|
ls -liah dist
|
||||||
|
- name: Upload macOS launcher artifacts
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: launcher-macos
|
||||||
|
path: dist/
|
||||||
|
retention-days: 30
|
||||||
|
|
||||||
|
launcher-build-linux:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v5
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
- name: Set up Go
|
||||||
|
uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version: 1.23
|
||||||
|
- name: Build launcher for Linux
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install golang gcc libgl1-mesa-dev xorg-dev libxkbcommon-dev
|
||||||
|
make build-launcher-linux
|
||||||
|
- name: Upload Linux launcher artifacts
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: launcher-linux
|
||||||
|
path: local-ai-launcher-linux.tar.xz
|
||||||
|
retention-days: 30
|
||||||
14
.github/workflows/bump_deps.yaml
vendored
14
.github/workflows/bump_deps.yaml
vendored
@@ -10,30 +10,32 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- repository: "ggml-org/llama.cpp"
|
- repository: "ggml-org/llama.cpp"
|
||||||
variable: "CPPLLAMA_VERSION"
|
variable: "LLAMA_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
|
file: "backend/cpp/llama-cpp/Makefile"
|
||||||
- repository: "ggml-org/whisper.cpp"
|
- repository: "ggml-org/whisper.cpp"
|
||||||
variable: "WHISPER_CPP_VERSION"
|
variable: "WHISPER_CPP_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
|
file: "backend/go/whisper/Makefile"
|
||||||
- repository: "PABannier/bark.cpp"
|
- repository: "PABannier/bark.cpp"
|
||||||
variable: "BARKCPP_VERSION"
|
variable: "BARKCPP_VERSION"
|
||||||
branch: "main"
|
branch: "main"
|
||||||
|
file: "Makefile"
|
||||||
- repository: "leejet/stable-diffusion.cpp"
|
- repository: "leejet/stable-diffusion.cpp"
|
||||||
variable: "STABLEDIFFUSION_GGML_VERSION"
|
variable: "STABLEDIFFUSION_GGML_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
- repository: "mudler/go-stable-diffusion"
|
file: "backend/go/stablediffusion-ggml/Makefile"
|
||||||
variable: "STABLEDIFFUSION_VERSION"
|
|
||||||
branch: "master"
|
|
||||||
- repository: "mudler/go-piper"
|
- repository: "mudler/go-piper"
|
||||||
variable: "PIPER_VERSION"
|
variable: "PIPER_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
|
file: "backend/go/piper/Makefile"
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v5
|
||||||
- name: Bump dependencies 🔧
|
- name: Bump dependencies 🔧
|
||||||
id: bump
|
id: bump
|
||||||
run: |
|
run: |
|
||||||
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
|
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }} ${{ matrix.file }}
|
||||||
{
|
{
|
||||||
echo 'message<<EOF'
|
echo 'message<<EOF'
|
||||||
cat "${{ matrix.variable }}_message.txt"
|
cat "${{ matrix.variable }}_message.txt"
|
||||||
|
|||||||
2
.github/workflows/bump_docs.yaml
vendored
2
.github/workflows/bump_docs.yaml
vendored
@@ -12,7 +12,7 @@ jobs:
|
|||||||
- repository: "mudler/LocalAI"
|
- repository: "mudler/LocalAI"
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v5
|
||||||
- name: Bump dependencies 🔧
|
- name: Bump dependencies 🔧
|
||||||
run: |
|
run: |
|
||||||
bash .github/bump_docs.sh ${{ matrix.repository }}
|
bash .github/bump_docs.sh ${{ matrix.repository }}
|
||||||
|
|||||||
3
.github/workflows/checksum_checker.yaml
vendored
3
.github/workflows/checksum_checker.yaml
vendored
@@ -15,12 +15,11 @@ jobs:
|
|||||||
&& sudo add-apt-repository -y ppa:git-core/ppa \
|
&& sudo add-apt-repository -y ppa:git-core/ppa \
|
||||||
&& sudo apt-get update \
|
&& sudo apt-get update \
|
||||||
&& sudo apt-get install -y git
|
&& sudo apt-get install -y git
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v5
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install -y pip wget
|
sudo apt-get install -y pip wget
|
||||||
sudo pip install --upgrade pip
|
|
||||||
pip install huggingface_hub
|
pip install huggingface_hub
|
||||||
- name: 'Setup yq'
|
- name: 'Setup yq'
|
||||||
uses: dcarbone/install-yq-action@v1.3.1
|
uses: dcarbone/install-yq-action@v1.3.1
|
||||||
|
|||||||
2
.github/workflows/dependabot_auto.yml
vendored
2
.github/workflows/dependabot_auto.yml
vendored
@@ -20,7 +20,7 @@ jobs:
|
|||||||
skip-commit-verification: true
|
skip-commit-verification: true
|
||||||
|
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v5
|
||||||
|
|
||||||
- name: Approve a PR if not already approved
|
- name: Approve a PR if not already approved
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
4
.github/workflows/deploy-explorer.yaml
vendored
4
.github/workflows/deploy-explorer.yaml
vendored
@@ -15,7 +15,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v5
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v5
|
||||||
@@ -31,7 +31,7 @@ jobs:
|
|||||||
make protogen-go
|
make protogen-go
|
||||||
- name: Build api
|
- name: Build api
|
||||||
run: |
|
run: |
|
||||||
CGO_ENABLED=0 make build-api
|
CGO_ENABLED=0 make build
|
||||||
- name: rm
|
- name: rm
|
||||||
uses: appleboy/ssh-action@v1.2.2
|
uses: appleboy/ssh-action@v1.2.2
|
||||||
with:
|
with:
|
||||||
|
|||||||
2
.github/workflows/generate_grpc_cache.yaml
vendored
2
.github/workflows/generate_grpc_cache.yaml
vendored
@@ -73,7 +73,7 @@ jobs:
|
|||||||
uses: docker/setup-buildx-action@master
|
uses: docker/setup-buildx-action@master
|
||||||
|
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v5
|
||||||
|
|
||||||
- name: Cache GRPC
|
- name: Cache GRPC
|
||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@v6
|
||||||
|
|||||||
4
.github/workflows/generate_intel_image.yaml
vendored
4
.github/workflows/generate_intel_image.yaml
vendored
@@ -15,7 +15,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- base-image: intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04
|
- base-image: intel/oneapi-basekit:2025.2.0-0-devel-ubuntu22.04
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
runs-on: ${{matrix.runs-on}}
|
runs-on: ${{matrix.runs-on}}
|
||||||
@@ -43,7 +43,7 @@ jobs:
|
|||||||
uses: docker/setup-buildx-action@master
|
uses: docker/setup-buildx-action@master
|
||||||
|
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v5
|
||||||
|
|
||||||
- name: Cache Intel images
|
- name: Cache Intel images
|
||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@v6
|
||||||
|
|||||||
17
.github/workflows/image-pr.yml
vendored
17
.github/workflows/image-pr.yml
vendored
@@ -14,7 +14,6 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
tag-latest: ${{ matrix.tag-latest }}
|
tag-latest: ${{ matrix.tag-latest }}
|
||||||
tag-suffix: ${{ matrix.tag-suffix }}
|
tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
ffmpeg: ${{ matrix.ffmpeg }}
|
|
||||||
build-type: ${{ matrix.build-type }}
|
build-type: ${{ matrix.build-type }}
|
||||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
@@ -40,8 +39,7 @@ jobs:
|
|||||||
cuda-minor-version: "0"
|
cuda-minor-version: "0"
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-gpu-nvidia-cuda12-ffmpeg'
|
tag-suffix: '-gpu-nvidia-cuda-12'
|
||||||
ffmpeg: 'true'
|
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
@@ -49,25 +47,22 @@ jobs:
|
|||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-hipblas'
|
tag-suffix: '-hipblas'
|
||||||
ffmpeg: 'false'
|
base-image: "rocm/dev-ubuntu-22.04:6.4.3"
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
grpc-base-image: "ubuntu:22.04"
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'sycl_f16'
|
- build-type: 'sycl'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||||
grpc-base-image: "ubuntu:22.04"
|
grpc-base-image: "ubuntu:22.04"
|
||||||
tag-suffix: 'sycl-f16-ffmpeg'
|
tag-suffix: 'sycl'
|
||||||
ffmpeg: 'true'
|
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'vulkan'
|
- build-type: 'vulkan'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64,linux/arm64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-vulkan-ffmpeg-core'
|
tag-suffix: '-vulkan-core'
|
||||||
ffmpeg: 'true'
|
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
|
|||||||
36
.github/workflows/image.yml
vendored
36
.github/workflows/image.yml
vendored
@@ -18,7 +18,6 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
tag-latest: ${{ matrix.tag-latest }}
|
tag-latest: ${{ matrix.tag-latest }}
|
||||||
tag-suffix: ${{ matrix.tag-suffix }}
|
tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
ffmpeg: ${{ matrix.ffmpeg }}
|
|
||||||
build-type: ${{ matrix.build-type }}
|
build-type: ${{ matrix.build-type }}
|
||||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
@@ -40,8 +39,7 @@ jobs:
|
|||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
tag-suffix: '-gpu-hipblas'
|
tag-suffix: '-gpu-hipblas'
|
||||||
ffmpeg: 'true'
|
base-image: "rocm/dev-ubuntu-22.04:6.4.3"
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
grpc-base-image: "ubuntu:22.04"
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
@@ -52,7 +50,6 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
tag-latest: ${{ matrix.tag-latest }}
|
tag-latest: ${{ matrix.tag-latest }}
|
||||||
tag-suffix: ${{ matrix.tag-suffix }}
|
tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
ffmpeg: ${{ matrix.ffmpeg }}
|
|
||||||
build-type: ${{ matrix.build-type }}
|
build-type: ${{ matrix.build-type }}
|
||||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
@@ -76,7 +73,6 @@ jobs:
|
|||||||
platforms: 'linux/amd64,linux/arm64'
|
platforms: 'linux/amd64,linux/arm64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
tag-suffix: ''
|
tag-suffix: ''
|
||||||
ffmpeg: 'true'
|
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
aio: "-aio-cpu"
|
aio: "-aio-cpu"
|
||||||
@@ -87,8 +83,7 @@ jobs:
|
|||||||
cuda-minor-version: "7"
|
cuda-minor-version: "7"
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
tag-suffix: '-gpu-nvidia-cuda11'
|
tag-suffix: '-gpu-nvidia-cuda-11'
|
||||||
ffmpeg: 'true'
|
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
@@ -99,50 +94,36 @@ jobs:
|
|||||||
cuda-minor-version: "0"
|
cuda-minor-version: "0"
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
tag-suffix: '-gpu-nvidia-cuda12'
|
tag-suffix: '-gpu-nvidia-cuda-12'
|
||||||
ffmpeg: 'true'
|
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
skip-drivers: 'false'
|
skip-drivers: 'false'
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
aio: "-aio-gpu-nvidia-cuda-12"
|
aio: "-aio-gpu-nvidia-cuda-12"
|
||||||
- build-type: 'vulkan'
|
- build-type: 'vulkan'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64,linux/arm64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
tag-suffix: '-vulkan'
|
tag-suffix: '-gpu-vulkan'
|
||||||
ffmpeg: 'true'
|
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
skip-drivers: 'false'
|
skip-drivers: 'false'
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
aio: "-aio-gpu-vulkan"
|
aio: "-aio-gpu-vulkan"
|
||||||
- build-type: 'sycl_f16'
|
- build-type: 'intel'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||||
grpc-base-image: "ubuntu:22.04"
|
grpc-base-image: "ubuntu:22.04"
|
||||||
tag-suffix: '-gpu-intel-f16'
|
tag-suffix: '-gpu-intel'
|
||||||
ffmpeg: 'true'
|
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
aio: "-aio-gpu-intel-f16"
|
aio: "-aio-gpu-intel"
|
||||||
- build-type: 'sycl_f32'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'auto'
|
|
||||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
tag-suffix: '-gpu-intel-f32'
|
|
||||||
ffmpeg: 'true'
|
|
||||||
runs-on: 'ubuntu-latest'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
aio: "-aio-gpu-intel-f32"
|
|
||||||
|
|
||||||
gh-runner:
|
gh-runner:
|
||||||
uses: ./.github/workflows/image_build.yml
|
uses: ./.github/workflows/image_build.yml
|
||||||
with:
|
with:
|
||||||
tag-latest: ${{ matrix.tag-latest }}
|
tag-latest: ${{ matrix.tag-latest }}
|
||||||
tag-suffix: ${{ matrix.tag-suffix }}
|
tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
ffmpeg: ${{ matrix.ffmpeg }}
|
|
||||||
build-type: ${{ matrix.build-type }}
|
build-type: ${{ matrix.build-type }}
|
||||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
@@ -167,7 +148,6 @@ jobs:
|
|||||||
platforms: 'linux/arm64'
|
platforms: 'linux/arm64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
tag-suffix: '-nvidia-l4t-arm64'
|
tag-suffix: '-nvidia-l4t-arm64'
|
||||||
ffmpeg: 'true'
|
|
||||||
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||||
runs-on: 'ubuntu-24.04-arm'
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
|
|||||||
8
.github/workflows/image_build.yml
vendored
8
.github/workflows/image_build.yml
vendored
@@ -37,10 +37,6 @@ on:
|
|||||||
description: 'Tag suffix'
|
description: 'Tag suffix'
|
||||||
default: ''
|
default: ''
|
||||||
type: string
|
type: string
|
||||||
ffmpeg:
|
|
||||||
description: 'FFMPEG'
|
|
||||||
default: ''
|
|
||||||
type: string
|
|
||||||
skip-drivers:
|
skip-drivers:
|
||||||
description: 'Skip drivers by default'
|
description: 'Skip drivers by default'
|
||||||
default: 'false'
|
default: 'false'
|
||||||
@@ -98,7 +94,7 @@ jobs:
|
|||||||
&& sudo apt-get update \
|
&& sudo apt-get update \
|
||||||
&& sudo apt-get install -y git
|
&& sudo apt-get install -y git
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v5
|
||||||
|
|
||||||
- name: Release space from worker
|
- name: Release space from worker
|
||||||
if: inputs.runs-on == 'ubuntu-latest'
|
if: inputs.runs-on == 'ubuntu-latest'
|
||||||
@@ -236,7 +232,6 @@ jobs:
|
|||||||
BUILD_TYPE=${{ inputs.build-type }}
|
BUILD_TYPE=${{ inputs.build-type }}
|
||||||
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
||||||
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
|
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
|
||||||
FFMPEG=${{ inputs.ffmpeg }}
|
|
||||||
BASE_IMAGE=${{ inputs.base-image }}
|
BASE_IMAGE=${{ inputs.base-image }}
|
||||||
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
|
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
|
||||||
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
||||||
@@ -264,7 +259,6 @@ jobs:
|
|||||||
BUILD_TYPE=${{ inputs.build-type }}
|
BUILD_TYPE=${{ inputs.build-type }}
|
||||||
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
|
||||||
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
|
CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
|
||||||
FFMPEG=${{ inputs.ffmpeg }}
|
|
||||||
BASE_IMAGE=${{ inputs.base-image }}
|
BASE_IMAGE=${{ inputs.base-image }}
|
||||||
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
|
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
|
||||||
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
||||||
|
|||||||
2
.github/workflows/labeler.yml
vendored
2
.github/workflows/labeler.yml
vendored
@@ -9,4 +9,4 @@ jobs:
|
|||||||
pull-requests: write
|
pull-requests: write
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/labeler@v5
|
- uses: actions/labeler@v6
|
||||||
5
.github/workflows/localaibot_automerge.yml
vendored
5
.github/workflows/localaibot_automerge.yml
vendored
@@ -6,14 +6,15 @@ permissions:
|
|||||||
contents: write
|
contents: write
|
||||||
pull-requests: write
|
pull-requests: write
|
||||||
packages: read
|
packages: read
|
||||||
|
issues: write # for Homebrew/actions/post-comment
|
||||||
|
actions: write # to dispatch publish workflow
|
||||||
jobs:
|
jobs:
|
||||||
dependabot:
|
dependabot:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
if: ${{ github.actor == 'localai-bot' }}
|
if: ${{ github.actor == 'localai-bot' }}
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v5
|
||||||
|
|
||||||
- name: Approve a PR if not already approved
|
- name: Approve a PR if not already approved
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
6
.github/workflows/notify-models.yaml
vendored
6
.github/workflows/notify-models.yaml
vendored
@@ -11,7 +11,7 @@ jobs:
|
|||||||
MODEL_NAME: gemma-3-12b-it
|
MODEL_NAME: gemma-3-12b-it
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v5
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0 # needed to checkout all branches for this Action to work
|
fetch-depth: 0 # needed to checkout all branches for this Action to work
|
||||||
- uses: mudler/localai-github-action@v1
|
- uses: mudler/localai-github-action@v1
|
||||||
@@ -90,13 +90,13 @@ jobs:
|
|||||||
MODEL_NAME: gemma-3-12b-it
|
MODEL_NAME: gemma-3-12b-it
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v5
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0 # needed to checkout all branches for this Action to work
|
fetch-depth: 0 # needed to checkout all branches for this Action to work
|
||||||
- name: Start LocalAI
|
- name: Start LocalAI
|
||||||
run: |
|
run: |
|
||||||
echo "Starting LocalAI..."
|
echo "Starting LocalAI..."
|
||||||
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
|
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master run --debug $MODEL_NAME
|
||||||
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
|
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
|
||||||
# Check the PR diff using the current branch and the base branch of the PR
|
# Check the PR diff using the current branch and the base branch of the PR
|
||||||
- uses: GrantBirki/git-diff-action@v2.8.1
|
- uses: GrantBirki/git-diff-action@v2.8.1
|
||||||
|
|||||||
391
.github/workflows/release.yaml
vendored
391
.github/workflows/release.yaml
vendored
@@ -1,375 +1,64 @@
|
|||||||
name: Build and Release
|
name: goreleaser
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
tags:
|
tags:
|
||||||
- 'v*'
|
- 'v*'
|
||||||
pull_request:
|
|
||||||
|
|
||||||
env:
|
|
||||||
GRPC_VERSION: v1.65.0
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
contents: write
|
|
||||||
|
|
||||||
concurrency:
|
|
||||||
group: ci-releases-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
|
||||||
cancel-in-progress: true
|
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
goreleaser:
|
||||||
build-linux-arm:
|
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v5
|
||||||
with:
|
with:
|
||||||
submodules: true
|
fetch-depth: 0
|
||||||
- uses: actions/setup-go@v5
|
- name: Set up Go
|
||||||
|
uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version: '1.21.x'
|
go-version: 1.23
|
||||||
cache: false
|
- name: Run GoReleaser
|
||||||
- name: Dependencies
|
uses: goreleaser/goreleaser-action@v6
|
||||||
run: |
|
with:
|
||||||
sudo apt-get update
|
version: v2.11.0
|
||||||
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
|
args: release --clean
|
||||||
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
|
|
||||||
make install-go-tools
|
|
||||||
- name: Install CUDA Dependencies
|
|
||||||
run: |
|
|
||||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
|
|
||||||
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
|
|
||||||
env:
|
env:
|
||||||
CUDA_VERSION: 12-4
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
- name: Cache grpc
|
launcher-build-darwin:
|
||||||
id: cache-grpc
|
runs-on: macos-latest
|
||||||
uses: actions/cache@v4
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v5
|
||||||
with:
|
with:
|
||||||
path: grpc
|
fetch-depth: 0
|
||||||
key: ${{ runner.os }}-arm-grpc-${{ env.GRPC_VERSION }}
|
- name: Set up Go
|
||||||
- name: Build grpc
|
uses: actions/setup-go@v5
|
||||||
if: steps.cache-grpc.outputs.cache-hit != 'true'
|
|
||||||
run: |
|
|
||||||
|
|
||||||
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
|
||||||
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
|
|
||||||
cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
|
||||||
-DgRPC_BUILD_TESTS=OFF \
|
|
||||||
../.. && sudo make --jobs 5 --output-sync=target
|
|
||||||
- name: Install gRPC
|
|
||||||
run: |
|
|
||||||
GNU_HOST=aarch64-linux-gnu
|
|
||||||
C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
|
|
||||||
CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
|
|
||||||
|
|
||||||
CROSS_TOOLCHAIN=/usr/$GNU_HOST
|
|
||||||
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
|
|
||||||
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
|
|
||||||
|
|
||||||
# https://cmake.org/cmake/help/v3.13/manual/cmake-toolchains.7.html#cross-compiling-for-linux
|
|
||||||
echo "set(CMAKE_SYSTEM_NAME Linux)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
|
||||||
echo "set(CMAKE_SYSTEM_PROCESSOR arm)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
|
||||||
echo "set(CMAKE_STAGING_PREFIX $CROSS_STAGING_PREFIX)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
|
||||||
echo "set(CMAKE_SYSROOT ${CROSS_TOOLCHAIN}/sysroot)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
|
||||||
echo "set(CMAKE_C_COMPILER /usr/bin/$C_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
|
||||||
echo "set(CMAKE_CXX_COMPILER /usr/bin/$CXX_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
|
||||||
echo "set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
|
||||||
echo "set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
|
||||||
echo "set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
|
|
||||||
echo "set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN
|
|
||||||
GRPC_DIR=$PWD/grpc
|
|
||||||
cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install && \
|
|
||||||
GRPC_CROSS_BUILD_DIR=$GRPC_DIR/cmake/cross_build && \
|
|
||||||
mkdir -p $GRPC_CROSS_BUILD_DIR && \
|
|
||||||
cd $GRPC_CROSS_BUILD_DIR && \
|
|
||||||
cmake -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN \
|
|
||||||
-DCMAKE_BUILD_TYPE=Release \
|
|
||||||
-DCMAKE_INSTALL_PREFIX=$CROSS_TOOLCHAIN/grpc_install \
|
|
||||||
../.. && \
|
|
||||||
sudo make -j`nproc` install
|
|
||||||
- name: Build
|
|
||||||
id: build
|
|
||||||
run: |
|
|
||||||
GNU_HOST=aarch64-linux-gnu
|
|
||||||
C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
|
|
||||||
CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
|
|
||||||
|
|
||||||
CROSS_TOOLCHAIN=/usr/$GNU_HOST
|
|
||||||
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
|
|
||||||
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
|
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
|
||||||
export PATH=$PATH:$GOPATH/bin
|
|
||||||
export PATH=/usr/local/cuda/bin:$PATH
|
|
||||||
sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
|
|
||||||
sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
|
|
||||||
sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
|
|
||||||
BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
|
|
||||||
GOOS=linux \
|
|
||||||
GOARCH=arm64 \
|
|
||||||
CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
|
|
||||||
- uses: actions/upload-artifact@v4
|
|
||||||
with:
|
with:
|
||||||
name: LocalAI-linux-arm64
|
go-version: 1.23
|
||||||
path: release/
|
- name: Build launcher for macOS ARM64
|
||||||
- name: Release
|
run: |
|
||||||
|
make build-launcher-darwin
|
||||||
|
- name: Upload DMG to Release
|
||||||
uses: softprops/action-gh-release@v2
|
uses: softprops/action-gh-release@v2
|
||||||
if: startsWith(github.ref, 'refs/tags/')
|
|
||||||
with:
|
with:
|
||||||
files: |
|
files: ./dist/LocalAI.dmg
|
||||||
release/*
|
launcher-build-linux:
|
||||||
- name: Setup tmate session if tests fail
|
|
||||||
if: ${{ failure() }}
|
|
||||||
uses: mxschmitt/action-tmate@v3.22
|
|
||||||
with:
|
|
||||||
detached: true
|
|
||||||
connect-timeout-seconds: 180
|
|
||||||
limit-access-to-actor: true
|
|
||||||
build-linux:
|
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Free Disk Space (Ubuntu)
|
- name: Checkout
|
||||||
uses: jlumbroso/free-disk-space@main
|
uses: actions/checkout@v5
|
||||||
with:
|
with:
|
||||||
# this might remove tools that are actually needed,
|
fetch-depth: 0
|
||||||
# if set to "true" but frees about 6 GB
|
- name: Set up Go
|
||||||
tool-cache: true
|
uses: actions/setup-go@v5
|
||||||
# all of these default to true, but feel free to set to
|
|
||||||
# "false" if necessary for your workflow
|
|
||||||
android: true
|
|
||||||
dotnet: true
|
|
||||||
haskell: true
|
|
||||||
large-packages: true
|
|
||||||
docker-images: true
|
|
||||||
swap-storage: true
|
|
||||||
|
|
||||||
- name: Release space from worker
|
|
||||||
run: |
|
|
||||||
echo "Listing top largest packages"
|
|
||||||
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
|
||||||
head -n 30 <<< "${pkgs}"
|
|
||||||
echo
|
|
||||||
df -h
|
|
||||||
echo
|
|
||||||
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
|
|
||||||
sudo apt-get remove --auto-remove android-sdk-platform-tools snapd || true
|
|
||||||
sudo apt-get purge --auto-remove android-sdk-platform-tools snapd || true
|
|
||||||
sudo rm -rf /usr/local/lib/android
|
|
||||||
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
|
|
||||||
sudo rm -rf /usr/share/dotnet
|
|
||||||
sudo apt-get remove -y '^mono-.*' || true
|
|
||||||
sudo apt-get remove -y '^ghc-.*' || true
|
|
||||||
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
|
|
||||||
sudo apt-get remove -y 'php.*' || true
|
|
||||||
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
|
|
||||||
sudo apt-get remove -y '^google-.*' || true
|
|
||||||
sudo apt-get remove -y azure-cli || true
|
|
||||||
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
|
|
||||||
sudo apt-get remove -y '^gfortran-.*' || true
|
|
||||||
sudo apt-get remove -y microsoft-edge-stable || true
|
|
||||||
sudo apt-get remove -y firefox || true
|
|
||||||
sudo apt-get remove -y powershell || true
|
|
||||||
sudo apt-get remove -y r-base-core || true
|
|
||||||
sudo apt-get autoremove -y
|
|
||||||
sudo apt-get clean
|
|
||||||
echo
|
|
||||||
echo "Listing top largest packages"
|
|
||||||
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
|
||||||
head -n 30 <<< "${pkgs}"
|
|
||||||
echo
|
|
||||||
sudo rm -rfv build || true
|
|
||||||
sudo rm -rf /usr/share/dotnet || true
|
|
||||||
sudo rm -rf /opt/ghc || true
|
|
||||||
sudo rm -rf "/usr/local/share/boost" || true
|
|
||||||
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
|
|
||||||
df -h
|
|
||||||
|
|
||||||
- name: Force Install GIT latest
|
|
||||||
run: |
|
|
||||||
sudo apt-get update \
|
|
||||||
&& sudo apt-get install -y software-properties-common \
|
|
||||||
&& sudo apt-get update \
|
|
||||||
&& sudo add-apt-repository -y ppa:git-core/ppa \
|
|
||||||
&& sudo apt-get update \
|
|
||||||
&& sudo apt-get install -y git
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
with:
|
||||||
submodules: true
|
go-version: 1.23
|
||||||
- uses: actions/setup-go@v5
|
- name: Build launcher for Linux
|
||||||
with:
|
|
||||||
go-version: '1.21.x'
|
|
||||||
cache: false
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
|
sudo apt-get install golang gcc libgl1-mesa-dev xorg-dev libxkbcommon-dev
|
||||||
make install-go-tools
|
make build-launcher-linux
|
||||||
- name: Intel Dependencies
|
- name: Upload Linux launcher artifacts
|
||||||
run: |
|
|
||||||
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
|
|
||||||
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
|
|
||||||
sudo apt update
|
|
||||||
sudo apt install -y intel-basekit
|
|
||||||
- name: Install CUDA Dependencies
|
|
||||||
run: |
|
|
||||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
|
||||||
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
|
||||||
env:
|
|
||||||
CUDA_VERSION: 12-5
|
|
||||||
- name: "Install Hipblas"
|
|
||||||
env:
|
|
||||||
ROCM_VERSION: "6.1"
|
|
||||||
AMDGPU_VERSION: "6.1"
|
|
||||||
run: |
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
sudo apt-get update
|
|
||||||
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
|
|
||||||
|
|
||||||
sudo apt update
|
|
||||||
wget https://repo.radeon.com/amdgpu-install/6.4.1/ubuntu/noble/amdgpu-install_6.4.60401-1_all.deb
|
|
||||||
sudo apt install ./amdgpu-install_6.4.60401-1_all.deb
|
|
||||||
sudo apt update
|
|
||||||
|
|
||||||
sudo amdgpu-install --usecase=rocm
|
|
||||||
|
|
||||||
sudo apt-get clean
|
|
||||||
sudo rm -rf /var/lib/apt/lists/*
|
|
||||||
sudo ldconfig
|
|
||||||
- name: Cache grpc
|
|
||||||
id: cache-grpc
|
|
||||||
uses: actions/cache@v4
|
|
||||||
with:
|
|
||||||
path: grpc
|
|
||||||
key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
|
|
||||||
- name: Build grpc
|
|
||||||
if: steps.cache-grpc.outputs.cache-hit != 'true'
|
|
||||||
run: |
|
|
||||||
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
|
||||||
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && \
|
|
||||||
cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
|
||||||
-DgRPC_BUILD_TESTS=OFF \
|
|
||||||
../.. && sudo make --jobs 5 --output-sync=target
|
|
||||||
- name: Install gRPC
|
|
||||||
run: |
|
|
||||||
cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
|
|
||||||
# BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
|
|
||||||
- name: Build
|
|
||||||
id: build
|
|
||||||
run: |
|
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
|
||||||
export PATH=$PATH:$GOPATH/bin
|
|
||||||
export PATH=/usr/local/cuda/bin:$PATH
|
|
||||||
export PATH=/opt/rocm/bin:$PATH
|
|
||||||
source /opt/intel/oneapi/setvars.sh
|
|
||||||
sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
|
|
||||||
BACKEND_LIBS="./ld.so ./sources/go-piper/piper/build/fi/lib/libfmt.a ./sources/go-piper/piper-phonemize/pi/lib/libonnxruntime.so.1.14.1 ./sources/go-piper/piper-phonemize/pi/src/libespeak-ng/libespeak-ng.so /usr/lib/x86_64-linux-gnu/libdl.so.2 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/lib/x86_64-linux-gnu/libpthread.so.0 ./sources/go-piper/piper-phonemize/pi/lib/libpiper_phonemize.so.1 ./sources/go-piper/piper/build/si/lib/libspdlog.a ./sources/go-piper/espeak/ei/lib/libucd.so" \
|
|
||||||
make -j4 dist
|
|
||||||
- uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
name: LocalAI-linux
|
|
||||||
path: release/
|
|
||||||
- name: Release
|
|
||||||
uses: softprops/action-gh-release@v2
|
uses: softprops/action-gh-release@v2
|
||||||
if: startsWith(github.ref, 'refs/tags/')
|
|
||||||
with:
|
with:
|
||||||
files: |
|
files: ./local-ai-launcher-linux.tar.xz
|
||||||
release/*
|
|
||||||
- name: Setup tmate session if tests fail
|
|
||||||
if: ${{ failure() }}
|
|
||||||
uses: mxschmitt/action-tmate@v3.22
|
|
||||||
with:
|
|
||||||
detached: true
|
|
||||||
connect-timeout-seconds: 180
|
|
||||||
limit-access-to-actor: true
|
|
||||||
|
|
||||||
|
|
||||||
build-macOS-x86_64:
|
|
||||||
runs-on: macos-13
|
|
||||||
steps:
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- uses: actions/setup-go@v5
|
|
||||||
with:
|
|
||||||
go-version: '1.21.x'
|
|
||||||
cache: false
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
brew install protobuf grpc
|
|
||||||
make install-go-tools
|
|
||||||
- name: Build
|
|
||||||
id: build
|
|
||||||
run: |
|
|
||||||
export C_INCLUDE_PATH=/usr/local/include
|
|
||||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
|
||||||
export PATH=$PATH:$GOPATH/bin
|
|
||||||
export SKIP_GRPC_BACKEND=backend-assets/grpc/whisper
|
|
||||||
make dist
|
|
||||||
- uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
name: LocalAI-MacOS-x86_64
|
|
||||||
path: release/
|
|
||||||
- name: Release
|
|
||||||
uses: softprops/action-gh-release@v2
|
|
||||||
if: startsWith(github.ref, 'refs/tags/')
|
|
||||||
with:
|
|
||||||
files: |
|
|
||||||
release/*
|
|
||||||
- name: Setup tmate session if tests fail
|
|
||||||
if: ${{ failure() }}
|
|
||||||
uses: mxschmitt/action-tmate@v3.22
|
|
||||||
with:
|
|
||||||
detached: true
|
|
||||||
connect-timeout-seconds: 180
|
|
||||||
limit-access-to-actor: true
|
|
||||||
|
|
||||||
build-macOS-arm64:
|
|
||||||
runs-on: macos-14
|
|
||||||
steps:
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- uses: actions/setup-go@v5
|
|
||||||
with:
|
|
||||||
go-version: '1.21.x'
|
|
||||||
cache: false
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
brew install protobuf grpc libomp llvm
|
|
||||||
make install-go-tools
|
|
||||||
- name: Build
|
|
||||||
id: build
|
|
||||||
run: |
|
|
||||||
export C_INCLUDE_PATH=/usr/local/include
|
|
||||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
|
||||||
export PATH=$PATH:$GOPATH/bin
|
|
||||||
export CC=/opt/homebrew/opt/llvm/bin/clang
|
|
||||||
make dist
|
|
||||||
- uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
name: LocalAI-MacOS-arm64
|
|
||||||
path: release/
|
|
||||||
- name: Release
|
|
||||||
uses: softprops/action-gh-release@v2
|
|
||||||
if: startsWith(github.ref, 'refs/tags/')
|
|
||||||
with:
|
|
||||||
files: |
|
|
||||||
release/*
|
|
||||||
- name: Setup tmate session if tests fail
|
|
||||||
if: ${{ failure() }}
|
|
||||||
uses: mxschmitt/action-tmate@v3.22
|
|
||||||
with:
|
|
||||||
detached: true
|
|
||||||
connect-timeout-seconds: 180
|
|
||||||
limit-access-to-actor: true
|
|
||||||
|
|||||||
4
.github/workflows/secscan.yaml
vendored
4
.github/workflows/secscan.yaml
vendored
@@ -14,11 +14,11 @@ jobs:
|
|||||||
GO111MODULE: on
|
GO111MODULE: on
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout Source
|
- name: Checkout Source
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v5
|
||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
- name: Run Gosec Security Scanner
|
- name: Run Gosec Security Scanner
|
||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
uses: securego/gosec@v2.22.5
|
uses: securego/gosec@v2.22.9
|
||||||
with:
|
with:
|
||||||
# we let the report trigger content trigger a failure using the GitHub Security features.
|
# we let the report trigger content trigger a failure using the GitHub Security features.
|
||||||
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
||||||
|
|||||||
2
.github/workflows/stalebot.yml
vendored
2
.github/workflows/stalebot.yml
vendored
@@ -10,7 +10,7 @@ jobs:
|
|||||||
stale:
|
stale:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9
|
- uses: actions/stale@3a9db7e6a41a89f618792c92c0e97cc736e1b13f # v9
|
||||||
with:
|
with:
|
||||||
stale-issue-message: 'This issue is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 5 days.'
|
stale-issue-message: 'This issue is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 5 days.'
|
||||||
stale-pr-message: 'This PR is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 10 days.'
|
stale-pr-message: 'This PR is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 10 days.'
|
||||||
|
|||||||
18
.github/workflows/test-extra.yml
vendored
18
.github/workflows/test-extra.yml
vendored
@@ -19,7 +19,7 @@ jobs:
|
|||||||
# runs-on: ubuntu-latest
|
# runs-on: ubuntu-latest
|
||||||
# steps:
|
# steps:
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v4
|
# uses: actions/checkout@v5
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
@@ -40,7 +40,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v5
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -61,7 +61,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v5
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -83,7 +83,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v5
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -104,7 +104,7 @@ jobs:
|
|||||||
# runs-on: ubuntu-latest
|
# runs-on: ubuntu-latest
|
||||||
# steps:
|
# steps:
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v4
|
# uses: actions/checkout@v5
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
@@ -124,7 +124,7 @@ jobs:
|
|||||||
# runs-on: ubuntu-latest
|
# runs-on: ubuntu-latest
|
||||||
# steps:
|
# steps:
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v4
|
# uses: actions/checkout@v5
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
@@ -186,7 +186,7 @@ jobs:
|
|||||||
# sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
|
# sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
|
||||||
# df -h
|
# df -h
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v4
|
# uses: actions/checkout@v5
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
@@ -211,7 +211,7 @@ jobs:
|
|||||||
# runs-on: ubuntu-latest
|
# runs-on: ubuntu-latest
|
||||||
# steps:
|
# steps:
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v4
|
# uses: actions/checkout@v5
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
@@ -232,7 +232,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v5
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
|
|||||||
77
.github/workflows/test.yml
vendored
77
.github/workflows/test.yml
vendored
@@ -23,6 +23,20 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
go-version: ['1.21.x']
|
go-version: ['1.21.x']
|
||||||
steps:
|
steps:
|
||||||
|
- name: Free Disk Space (Ubuntu)
|
||||||
|
uses: jlumbroso/free-disk-space@main
|
||||||
|
with:
|
||||||
|
# this might remove tools that are actually needed,
|
||||||
|
# if set to "true" but frees about 6 GB
|
||||||
|
tool-cache: true
|
||||||
|
# all of these default to true, but feel free to set to
|
||||||
|
# "false" if necessary for your workflow
|
||||||
|
android: true
|
||||||
|
dotnet: true
|
||||||
|
haskell: true
|
||||||
|
large-packages: true
|
||||||
|
docker-images: true
|
||||||
|
swap-storage: true
|
||||||
- name: Release space from worker
|
- name: Release space from worker
|
||||||
run: |
|
run: |
|
||||||
echo "Listing top largest packages"
|
echo "Listing top largest packages"
|
||||||
@@ -56,7 +70,7 @@ jobs:
|
|||||||
sudo rm -rfv build || true
|
sudo rm -rfv build || true
|
||||||
df -h
|
df -h
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v5
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go ${{ matrix.go-version }}
|
- name: Setup Go ${{ matrix.go-version }}
|
||||||
@@ -67,18 +81,20 @@ jobs:
|
|||||||
# You can test your matrix by printing the current Go version
|
# You can test your matrix by printing the current Go version
|
||||||
- name: Display Go version
|
- name: Display Go version
|
||||||
run: go version
|
run: go version
|
||||||
|
- name: Proto Dependencies
|
||||||
|
run: |
|
||||||
|
# Install protoc
|
||||||
|
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
|
||||||
|
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||||
|
rm protoc.zip
|
||||||
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
|
PATH="$PATH:$HOME/go/bin" make protogen-go
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
|
sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
|
||||||
sudo apt-get install -y libgmock-dev clang
|
sudo apt-get install -y libgmock-dev clang
|
||||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
|
||||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
|
||||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
|
||||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
|
||||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
|
||||||
sudo apt-get update && \
|
|
||||||
sudo apt-get install -y conda
|
|
||||||
# Install UV
|
# Install UV
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
sudo apt-get install -y ca-certificates cmake patch python3-pip unzip
|
sudo apt-get install -y ca-certificates cmake patch python3-pip unzip
|
||||||
@@ -94,38 +110,15 @@ jobs:
|
|||||||
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
||||||
export CUDACXX=/usr/local/cuda/bin/nvcc
|
export CUDACXX=/usr/local/cuda/bin/nvcc
|
||||||
|
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
|
||||||
go install github.com/GeertJohan/go.rice/rice@latest
|
|
||||||
|
|
||||||
# The python3-grpc-tools package in 22.04 is too old
|
# The python3-grpc-tools package in 22.04 is too old
|
||||||
pip install --user grpcio-tools==1.71.0 grpcio==1.71.0
|
pip install --user grpcio-tools==1.71.0 grpcio==1.71.0
|
||||||
|
|
||||||
make -C backend/python/transformers
|
make -C backend/python/transformers
|
||||||
|
|
||||||
# Pre-build piper before we start tests in order to have shared libraries in place
|
make backends/huggingface backends/llama-cpp backends/local-store backends/silero-vad backends/piper backends/whisper backends/stablediffusion-ggml
|
||||||
make sources/go-piper && \
|
|
||||||
GO_TAGS="tts" make -C sources/go-piper piper.o && \
|
|
||||||
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/
|
|
||||||
env:
|
env:
|
||||||
CUDA_VERSION: 12-4
|
CUDA_VERSION: 12-4
|
||||||
- name: Cache grpc
|
|
||||||
id: cache-grpc
|
|
||||||
uses: actions/cache@v4
|
|
||||||
with:
|
|
||||||
path: grpc
|
|
||||||
key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
|
|
||||||
- name: Build grpc
|
|
||||||
if: steps.cache-grpc.outputs.cache-hit != 'true'
|
|
||||||
run: |
|
|
||||||
git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --jobs 5 --shallow-submodules https://github.com/grpc/grpc && \
|
|
||||||
cd grpc && sed -i "216i\ TESTONLY" "third_party/abseil-cpp/absl/container/CMakeLists.txt" && mkdir -p cmake/build && cd cmake/build && \
|
|
||||||
cmake -DgRPC_INSTALL=ON \
|
|
||||||
-DgRPC_BUILD_TESTS=OFF \
|
|
||||||
../.. && sudo make --jobs 5
|
|
||||||
- name: Install gRPC
|
|
||||||
run: |
|
|
||||||
cd grpc && cd cmake/build && sudo make --jobs 5 install
|
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
|
PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
|
||||||
@@ -173,7 +166,7 @@ jobs:
|
|||||||
sudo rm -rfv build || true
|
sudo rm -rfv build || true
|
||||||
df -h
|
df -h
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v5
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -184,16 +177,10 @@ jobs:
|
|||||||
rm protoc.zip
|
rm protoc.zip
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
go install github.com/GeertJohan/go.rice/rice@latest
|
|
||||||
PATH="$PATH:$HOME/go/bin" make protogen-go
|
PATH="$PATH:$HOME/go/bin" make protogen-go
|
||||||
- name: Build images
|
|
||||||
run: |
|
|
||||||
docker build --build-arg FFMPEG=true --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
|
|
||||||
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
|
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
PATH="$PATH:$HOME/go/bin" LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
|
PATH="$PATH:$HOME/go/bin" make backends/local-store backends/silero-vad backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
|
||||||
make run-e2e-aio
|
|
||||||
- name: Setup tmate session if tests fail
|
- name: Setup tmate session if tests fail
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
uses: mxschmitt/action-tmate@v3.22
|
uses: mxschmitt/action-tmate@v3.22
|
||||||
@@ -209,7 +196,7 @@ jobs:
|
|||||||
go-version: ['1.21.x']
|
go-version: ['1.21.x']
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v5
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go ${{ matrix.go-version }}
|
- name: Setup Go ${{ matrix.go-version }}
|
||||||
@@ -224,7 +211,10 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
|
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.71.0 grpcio==1.71.0
|
pip install --user --no-cache-dir grpcio-tools==1.71.0 grpcio==1.71.0
|
||||||
go install github.com/GeertJohan/go.rice/rice@latest
|
- name: Build llama-cpp-darwin
|
||||||
|
run: |
|
||||||
|
make protogen-go
|
||||||
|
make backends/llama-cpp-darwin
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
export C_INCLUDE_PATH=/usr/local/include
|
export C_INCLUDE_PATH=/usr/local/include
|
||||||
@@ -232,7 +222,8 @@ jobs:
|
|||||||
export CC=/opt/homebrew/opt/llvm/bin/clang
|
export CC=/opt/homebrew/opt/llvm/bin/clang
|
||||||
# Used to run the newer GNUMake version from brew that supports --output-sync
|
# Used to run the newer GNUMake version from brew that supports --output-sync
|
||||||
export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
|
export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
|
||||||
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
|
PATH="$PATH:$HOME/go/bin" make protogen-go
|
||||||
|
PATH="$PATH:$HOME/go/bin" BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
|
||||||
- name: Setup tmate session if tests fail
|
- name: Setup tmate session if tests fail
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
uses: mxschmitt/action-tmate@v3.22
|
uses: mxschmitt/action-tmate@v3.22
|
||||||
|
|||||||
2
.github/workflows/update_swagger.yaml
vendored
2
.github/workflows/update_swagger.yaml
vendored
@@ -9,7 +9,7 @@ jobs:
|
|||||||
fail-fast: false
|
fail-fast: false
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v5
|
||||||
- uses: actions/setup-go@v5
|
- uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version: 'stable'
|
go-version: 'stable'
|
||||||
|
|||||||
13
.gitignore
vendored
13
.gitignore
vendored
@@ -5,9 +5,14 @@ __pycache__/
|
|||||||
*.o
|
*.o
|
||||||
get-sources
|
get-sources
|
||||||
prepare-sources
|
prepare-sources
|
||||||
/backend/cpp/llama/grpc-server
|
/backend/cpp/llama-cpp/grpc-server
|
||||||
/backend/cpp/llama/llama.cpp
|
/backend/cpp/llama-cpp/llama.cpp
|
||||||
/backend/cpp/llama-*
|
/backend/cpp/llama-*
|
||||||
|
!backend/cpp/llama-cpp
|
||||||
|
/backends
|
||||||
|
/backend-images
|
||||||
|
/result.yaml
|
||||||
|
protoc
|
||||||
|
|
||||||
*.log
|
*.log
|
||||||
|
|
||||||
@@ -19,7 +24,7 @@ go-bert
|
|||||||
|
|
||||||
# LocalAI build binary
|
# LocalAI build binary
|
||||||
LocalAI
|
LocalAI
|
||||||
local-ai
|
/local-ai
|
||||||
# prevent above rules from omitting the helm chart
|
# prevent above rules from omitting the helm chart
|
||||||
!charts/*
|
!charts/*
|
||||||
# prevent above rules from omitting the api/localai folder
|
# prevent above rules from omitting the api/localai folder
|
||||||
@@ -56,4 +61,4 @@ docs/static/gallery.html
|
|||||||
**/venv
|
**/venv
|
||||||
|
|
||||||
# per-developer customization files for the development container
|
# per-developer customization files for the development container
|
||||||
.devcontainer/customization/*
|
.devcontainer/customization/*
|
||||||
|
|||||||
33
.goreleaser.yaml
Normal file
33
.goreleaser.yaml
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
version: 2
|
||||||
|
before:
|
||||||
|
hooks:
|
||||||
|
- make protogen-go
|
||||||
|
- go mod tidy
|
||||||
|
dist: release
|
||||||
|
source:
|
||||||
|
enabled: true
|
||||||
|
name_template: '{{ .ProjectName }}-{{ .Tag }}-source'
|
||||||
|
builds:
|
||||||
|
- main: ./cmd/local-ai
|
||||||
|
env:
|
||||||
|
- CGO_ENABLED=0
|
||||||
|
ldflags:
|
||||||
|
- -s -w
|
||||||
|
- -X "github.com/mudler/LocalAI/internal.Version={{ .Tag }}"
|
||||||
|
- -X "github.com/mudler/LocalAI/internal.Commit={{ .FullCommit }}"
|
||||||
|
goos:
|
||||||
|
- linux
|
||||||
|
- darwin
|
||||||
|
#- windows
|
||||||
|
goarch:
|
||||||
|
- amd64
|
||||||
|
- arm64
|
||||||
|
archives:
|
||||||
|
- formats: [ 'binary' ] # this removes the tar of the archives, leaving the binaries alone
|
||||||
|
name_template: local-ai-{{ .Tag }}-{{ .Os }}-{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}
|
||||||
|
checksum:
|
||||||
|
name_template: '{{ .ProjectName }}-{{ .Tag }}-checksums.txt'
|
||||||
|
snapshot:
|
||||||
|
version_template: "{{ .Tag }}-next"
|
||||||
|
changelog:
|
||||||
|
use: github-native
|
||||||
2
.vscode/launch.json
vendored
2
.vscode/launch.json
vendored
@@ -26,7 +26,7 @@
|
|||||||
"LOCALAI_P2P": "true",
|
"LOCALAI_P2P": "true",
|
||||||
"LOCALAI_FEDERATED": "true"
|
"LOCALAI_FEDERATED": "true"
|
||||||
},
|
},
|
||||||
"buildFlags": ["-tags", "p2p tts", "-v"],
|
"buildFlags": ["-tags", "", "-v"],
|
||||||
"envFile": "${workspaceFolder}/.env",
|
"envFile": "${workspaceFolder}/.env",
|
||||||
"cwd": "${workspaceRoot}"
|
"cwd": "${workspaceRoot}"
|
||||||
}
|
}
|
||||||
|
|||||||
146
Dockerfile
146
Dockerfile
@@ -9,7 +9,7 @@ ENV DEBIAN_FRONTEND=noninteractive
|
|||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
ca-certificates curl wget espeak-ng libgomp1 \
|
ca-certificates curl wget espeak-ng libgomp1 \
|
||||||
python3 python-is-python3 ffmpeg && \
|
ffmpeg libopenblas-base libopenblas-dev && \
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
@@ -24,19 +24,35 @@ ARG TARGETARCH
|
|||||||
ARG TARGETVARIANT
|
ARG TARGETVARIANT
|
||||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||||
|
|
||||||
|
RUN mkdir -p /run/localai
|
||||||
|
RUN echo "default" > /run/localai/capability
|
||||||
|
|
||||||
# Vulkan requirements
|
# Vulkan requirements
|
||||||
RUN <<EOT bash
|
RUN <<EOT bash
|
||||||
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
software-properties-common pciutils wget gpg-agent && \
|
software-properties-common pciutils sudo wget gpg-agent curl xz-utils && \
|
||||||
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
echo "vulkan" > /run/localai/capability && \
|
||||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
if [ "amd64" = "$TARGETARCH" ]; then
|
||||||
apt-get update && \
|
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
||||||
apt-get install -y \
|
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
||||||
vulkan-sdk && \
|
apt-get update && \
|
||||||
apt-get clean && \
|
apt-get install -y \
|
||||||
rm -rf /var/lib/apt/lists/*
|
vulkan-sdk && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
fi
|
||||||
|
if [ "arm64" = "$TARGETARCH" ]; then
|
||||||
|
# For ARM64, we need to build the Vulkan SDK manually as there are no packages available
|
||||||
|
mkdir vulkan && cd vulkan && curl -L -o vulkan-sdk.tar.xz https://github.com/mudler/vulkan-sdk-arm/releases/download/1.4.321.1/vulkansdk-ubuntu-22.04-arm-1.4.321.1.tar.xz && \
|
||||||
|
tar -xvf vulkan-sdk.tar.xz && \
|
||||||
|
rm vulkan-sdk.tar.xz && \
|
||||||
|
cd * && \
|
||||||
|
cp -rfv aarch64/* /usr/ && \
|
||||||
|
cd ../.. && \
|
||||||
|
rm -rf vulkan
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
@@ -63,7 +79,24 @@ RUN <<EOT bash
|
|||||||
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||||
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
|
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/* && \
|
||||||
|
echo "nvidia" > /run/localai/capability
|
||||||
|
fi
|
||||||
|
EOT
|
||||||
|
|
||||||
|
RUN <<EOT bash
|
||||||
|
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "arm64" ]; then
|
||||||
|
echo "nvidia-l4t" > /run/localai/capability
|
||||||
|
fi
|
||||||
|
EOT
|
||||||
|
|
||||||
|
# https://github.com/NVIDIA/Isaac-GR00T/issues/343
|
||||||
|
RUN <<EOT bash
|
||||||
|
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "arm64" ]; then
|
||||||
|
wget https://developer.download.nvidia.com/compute/cudss/0.6.0/local_installers/cudss-local-tegra-repo-ubuntu2204-0.6.0_0.6.0-1_arm64.deb && \
|
||||||
|
dpkg -i cudss-local-tegra-repo-ubuntu2204-0.6.0_0.6.0-1_arm64.deb && \
|
||||||
|
cp /var/cudss-local-tegra-repo-ubuntu2204-0.6.0/cudss-*-keyring.gpg /usr/share/keyrings/ && \
|
||||||
|
apt-get update && apt-get -y install cudss
|
||||||
fi
|
fi
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
@@ -83,11 +116,18 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
|||||||
rocblas-dev && \
|
rocblas-dev && \
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/* && \
|
rm -rf /var/lib/apt/lists/* && \
|
||||||
|
echo "amd" > /run/localai/capability && \
|
||||||
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
|
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
|
||||||
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
|
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
|
||||||
ldconfig \
|
ldconfig \
|
||||||
; fi
|
; fi
|
||||||
|
|
||||||
|
RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
||||||
|
ln -s /opt/rocm-**/lib/llvm/lib/libomp.so /usr/lib/libomp.so \
|
||||||
|
; fi
|
||||||
|
|
||||||
|
RUN expr "${BUILD_TYPE}" = intel && echo "intel" > /run/localai/capability || echo "not intel"
|
||||||
|
|
||||||
# Cuda
|
# Cuda
|
||||||
ENV PATH=/usr/local/cuda/bin:${PATH}
|
ENV PATH=/usr/local/cuda/bin:${PATH}
|
||||||
|
|
||||||
@@ -121,7 +161,7 @@ RUN apt-get update && \
|
|||||||
|
|
||||||
# Install CMake (the version in 22.04 is too old)
|
# Install CMake (the version in 22.04 is too old)
|
||||||
RUN <<EOT bash
|
RUN <<EOT bash
|
||||||
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
|
if [ "${CMAKE_FROM_SOURCE}" = "true" ]; then
|
||||||
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
|
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
|
||||||
else
|
else
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
@@ -136,10 +176,9 @@ EOT
|
|||||||
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
|
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
|
||||||
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
|
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
|
||||||
|
|
||||||
# Install grpc compilers and rice
|
# Install grpc compilers
|
||||||
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
|
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af && \
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
go install github.com/GeertJohan/go.rice/rice@latest
|
|
||||||
|
|
||||||
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
|
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
|
||||||
RUN update-ca-certificates
|
RUN update-ca-certificates
|
||||||
@@ -175,57 +214,12 @@ FROM ${INTEL_BASE_IMAGE} AS intel
|
|||||||
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
|
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
|
||||||
gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
|
gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
|
||||||
RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list
|
RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list
|
||||||
|
|
||||||
###################################
|
|
||||||
###################################
|
|
||||||
|
|
||||||
# The grpc target does one thing, it builds and installs GRPC. This is in it's own layer so that it can be effectively cached by CI.
|
|
||||||
# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
|
|
||||||
FROM ${GRPC_BASE_IMAGE} AS grpc
|
|
||||||
|
|
||||||
# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
|
|
||||||
ARG GRPC_MAKEFLAGS="-j4 -Otarget"
|
|
||||||
ARG GRPC_VERSION=v1.65.0
|
|
||||||
ARG CMAKE_FROM_SOURCE=false
|
|
||||||
ARG CMAKE_VERSION=3.26.4
|
|
||||||
|
|
||||||
ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
|
|
||||||
|
|
||||||
WORKDIR /build
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
ca-certificates \
|
intel-oneapi-runtime-libs && \
|
||||||
build-essential curl libssl-dev \
|
|
||||||
git && \
|
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Install CMake (the version in 22.04 is too old)
|
|
||||||
RUN <<EOT bash
|
|
||||||
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
|
|
||||||
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
|
|
||||||
else
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y \
|
|
||||||
cmake && \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
fi
|
|
||||||
EOT
|
|
||||||
|
|
||||||
# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
|
|
||||||
# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
|
|
||||||
# and running make install in the target container
|
|
||||||
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
|
||||||
mkdir -p /build/grpc/cmake/build && \
|
|
||||||
cd /build/grpc/cmake/build && \
|
|
||||||
sed -i "216i\ TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt" && \
|
|
||||||
cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
|
|
||||||
make && \
|
|
||||||
make install && \
|
|
||||||
rm -rf /build
|
|
||||||
|
|
||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
@@ -233,7 +227,7 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
|
|||||||
|
|
||||||
FROM build-requirements AS builder-base
|
FROM build-requirements AS builder-base
|
||||||
|
|
||||||
ARG GO_TAGS="tts p2p"
|
ARG GO_TAGS=""
|
||||||
ARG GRPC_BACKENDS
|
ARG GRPC_BACKENDS
|
||||||
ARG MAKEFLAGS
|
ARG MAKEFLAGS
|
||||||
ARG LD_FLAGS="-s -w"
|
ARG LD_FLAGS="-s -w"
|
||||||
@@ -252,9 +246,7 @@ RUN echo "GO_TAGS: $GO_TAGS" && echo "TARGETARCH: $TARGETARCH"
|
|||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
|
|
||||||
|
|
||||||
# We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below
|
# We need protoc installed, and the version in 22.04 is too old.
|
||||||
# but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
|
|
||||||
# here so that we can generate the grpc code for the stablediffusion build
|
|
||||||
RUN <<EOT bash
|
RUN <<EOT bash
|
||||||
if [ "amd64" = "$TARGETARCH" ]; then
|
if [ "amd64" = "$TARGETARCH" ]; then
|
||||||
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
|
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
|
||||||
@@ -276,8 +268,6 @@ FROM builder-base AS builder-backends
|
|||||||
ARG TARGETARCH
|
ARG TARGETARCH
|
||||||
ARG TARGETVARIANT
|
ARG TARGETVARIANT
|
||||||
|
|
||||||
COPY --from=grpc /opt/grpc /usr/local
|
|
||||||
|
|
||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
|
|
||||||
COPY ./Makefile .
|
COPY ./Makefile .
|
||||||
@@ -292,13 +282,7 @@ COPY ./pkg/utils ./pkg/utils
|
|||||||
COPY ./pkg/langchain ./pkg/langchain
|
COPY ./pkg/langchain ./pkg/langchain
|
||||||
|
|
||||||
RUN ls -l ./
|
RUN ls -l ./
|
||||||
RUN make backend-assets
|
RUN make protogen-go
|
||||||
RUN make prepare
|
|
||||||
RUN if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
|
||||||
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make grpcs; \
|
|
||||||
else \
|
|
||||||
make grpcs; \
|
|
||||||
fi
|
|
||||||
|
|
||||||
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
|
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
|
||||||
# Adjustments to the build process should likely be made here.
|
# Adjustments to the build process should likely be made here.
|
||||||
@@ -311,16 +295,7 @@ COPY . .
|
|||||||
## Build the binary
|
## Build the binary
|
||||||
## If we're on arm64 AND using cublas/hipblas, skip some of the llama-compat backends to save space
|
## If we're on arm64 AND using cublas/hipblas, skip some of the llama-compat backends to save space
|
||||||
## Otherwise just run the normal build
|
## Otherwise just run the normal build
|
||||||
RUN if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
RUN make build
|
||||||
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
|
|
||||||
else \
|
|
||||||
make build; \
|
|
||||||
fi
|
|
||||||
|
|
||||||
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
|
||||||
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
|
|
||||||
touch /build/sources/go-piper/piper-phonemize/pi/lib/keep \
|
|
||||||
; fi
|
|
||||||
|
|
||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
@@ -330,8 +305,6 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
|||||||
|
|
||||||
FROM builder-base AS devcontainer
|
FROM builder-base AS devcontainer
|
||||||
|
|
||||||
COPY --from=grpc /opt/grpc /usr/local
|
|
||||||
|
|
||||||
COPY .devcontainer-scripts /.devcontainer-scripts
|
COPY .devcontainer-scripts /.devcontainer-scripts
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
@@ -364,9 +337,6 @@ COPY ./entrypoint.sh .
|
|||||||
# Copy the binary
|
# Copy the binary
|
||||||
COPY --from=builder /build/local-ai ./
|
COPY --from=builder /build/local-ai ./
|
||||||
|
|
||||||
# Copy shared libraries for piper
|
|
||||||
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
|
|
||||||
|
|
||||||
# Make sure the models directory exists
|
# Make sure the models directory exists
|
||||||
RUN mkdir -p /models /backends
|
RUN mkdir -p /models /backends
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +0,0 @@
|
|||||||
VERSION 0.7
|
|
||||||
|
|
||||||
build:
|
|
||||||
FROM DOCKERFILE -f Dockerfile .
|
|
||||||
SAVE ARTIFACT /usr/bin/local-ai AS LOCAL local-ai
|
|
||||||
82
README.md
82
README.md
@@ -1,6 +1,6 @@
|
|||||||
<h1 align="center">
|
<h1 align="center">
|
||||||
<br>
|
<br>
|
||||||
<img height="300" src="./core/http/static/logo.png"> <br>
|
<img width="300" src="./core/http/static/logo.png"> <br>
|
||||||
<br>
|
<br>
|
||||||
</h1>
|
</h1>
|
||||||
|
|
||||||
@@ -43,7 +43,7 @@
|
|||||||
|
|
||||||
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
||||||
>
|
>
|
||||||
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) Try on
|
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) Try on
|
||||||
[](https://t.me/localaiofficial_bot)
|
[](https://t.me/localaiofficial_bot)
|
||||||
|
|
||||||
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
||||||
@@ -110,6 +110,12 @@ curl https://localai.io/install.sh | sh
|
|||||||
|
|
||||||
For more installation options, see [Installer Options](https://localai.io/docs/advanced/installer/).
|
For more installation options, see [Installer Options](https://localai.io/docs/advanced/installer/).
|
||||||
|
|
||||||
|
### macOS Download:
|
||||||
|
|
||||||
|
<a href="https://github.com/mudler/LocalAI/releases/latest/download/LocalAI.dmg">
|
||||||
|
<img src="https://img.shields.io/badge/Download-macOS-blue?style=for-the-badge&logo=apple&logoColor=white" alt="Download LocalAI for macOS"/>
|
||||||
|
</a>
|
||||||
|
|
||||||
Or run with docker:
|
Or run with docker:
|
||||||
|
|
||||||
### CPU only image:
|
### CPU only image:
|
||||||
@@ -140,11 +146,7 @@ docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri
|
|||||||
### Intel GPU Images (oneAPI):
|
### Intel GPU Images (oneAPI):
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Intel GPU with FP16 support
|
docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f16
|
|
||||||
|
|
||||||
# Intel GPU with FP32 support
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-intel-f32
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Vulkan GPU Images:
|
### Vulkan GPU Images:
|
||||||
@@ -166,7 +168,7 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-ai
|
|||||||
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11
|
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11
|
||||||
|
|
||||||
# Intel GPU version
|
# Intel GPU version
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel-f16
|
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel
|
||||||
|
|
||||||
# AMD GPU version
|
# AMD GPU version
|
||||||
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas
|
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas
|
||||||
@@ -189,10 +191,15 @@ local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
|
|||||||
local-ai run oci://localai/phi-2:latest
|
local-ai run oci://localai/phi-2:latest
|
||||||
```
|
```
|
||||||
|
|
||||||
|
> ⚡ **Automatic Backend Detection**: When you install models from the gallery or YAML files, LocalAI automatically detects your system's GPU capabilities (NVIDIA, AMD, Intel) and downloads the appropriate backend. For advanced configuration options, see [GPU Acceleration](https://localai.io/features/gpu-acceleration/#automatic-backend-detection).
|
||||||
|
|
||||||
For more information, see [💻 Getting started](https://localai.io/basics/getting_started/index.html)
|
For more information, see [💻 Getting started](https://localai.io/basics/getting_started/index.html)
|
||||||
|
|
||||||
## 📰 Latest project news
|
## 📰 Latest project news
|
||||||
|
|
||||||
|
- August 2025: MLX, MLX-VLM, Diffusers and llama.cpp are now supported on Mac M1/M2/M3+ chips ( with `development` suffix in the gallery ): https://github.com/mudler/LocalAI/pull/6049 https://github.com/mudler/LocalAI/pull/6119 https://github.com/mudler/LocalAI/pull/6121 https://github.com/mudler/LocalAI/pull/6060
|
||||||
|
- July/August 2025: 🔍 [Object Detection](https://localai.io/features/object-detection/) added to the API featuring [rf-detr](https://github.com/roboflow/rf-detr)
|
||||||
|
- July 2025: All backends migrated outside of the main binary. LocalAI is now more lightweight, small, and automatically downloads the required backend to run the model. [Read the release notes](https://github.com/mudler/LocalAI/releases/tag/v3.2.0)
|
||||||
- June 2025: [Backend management](https://github.com/mudler/LocalAI/pull/5607) has been added. Attention: extras images are going to be deprecated from the next release! Read [the backend management PR](https://github.com/mudler/LocalAI/pull/5607).
|
- June 2025: [Backend management](https://github.com/mudler/LocalAI/pull/5607) has been added. Attention: extras images are going to be deprecated from the next release! Read [the backend management PR](https://github.com/mudler/LocalAI/pull/5607).
|
||||||
- May 2025: [Audio input](https://github.com/mudler/LocalAI/pull/5466) and [Reranking](https://github.com/mudler/LocalAI/pull/5396) in llama.cpp backend, [Realtime API](https://github.com/mudler/LocalAI/pull/5392), Support to Gemma, SmollVLM, and more multimodal models (available in the gallery).
|
- May 2025: [Audio input](https://github.com/mudler/LocalAI/pull/5466) and [Reranking](https://github.com/mudler/LocalAI/pull/5396) in llama.cpp backend, [Realtime API](https://github.com/mudler/LocalAI/pull/5392), Support to Gemma, SmollVLM, and more multimodal models (available in the gallery).
|
||||||
- May 2025: Important: image name changes [See release](https://github.com/mudler/LocalAI/releases/tag/v2.29.0)
|
- May 2025: Important: image name changes [See release](https://github.com/mudler/LocalAI/releases/tag/v2.29.0)
|
||||||
@@ -215,6 +222,7 @@ Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3A
|
|||||||
|
|
||||||
## 🚀 [Features](https://localai.io/features/)
|
## 🚀 [Features](https://localai.io/features/)
|
||||||
|
|
||||||
|
- 🧩 [Backend Gallery](https://localai.io/backends/): Install/remove backends on the fly, powered by OCI images — fully customizable and API-driven.
|
||||||
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
|
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
|
||||||
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
|
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
|
||||||
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
|
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
|
||||||
@@ -224,12 +232,67 @@ Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3A
|
|||||||
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
|
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
|
||||||
- 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
|
- 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
|
||||||
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
|
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
|
||||||
|
- 🔍 [Object Detection](https://localai.io/features/object-detection/)
|
||||||
- 📈 [Reranker API](https://localai.io/features/reranker/)
|
- 📈 [Reranker API](https://localai.io/features/reranker/)
|
||||||
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
|
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
|
||||||
- [Agentic capabilities](https://github.com/mudler/LocalAGI)
|
- [Agentic capabilities](https://github.com/mudler/LocalAGI)
|
||||||
- 🔊 Voice activity detection (Silero-VAD support)
|
- 🔊 Voice activity detection (Silero-VAD support)
|
||||||
- 🌍 Integrated WebUI!
|
- 🌍 Integrated WebUI!
|
||||||
|
|
||||||
|
## 🧩 Supported Backends & Acceleration
|
||||||
|
|
||||||
|
LocalAI supports a comprehensive range of AI backends with multiple acceleration options:
|
||||||
|
|
||||||
|
### Text Generation & Language Models
|
||||||
|
| Backend | Description | Acceleration Support |
|
||||||
|
|---------|-------------|---------------------|
|
||||||
|
| **llama.cpp** | LLM inference in C/C++ | CUDA 11/12, ROCm, Intel SYCL, Vulkan, Metal, CPU |
|
||||||
|
| **vLLM** | Fast LLM inference with PagedAttention | CUDA 12, ROCm, Intel |
|
||||||
|
| **transformers** | HuggingFace transformers framework | CUDA 11/12, ROCm, Intel, CPU |
|
||||||
|
| **exllama2** | GPTQ inference library | CUDA 12 |
|
||||||
|
| **MLX** | Apple Silicon LLM inference | Metal (M1/M2/M3+) |
|
||||||
|
| **MLX-VLM** | Apple Silicon Vision-Language Models | Metal (M1/M2/M3+) |
|
||||||
|
|
||||||
|
### Audio & Speech Processing
|
||||||
|
| Backend | Description | Acceleration Support |
|
||||||
|
|---------|-------------|---------------------|
|
||||||
|
| **whisper.cpp** | OpenAI Whisper in C/C++ | CUDA 12, ROCm, Intel SYCL, Vulkan, CPU |
|
||||||
|
| **faster-whisper** | Fast Whisper with CTranslate2 | CUDA 12, ROCm, Intel, CPU |
|
||||||
|
| **bark** | Text-to-audio generation | CUDA 12, ROCm, Intel |
|
||||||
|
| **bark-cpp** | C++ implementation of Bark | CUDA, Metal, CPU |
|
||||||
|
| **coqui** | Advanced TTS with 1100+ languages | CUDA 12, ROCm, Intel, CPU |
|
||||||
|
| **kokoro** | Lightweight TTS model | CUDA 12, ROCm, Intel, CPU |
|
||||||
|
| **chatterbox** | Production-grade TTS | CUDA 11/12, CPU |
|
||||||
|
| **piper** | Fast neural TTS system | CPU |
|
||||||
|
| **kitten-tts** | Kitten TTS models | CPU |
|
||||||
|
| **silero-vad** | Voice Activity Detection | CPU |
|
||||||
|
|
||||||
|
### Image & Video Generation
|
||||||
|
| Backend | Description | Acceleration Support |
|
||||||
|
|---------|-------------|---------------------|
|
||||||
|
| **stablediffusion.cpp** | Stable Diffusion in C/C++ | CUDA 12, Intel SYCL, Vulkan, CPU |
|
||||||
|
| **diffusers** | HuggingFace diffusion models | CUDA 11/12, ROCm, Intel, Metal, CPU |
|
||||||
|
|
||||||
|
### Specialized AI Tasks
|
||||||
|
| Backend | Description | Acceleration Support |
|
||||||
|
|---------|-------------|---------------------|
|
||||||
|
| **rfdetr** | Real-time object detection | CUDA 12, Intel, CPU |
|
||||||
|
| **rerankers** | Document reranking API | CUDA 11/12, ROCm, Intel, CPU |
|
||||||
|
| **local-store** | Vector database | CPU |
|
||||||
|
| **huggingface** | HuggingFace API integration | API-based |
|
||||||
|
|
||||||
|
### Hardware Acceleration Matrix
|
||||||
|
|
||||||
|
| Acceleration Type | Supported Backends | Hardware Support |
|
||||||
|
|-------------------|-------------------|------------------|
|
||||||
|
| **NVIDIA CUDA 11** | llama.cpp, whisper, stablediffusion, diffusers, rerankers, bark, chatterbox | Nvidia hardware |
|
||||||
|
| **NVIDIA CUDA 12** | All CUDA-compatible backends | Nvidia hardware |
|
||||||
|
| **AMD ROCm** | llama.cpp, whisper, vllm, transformers, diffusers, rerankers, coqui, kokoro, bark | AMD Graphics |
|
||||||
|
| **Intel oneAPI** | llama.cpp, whisper, stablediffusion, vllm, transformers, diffusers, rfdetr, rerankers, exllama2, coqui, kokoro, bark | Intel Arc, Intel iGPUs |
|
||||||
|
| **Apple Metal** | llama.cpp, whisper, diffusers, MLX, MLX-VLM, bark-cpp | Apple M1/M2/M3+ |
|
||||||
|
| **Vulkan** | llama.cpp, whisper, stablediffusion | Cross-platform GPUs |
|
||||||
|
| **NVIDIA Jetson** | llama.cpp, whisper, stablediffusion, diffusers, rfdetr | ARM64 embedded AI |
|
||||||
|
| **CPU Optimized** | All backends | AVX/AVX2/AVX512, quantization support |
|
||||||
|
|
||||||
### 🔗 Community and integrations
|
### 🔗 Community and integrations
|
||||||
|
|
||||||
@@ -244,6 +307,9 @@ WebUIs:
|
|||||||
Model galleries
|
Model galleries
|
||||||
- https://github.com/go-skynet/model-gallery
|
- https://github.com/go-skynet/model-gallery
|
||||||
|
|
||||||
|
Voice:
|
||||||
|
- https://github.com/richiejp/VoxInput
|
||||||
|
|
||||||
Other:
|
Other:
|
||||||
- Helm chart https://github.com/go-skynet/helm-charts
|
- Helm chart https://github.com/go-skynet/helm-charts
|
||||||
- VSCode extension https://github.com/badgooooor/localai-vscode-plugin
|
- VSCode extension https://github.com/badgooooor/localai-vscode-plugin
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
embeddings: true
|
embeddings: true
|
||||||
name: text-embedding-ada-002
|
name: text-embedding-ada-002
|
||||||
|
backend: llama-cpp
|
||||||
parameters:
|
parameters:
|
||||||
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
|
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ reranking: true
|
|||||||
f16: true
|
f16: true
|
||||||
parameters:
|
parameters:
|
||||||
model: jina-reranker-v1-tiny-en.f16.gguf
|
model: jina-reranker-v1-tiny-en.f16.gguf
|
||||||
|
backend: llama-cpp
|
||||||
download_files:
|
download_files:
|
||||||
- filename: jina-reranker-v1-tiny-en.f16.gguf
|
- filename: jina-reranker-v1-tiny-en.f16.gguf
|
||||||
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
|
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ name: tts-1
|
|||||||
download_files:
|
download_files:
|
||||||
- filename: voice-en-us-amy-low.tar.gz
|
- filename: voice-en-us-amy-low.tar.gz
|
||||||
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
|
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
|
||||||
|
backend: piper
|
||||||
parameters:
|
parameters:
|
||||||
model: en-us-amy-low.onnx
|
model: en-us-amy-low.onnx
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
context_size: 8192
|
context_size: 8192
|
||||||
f16: true
|
f16: true
|
||||||
|
backend: llama-cpp
|
||||||
function:
|
function:
|
||||||
grammar:
|
grammar:
|
||||||
no_mixed_free_string: true
|
no_mixed_free_string: true
|
||||||
|
|||||||
@@ -1,10 +1,11 @@
|
|||||||
context_size: 4096
|
context_size: 4096
|
||||||
f16: true
|
f16: true
|
||||||
|
backend: llama-cpp
|
||||||
mmap: true
|
mmap: true
|
||||||
mmproj: minicpm-v-2_6-mmproj-f16.gguf
|
mmproj: minicpm-v-4_5-mmproj-f16.gguf
|
||||||
name: gpt-4o
|
name: gpt-4o
|
||||||
parameters:
|
parameters:
|
||||||
model: minicpm-v-2_6-Q4_K_M.gguf
|
model: minicpm-v-4_5-Q4_K_M.gguf
|
||||||
stopwords:
|
stopwords:
|
||||||
- <|im_end|>
|
- <|im_end|>
|
||||||
- <dummy32000>
|
- <dummy32000>
|
||||||
@@ -41,9 +42,9 @@ template:
|
|||||||
<|im_start|>assistant
|
<|im_start|>assistant
|
||||||
|
|
||||||
download_files:
|
download_files:
|
||||||
- filename: minicpm-v-2_6-Q4_K_M.gguf
|
- filename: minicpm-v-4_5-Q4_K_M.gguf
|
||||||
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
|
sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f
|
||||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
|
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf
|
||||||
- filename: minicpm-v-2_6-mmproj-f16.gguf
|
- filename: minicpm-v-4_5-mmproj-f16.gguf
|
||||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
|
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf
|
||||||
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd
|
sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
embeddings: true
|
embeddings: true
|
||||||
name: text-embedding-ada-002
|
name: text-embedding-ada-002
|
||||||
|
backend: llama-cpp
|
||||||
parameters:
|
parameters:
|
||||||
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
|
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ reranking: true
|
|||||||
f16: true
|
f16: true
|
||||||
parameters:
|
parameters:
|
||||||
model: jina-reranker-v1-tiny-en.f16.gguf
|
model: jina-reranker-v1-tiny-en.f16.gguf
|
||||||
|
backend: llama-cpp
|
||||||
download_files:
|
download_files:
|
||||||
- filename: jina-reranker-v1-tiny-en.f16.gguf
|
- filename: jina-reranker-v1-tiny-en.f16.gguf
|
||||||
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
|
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ name: tts-1
|
|||||||
download_files:
|
download_files:
|
||||||
- filename: voice-en-us-amy-low.tar.gz
|
- filename: voice-en-us-amy-low.tar.gz
|
||||||
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
|
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
|
||||||
|
backend: piper
|
||||||
parameters:
|
parameters:
|
||||||
model: en-us-amy-low.onnx
|
model: en-us-amy-low.onnx
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
context_size: 4096
|
context_size: 4096
|
||||||
f16: true
|
f16: true
|
||||||
|
backend: llama-cpp
|
||||||
function:
|
function:
|
||||||
capture_llm_results:
|
capture_llm_results:
|
||||||
- (?s)<Thought>(.*?)</Thought>
|
- (?s)<Thought>(.*?)</Thought>
|
||||||
|
|||||||
@@ -1,10 +1,11 @@
|
|||||||
context_size: 4096
|
context_size: 4096
|
||||||
|
backend: llama-cpp
|
||||||
f16: true
|
f16: true
|
||||||
mmap: true
|
mmap: true
|
||||||
mmproj: minicpm-v-2_6-mmproj-f16.gguf
|
mmproj: minicpm-v-4_5-mmproj-f16.gguf
|
||||||
name: gpt-4o
|
name: gpt-4o
|
||||||
parameters:
|
parameters:
|
||||||
model: minicpm-v-2_6-Q4_K_M.gguf
|
model: minicpm-v-4_5-Q4_K_M.gguf
|
||||||
stopwords:
|
stopwords:
|
||||||
- <|im_end|>
|
- <|im_end|>
|
||||||
- <dummy32000>
|
- <dummy32000>
|
||||||
@@ -41,9 +42,9 @@ template:
|
|||||||
<|im_start|>assistant
|
<|im_start|>assistant
|
||||||
|
|
||||||
download_files:
|
download_files:
|
||||||
- filename: minicpm-v-2_6-Q4_K_M.gguf
|
- filename: minicpm-v-4_5-Q4_K_M.gguf
|
||||||
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
|
sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f
|
||||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
|
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf
|
||||||
- filename: minicpm-v-2_6-mmproj-f16.gguf
|
- filename: minicpm-v-4_5-mmproj-f16.gguf
|
||||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
|
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf
|
||||||
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd
|
sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
embeddings: true
|
embeddings: true
|
||||||
name: text-embedding-ada-002
|
name: text-embedding-ada-002
|
||||||
|
backend: llama-cpp
|
||||||
parameters:
|
parameters:
|
||||||
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
|
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ reranking: true
|
|||||||
f16: true
|
f16: true
|
||||||
parameters:
|
parameters:
|
||||||
model: jina-reranker-v1-tiny-en.f16.gguf
|
model: jina-reranker-v1-tiny-en.f16.gguf
|
||||||
|
backend: llama-cpp
|
||||||
download_files:
|
download_files:
|
||||||
- filename: jina-reranker-v1-tiny-en.f16.gguf
|
- filename: jina-reranker-v1-tiny-en.f16.gguf
|
||||||
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
|
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ name: tts-1
|
|||||||
download_files:
|
download_files:
|
||||||
- filename: voice-en-us-amy-low.tar.gz
|
- filename: voice-en-us-amy-low.tar.gz
|
||||||
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
|
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
|
||||||
|
backend: piper
|
||||||
parameters:
|
parameters:
|
||||||
model: en-us-amy-low.onnx
|
model: en-us-amy-low.onnx
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
context_size: 4096
|
context_size: 4096
|
||||||
f16: true
|
f16: true
|
||||||
|
backend: llama-cpp
|
||||||
function:
|
function:
|
||||||
capture_llm_results:
|
capture_llm_results:
|
||||||
- (?s)<Thought>(.*?)</Thought>
|
- (?s)<Thought>(.*?)</Thought>
|
||||||
|
|||||||
@@ -1,10 +1,11 @@
|
|||||||
context_size: 4096
|
context_size: 4096
|
||||||
|
backend: llama-cpp
|
||||||
f16: true
|
f16: true
|
||||||
mmap: true
|
mmap: true
|
||||||
mmproj: minicpm-v-2_6-mmproj-f16.gguf
|
mmproj: minicpm-v-4_5-mmproj-f16.gguf
|
||||||
name: gpt-4o
|
name: gpt-4o
|
||||||
parameters:
|
parameters:
|
||||||
model: minicpm-v-2_6-Q4_K_M.gguf
|
model: minicpm-v-4_5-Q4_K_M.gguf
|
||||||
stopwords:
|
stopwords:
|
||||||
- <|im_end|>
|
- <|im_end|>
|
||||||
- <dummy32000>
|
- <dummy32000>
|
||||||
@@ -42,9 +43,9 @@ template:
|
|||||||
|
|
||||||
|
|
||||||
download_files:
|
download_files:
|
||||||
- filename: minicpm-v-2_6-Q4_K_M.gguf
|
- filename: minicpm-v-4_5-Q4_K_M.gguf
|
||||||
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
|
sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f
|
||||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
|
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf
|
||||||
- filename: minicpm-v-2_6-mmproj-f16.gguf
|
- filename: minicpm-v-4_5-mmproj-f16.gguf
|
||||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
|
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf
|
||||||
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd
|
sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8
|
||||||
15
assets.go
15
assets.go
@@ -1,15 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
rice "github.com/GeertJohan/go.rice"
|
|
||||||
)
|
|
||||||
|
|
||||||
var backendAssets *rice.Box
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
var err error
|
|
||||||
backendAssets, err = rice.FindBox("backend-assets")
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -17,9 +17,9 @@ ARG GO_VERSION=1.22.6
|
|||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
build-essential \
|
build-essential \
|
||||||
ccache \
|
git ccache \
|
||||||
ca-certificates \
|
ca-certificates \
|
||||||
make \
|
make cmake \
|
||||||
curl unzip \
|
curl unzip \
|
||||||
libssl-dev && \
|
libssl-dev && \
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
@@ -37,14 +37,27 @@ RUN <<EOT bash
|
|||||||
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
software-properties-common pciutils wget gpg-agent && \
|
software-properties-common pciutils sudo wget gpg-agent curl xz-utils && \
|
||||||
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
echo "vulkan" > /run/localai/capability && \
|
||||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
if [ "amd64" = "$TARGETARCH" ]; then
|
||||||
apt-get update && \
|
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
||||||
apt-get install -y \
|
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
||||||
vulkan-sdk && \
|
apt-get update && \
|
||||||
apt-get clean && \
|
apt-get install -y \
|
||||||
rm -rf /var/lib/apt/lists/*
|
vulkan-sdk && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
fi
|
||||||
|
if [ "arm64" = "$TARGETARCH" ]; then
|
||||||
|
# For ARM64, there are no apt packages available, so we download a prebuilt Vulkan SDK tarball and install it manually
|
||||||
|
mkdir vulkan && cd vulkan && curl -L -o vulkan-sdk.tar.xz https://github.com/mudler/vulkan-sdk-arm/releases/download/1.4.321.1/vulkansdk-ubuntu-22.04-arm-1.4.321.1.tar.xz && \
|
||||||
|
tar -xvf vulkan-sdk.tar.xz && \
|
||||||
|
rm vulkan-sdk.tar.xz && \
|
||||||
|
cd * && \
|
||||||
|
cp -rfv aarch64/* /usr/ && \
|
||||||
|
cd ../.. && \
|
||||||
|
rm -rf vulkan
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
@@ -123,9 +136,9 @@ EOT
|
|||||||
|
|
||||||
COPY . /LocalAI
|
COPY . /LocalAI
|
||||||
|
|
||||||
RUN cd /LocalAI && make backend-assets/grpc/bark-cpp
|
RUN cd /LocalAI && make protogen-go && make -C /LocalAI/backend/go/${BACKEND} build
|
||||||
|
|
||||||
FROM scratch
|
FROM scratch
|
||||||
|
ARG BACKEND=rerankers
|
||||||
|
|
||||||
COPY --from=builder /LocalAI/backend-assets/grpc/bark-cpp ./
|
COPY --from=builder /LocalAI/backend/go/${BACKEND}/package/. ./
|
||||||
COPY --from=builder /LocalAI/backend/go/bark/run.sh ./
|
|
||||||
220
backend/Dockerfile.llama-cpp
Normal file
220
backend/Dockerfile.llama-cpp
Normal file
@@ -0,0 +1,220 @@
|
|||||||
|
ARG BASE_IMAGE=ubuntu:22.04
|
||||||
|
ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
|
||||||
|
|
||||||
|
|
||||||
|
# The grpc target does one thing: it builds and installs GRPC. This is in its own layer so that it can be effectively cached by CI.
|
||||||
|
# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
|
||||||
|
FROM ${GRPC_BASE_IMAGE} AS grpc
|
||||||
|
|
||||||
|
# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
|
||||||
|
ARG GRPC_MAKEFLAGS="-j4 -Otarget"
|
||||||
|
ARG GRPC_VERSION=v1.65.0
|
||||||
|
ARG CMAKE_FROM_SOURCE=false
|
||||||
|
ARG CMAKE_VERSION=3.26.4
|
||||||
|
|
||||||
|
ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
|
||||||
|
|
||||||
|
WORKDIR /build
|
||||||
|
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y --no-install-recommends \
|
||||||
|
ca-certificates \
|
||||||
|
build-essential curl libssl-dev \
|
||||||
|
git && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Install CMake (the version in 22.04 is too old)
|
||||||
|
RUN <<EOT bash
|
||||||
|
# Build CMake from source only when CMAKE_FROM_SOURCE=true; otherwise install the distribution package
|
||||||
|
if [ "${CMAKE_FROM_SOURCE}" = "true" ]; then
|
||||||
|
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
|
||||||
|
else
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y \
|
||||||
|
cmake && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
fi
|
||||||
|
EOT
|
||||||
|
|
||||||
|
# We install GRPC to a different prefix here so that we can copy in only the build artifacts later
|
||||||
|
# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
|
||||||
|
# and running make install in the target container
|
||||||
|
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||||
|
mkdir -p /build/grpc/cmake/build && \
|
||||||
|
cd /build/grpc/cmake/build && \
|
||||||
|
sed -i "216i\ TESTONLY" "../../third_party/abseil-cpp/absl/container/CMakeLists.txt" && \
|
||||||
|
cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX:PATH=/opt/grpc ../.. && \
|
||||||
|
make && \
|
||||||
|
make install && \
|
||||||
|
rm -rf /build
|
||||||
|
|
||||||
|
FROM ${BASE_IMAGE} AS builder
|
||||||
|
ARG BACKEND=rerankers
|
||||||
|
ARG BUILD_TYPE
|
||||||
|
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||||
|
ARG CUDA_MAJOR_VERSION
|
||||||
|
ARG CUDA_MINOR_VERSION
|
||||||
|
ARG SKIP_DRIVERS=false
|
||||||
|
ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}
|
||||||
|
ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
|
||||||
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
|
ARG TARGETARCH
|
||||||
|
ARG TARGETVARIANT
|
||||||
|
ARG GO_VERSION=1.22.6
|
||||||
|
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y --no-install-recommends \
|
||||||
|
build-essential \
|
||||||
|
ccache git \
|
||||||
|
ca-certificates \
|
||||||
|
make \
|
||||||
|
curl unzip \
|
||||||
|
libssl-dev && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Cuda
|
||||||
|
ENV PATH=/usr/local/cuda/bin:${PATH}
|
||||||
|
|
||||||
|
# HipBLAS requirements
|
||||||
|
ENV PATH=/opt/rocm/bin:${PATH}
|
||||||
|
|
||||||
|
# Vulkan requirements
|
||||||
|
RUN <<EOT bash
|
||||||
|
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y --no-install-recommends \
|
||||||
|
software-properties-common pciutils sudo wget gpg-agent curl xz-utils libxcb1 libx11-6 && \
|
||||||
|
echo "vulkan" > /run/localai/capability && \
|
||||||
|
if [ "amd64" = "$TARGETARCH" ]; then
|
||||||
|
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
||||||
|
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y \
|
||||||
|
vulkan-sdk && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
fi
|
||||||
|
if [ "arm64" = "$TARGETARCH" ]; then
|
||||||
|
# For ARM64, there are no apt packages available, so we download a prebuilt Vulkan SDK tarball,
|
||||||
|
# copy its aarch64 tree into /usr and remove the temporary download directory afterwards
|
||||||
|
mkdir vulkan && cd vulkan && curl -L -o vulkan-sdk.tar.xz https://github.com/mudler/vulkan-sdk-arm/releases/download/1.4.321.1/vulkansdk-ubuntu-22.04-arm-1.4.321.1.tar.xz && \
|
||||||
|
tar -xvf vulkan-sdk.tar.xz && \
|
||||||
|
rm vulkan-sdk.tar.xz && \
|
||||||
|
cd * && \
|
||||||
|
cp -rfv aarch64/* /usr/ && \
|
||||||
|
cd ../.. && \
|
||||||
|
rm -rf vulkan
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
EOT
|
||||||
|
|
||||||
|
# CuBLAS requirements
|
||||||
|
RUN <<EOT bash
|
||||||
|
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y --no-install-recommends \
|
||||||
|
software-properties-common pciutils
|
||||||
|
if [ "amd64" = "$TARGETARCH" ]; then
|
||||||
|
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
||||||
|
fi
|
||||||
|
if [ "arm64" = "$TARGETARCH" ]; then
|
||||||
|
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
|
||||||
|
fi
|
||||||
|
dpkg -i cuda-keyring_1.1-1_all.deb && \
|
||||||
|
rm -f cuda-keyring_1.1-1_all.deb && \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y --no-install-recommends \
|
||||||
|
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||||
|
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||||
|
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||||
|
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||||
|
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
|
||||||
|
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
fi
|
||||||
|
EOT
|
||||||
|
|
||||||
|
# If we are building with clblas support, we need the libraries for the builds
|
||||||
|
RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y --no-install-recommends \
|
||||||
|
libclblast-dev && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/* \
|
||||||
|
; fi
|
||||||
|
|
||||||
|
RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y --no-install-recommends \
|
||||||
|
hipblas-dev \
|
||||||
|
rocblas-dev && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/* && \
|
||||||
|
# I have no idea why, but the ROCM lib packages don't trigger ldconfig after they install, which results in local-ai and others not being able
|
||||||
|
# to locate the libraries. We run ldconfig ourselves to work around this packaging deficiency
|
||||||
|
ldconfig \
|
||||||
|
; fi
|
||||||
|
|
||||||
|
RUN echo "TARGETARCH: $TARGETARCH"
|
||||||
|
|
||||||
|
# We need protoc installed, and the version in 22.04 is too old. We will create one as part of installing the GRPC build below
|
||||||
|
# but that will also bring in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
|
||||||
|
# here so that we can generate the grpc code for the stablediffusion build
|
||||||
|
RUN <<EOT bash
|
||||||
|
if [ "amd64" = "$TARGETARCH" ]; then
|
||||||
|
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
|
||||||
|
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||||
|
rm protoc.zip
|
||||||
|
fi
|
||||||
|
if [ "arm64" = "$TARGETARCH" ]; then
|
||||||
|
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-aarch_64.zip -o protoc.zip && \
|
||||||
|
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||||
|
rm protoc.zip
|
||||||
|
fi
|
||||||
|
EOT
|
||||||
|
|
||||||
|
# ARG values are scoped to the build stage that declares them, so the CMake knobs are declared again for this stage
|
||||||
|
ARG CMAKE_FROM_SOURCE=false
|
||||||
|
ARG CMAKE_VERSION=3.26.4
|
||||||
|
# Install CMake (the version in 22.04 is too old)
|
||||||
|
RUN <<EOT bash
|
||||||
|
# Build CMake from source only when CMAKE_FROM_SOURCE=true; otherwise install the distribution package
|
||||||
|
if [ "${CMAKE_FROM_SOURCE}" = "true" ]; then
|
||||||
|
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
|
||||||
|
else
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y \
|
||||||
|
cmake && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
fi
|
||||||
|
EOT
|
||||||
|
|
||||||
|
COPY --from=grpc /opt/grpc /usr/local
|
||||||
|
|
||||||
|
|
||||||
|
COPY . /LocalAI
|
||||||
|
|
||||||
|
## Otherwise just run the normal build
|
||||||
|
RUN <<EOT bash
|
||||||
|
if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
||||||
|
cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-fallback && \
|
||||||
|
make llama-cpp-grpc && make llama-cpp-rpc-server; \
|
||||||
|
else \
|
||||||
|
cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-avx && \
|
||||||
|
make llama-cpp-avx2 && \
|
||||||
|
make llama-cpp-avx512 && \
|
||||||
|
make llama-cpp-fallback && \
|
||||||
|
make llama-cpp-grpc && \
|
||||||
|
make llama-cpp-rpc-server; \
|
||||||
|
fi
|
||||||
|
EOT
|
||||||
|
|
||||||
|
|
||||||
|
# Copy libraries using a script to handle architecture differences
|
||||||
|
RUN make -C /LocalAI/backend/cpp/llama-cpp package
|
||||||
|
|
||||||
|
|
||||||
|
FROM scratch
|
||||||
|
|
||||||
|
|
||||||
|
# Copy all available binaries (the build process only creates the appropriate ones for the target architecture)
|
||||||
|
COPY --from=builder /LocalAI/backend/cpp/llama-cpp/package/. ./
|
||||||
@@ -23,7 +23,7 @@ RUN apt-get update && \
|
|||||||
libssl-dev \
|
libssl-dev \
|
||||||
git \
|
git \
|
||||||
git-lfs \
|
git-lfs \
|
||||||
unzip \
|
unzip clang \
|
||||||
upx-ucl \
|
upx-ucl \
|
||||||
curl python3-pip \
|
curl python3-pip \
|
||||||
python-is-python3 \
|
python-is-python3 \
|
||||||
@@ -45,14 +45,27 @@ RUN <<EOT bash
|
|||||||
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
software-properties-common pciutils wget gpg-agent && \
|
software-properties-common pciutils sudo wget gpg-agent curl xz-utils && \
|
||||||
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
echo "vulkan" > /run/localai/capability && \
|
||||||
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
if [ "amd64" = "$TARGETARCH" ]; then
|
||||||
apt-get update && \
|
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
||||||
apt-get install -y \
|
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
||||||
vulkan-sdk && \
|
apt-get update && \
|
||||||
apt-get clean && \
|
apt-get install -y \
|
||||||
rm -rf /var/lib/apt/lists/*
|
vulkan-sdk && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
fi
|
||||||
|
if [ "arm64" = "$TARGETARCH" ]; then
|
||||||
|
# For ARM64, there are no apt packages available, so we download a prebuilt Vulkan SDK tarball and install it manually
|
||||||
|
mkdir vulkan && cd vulkan && curl -L -o vulkan-sdk.tar.xz https://github.com/mudler/vulkan-sdk-arm/releases/download/1.4.321.1/vulkansdk-ubuntu-22.04-arm-1.4.321.1.tar.xz && \
|
||||||
|
tar -xvf vulkan-sdk.tar.xz && \
|
||||||
|
rm vulkan-sdk.tar.xz && \
|
||||||
|
cd * && \
|
||||||
|
cp -rfv aarch64/* /usr/ && \
|
||||||
|
cd ../.. && \
|
||||||
|
rm -rf vulkan
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
@@ -116,7 +129,7 @@ COPY python/${BACKEND} /${BACKEND}
|
|||||||
COPY backend.proto /${BACKEND}/backend.proto
|
COPY backend.proto /${BACKEND}/backend.proto
|
||||||
COPY python/common/ /${BACKEND}/common
|
COPY python/common/ /${BACKEND}/common
|
||||||
|
|
||||||
RUN cd /${BACKEND} && make
|
RUN cd /${BACKEND} && PORTABLE_PYTHON=true make
|
||||||
|
|
||||||
FROM scratch
|
FROM scratch
|
||||||
ARG BACKEND=rerankers
|
ARG BACKEND=rerankers
|
||||||
|
|||||||
213
backend/README.md
Normal file
213
backend/README.md
Normal file
@@ -0,0 +1,213 @@
|
|||||||
|
# LocalAI Backend Architecture
|
||||||
|
|
||||||
|
This directory contains the core backend infrastructure for LocalAI, including the gRPC protocol definition, multi-language Dockerfiles, and language-specific backend implementations.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
LocalAI uses a unified gRPC-based architecture that allows different programming languages to implement AI backends while maintaining consistent interfaces and capabilities. The backend system supports multiple hardware acceleration targets and provides a standardized way to integrate various AI models and frameworks.
|
||||||
|
|
||||||
|
## Architecture Components
|
||||||
|
|
||||||
|
### 1. Protocol Definition (`backend.proto`)
|
||||||
|
|
||||||
|
The `backend.proto` file defines the gRPC service interface that all backends must implement. This ensures consistency across different language implementations and provides a contract for communication between LocalAI core and backend services.
|
||||||
|
|
||||||
|
#### Core Services
|
||||||
|
|
||||||
|
- **Text Generation**: `Predict`, `PredictStream` for LLM inference
|
||||||
|
- **Embeddings**: `Embedding` for text vectorization
|
||||||
|
- **Image Generation**: `GenerateImage` for stable diffusion and image models
|
||||||
|
- **Audio Processing**: `AudioTranscription`, `TTS`, `SoundGeneration`
|
||||||
|
- **Video Generation**: `GenerateVideo` for video synthesis
|
||||||
|
- **Object Detection**: `Detect` for computer vision tasks
|
||||||
|
- **Vector Storage**: `StoresSet`, `StoresGet`, `StoresFind` for RAG operations
|
||||||
|
- **Reranking**: `Rerank` for document relevance scoring
|
||||||
|
- **Voice Activity Detection**: `VAD` for audio segmentation
|
||||||
|
|
||||||
|
#### Key Message Types
|
||||||
|
|
||||||
|
- **`PredictOptions`**: Comprehensive configuration for text generation
|
||||||
|
- **`ModelOptions`**: Model loading and configuration parameters
|
||||||
|
- **`Result`**: Standardized response format
|
||||||
|
- **`StatusResponse`**: Backend health and memory usage information
|
||||||
|
|
||||||
|
### 2. Multi-Language Dockerfiles
|
||||||
|
|
||||||
|
The backend system provides language-specific Dockerfiles that handle the build environment and dependencies for different programming languages:
|
||||||
|
|
||||||
|
- `Dockerfile.python`
|
||||||
|
- `Dockerfile.golang`
|
||||||
|
- `Dockerfile.llama-cpp`
|
||||||
|
|
||||||
|
### 3. Language-Specific Implementations
|
||||||
|
|
||||||
|
#### Python Backends (`python/`)
|
||||||
|
- **transformers**: Hugging Face Transformers framework
|
||||||
|
- **vllm**: High-performance LLM inference
|
||||||
|
- **mlx**: Apple Silicon optimization
|
||||||
|
- **diffusers**: Stable Diffusion models
|
||||||
|
- **Audio**: bark, coqui, faster-whisper, kitten-tts
|
||||||
|
- **Vision**: mlx-vlm, rfdetr
|
||||||
|
- **Specialized**: rerankers, chatterbox, kokoro
|
||||||
|
|
||||||
|
#### Go Backends (`go/`)
|
||||||
|
- **whisper**: OpenAI Whisper speech recognition in Go with a GGML C++ backend (whisper.cpp)
|
||||||
|
- **stablediffusion-ggml**: Stable Diffusion in Go with a GGML C++ backend
|
||||||
|
- **huggingface**: Hugging Face model integration
|
||||||
|
- **piper**: Text-to-speech synthesis in Go with C bindings using rhasspy/piper
|
||||||
|
- **bark-cpp**: Bark TTS models in Go with C++ bindings
|
||||||
|
- **local-store**: Vector storage backend
|
||||||
|
|
||||||
|
#### C++ Backends (`cpp/`)
|
||||||
|
- **llama-cpp**: Llama.cpp integration
|
||||||
|
- **grpc**: GRPC utilities and helpers
|
||||||
|
|
||||||
|
## Hardware Acceleration Support
|
||||||
|
|
||||||
|
### CUDA (NVIDIA)
|
||||||
|
- **Versions**: CUDA 11.x, 12.x
|
||||||
|
- **Features**: cuBLAS, cuDNN, TensorRT optimization
|
||||||
|
- **Targets**: x86_64, ARM64 (Jetson)
|
||||||
|
|
||||||
|
### ROCm (AMD)
|
||||||
|
- **Features**: HIP, rocBLAS, MIOpen
|
||||||
|
- **Targets**: AMD GPUs with ROCm support
|
||||||
|
|
||||||
|
### Intel
|
||||||
|
- **Features**: oneAPI, Intel Extension for PyTorch
|
||||||
|
- **Targets**: Intel GPUs, XPUs, CPUs
|
||||||
|
|
||||||
|
### Vulkan
|
||||||
|
- **Features**: Cross-platform GPU acceleration
|
||||||
|
- **Targets**: Windows, Linux, Android, macOS
|
||||||
|
|
||||||
|
### Apple Silicon
|
||||||
|
- **Features**: MLX framework, Metal Performance Shaders
|
||||||
|
- **Targets**: M1/M2/M3 Macs
|
||||||
|
|
||||||
|
## Backend Registry (`index.yaml`)
|
||||||
|
|
||||||
|
The `index.yaml` file serves as a central registry for all available backends, providing:
|
||||||
|
|
||||||
|
- **Metadata**: Name, description, license, icons
|
||||||
|
- **Capabilities**: Hardware targets and optimization profiles
|
||||||
|
- **Tags**: Categorization for discovery
|
||||||
|
- **URLs**: Source code and documentation links
|
||||||
|
|
||||||
|
## Building Backends
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
- Docker with multi-architecture support
|
||||||
|
- Appropriate hardware drivers (CUDA, ROCm, etc.)
|
||||||
|
- Build tools (make, cmake, compilers)
|
||||||
|
|
||||||
|
### Build Commands
|
||||||
|
|
||||||
|
Example build commands using Docker:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Build Python backend
|
||||||
|
docker build -f backend/Dockerfile.python \
|
||||||
|
--build-arg BACKEND=transformers \
|
||||||
|
--build-arg BUILD_TYPE=cublas12 \
|
||||||
|
--build-arg CUDA_MAJOR_VERSION=12 \
|
||||||
|
--build-arg CUDA_MINOR_VERSION=0 \
|
||||||
|
-t localai-backend-transformers .
|
||||||
|
|
||||||
|
# Build Go backend
|
||||||
|
docker build -f backend/Dockerfile.golang \
|
||||||
|
--build-arg BACKEND=whisper \
|
||||||
|
--build-arg BUILD_TYPE=cpu \
|
||||||
|
-t localai-backend-whisper .
|
||||||
|
|
||||||
|
# Build C++ backend
|
||||||
|
docker build -f backend/Dockerfile.llama-cpp \
|
||||||
|
--build-arg BACKEND=llama-cpp \
|
||||||
|
--build-arg BUILD_TYPE=cublas \
|
||||||
|
--build-arg CUDA_MAJOR_VERSION=12 \
|
||||||
|
--build-arg CUDA_MINOR_VERSION=0 \
|
||||||
|
-t localai-backend-llama-cpp .
|
||||||
|
```
|
||||||
|
|
||||||
|
For ARM64/Mac builds, Docker cannot be used; use the Makefile in the respective backend directory instead.
|
||||||
|
|
||||||
|
### Build Types
|
||||||
|
|
||||||
|
- **`cpu`**: CPU-only optimization
|
||||||
|
- **`cublas11`**: CUDA 11.x with cuBLAS
|
||||||
|
- **`cublas12`**: CUDA 12.x with cuBLAS
|
||||||
|
- **`hipblas`**: ROCm with rocBLAS
|
||||||
|
- **`intel`**: Intel oneAPI optimization
|
||||||
|
- **`vulkan`**: Vulkan-based acceleration
|
||||||
|
- **`metal`**: Apple Metal optimization
|
||||||
|
|
||||||
|
## Backend Development
|
||||||
|
|
||||||
|
### Creating a New Backend
|
||||||
|
|
||||||
|
1. **Choose Language**: Select Python, Go, or C++ based on requirements
|
||||||
|
2. **Implement Interface**: Implement the gRPC service defined in `backend.proto`
|
||||||
|
3. **Add Dependencies**: Create appropriate requirements files
|
||||||
|
4. **Configure Build**: Set up Dockerfile and build scripts
|
||||||
|
5. **Register Backend**: Add entry to `index.yaml`
|
||||||
|
6. **Test Integration**: Verify gRPC communication and functionality
|
||||||
|
|
||||||
|
### Backend Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
backend-name/
|
||||||
|
├── backend.py/go/cpp # Main implementation
|
||||||
|
├── requirements.txt # Dependencies
|
||||||
|
├── Dockerfile # Build configuration
|
||||||
|
├── install.sh # Installation script
|
||||||
|
├── run.sh # Execution script
|
||||||
|
├── test.sh # Test script
|
||||||
|
└── README.md # Backend documentation
|
||||||
|
```
|
||||||
|
|
||||||
|
### Required gRPC Methods
|
||||||
|
|
||||||
|
At minimum, backends must implement:
|
||||||
|
- `Health()` - Service health check
|
||||||
|
- `LoadModel()` - Model loading and initialization
|
||||||
|
- `Predict()` - Main inference endpoint
|
||||||
|
- `Status()` - Backend status and metrics
|
||||||
|
|
||||||
|
## Integration with LocalAI Core
|
||||||
|
|
||||||
|
Backends communicate with LocalAI core through gRPC:
|
||||||
|
|
||||||
|
1. **Service Discovery**: Core discovers available backends
|
||||||
|
2. **Model Loading**: Core requests model loading via `LoadModel`
|
||||||
|
3. **Inference**: Core sends requests via `Predict` or specialized endpoints
|
||||||
|
4. **Streaming**: Core handles streaming responses for real-time generation
|
||||||
|
5. **Monitoring**: Core tracks backend health and performance
|
||||||
|
|
||||||
|
## Performance Optimization
|
||||||
|
|
||||||
|
### Memory Management
|
||||||
|
- **Model Caching**: Efficient model loading and caching
|
||||||
|
- **Batch Processing**: Optimize for multiple concurrent requests
|
||||||
|
- **Memory Pinning**: GPU memory optimization for CUDA/ROCm
|
||||||
|
|
||||||
|
### Hardware Utilization
|
||||||
|
- **Multi-GPU**: Support for tensor parallelism
|
||||||
|
- **Mixed Precision**: FP16/BF16 for memory efficiency
|
||||||
|
- **Kernel Fusion**: Optimized CUDA/ROCm kernels
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Common Issues
|
||||||
|
|
||||||
|
1. **GRPC Connection**: Verify backend service is running and accessible
|
||||||
|
2. **Model Loading**: Check model paths and dependencies
|
||||||
|
3. **Hardware Detection**: Ensure appropriate drivers and libraries
|
||||||
|
4. **Memory Issues**: Monitor GPU memory usage and model sizes
|
||||||
|
|
||||||
|
## Contributing
|
||||||
|
|
||||||
|
When contributing to the backend system:
|
||||||
|
|
||||||
|
1. **Follow Protocol**: Implement the exact gRPC interface
|
||||||
|
2. **Add Tests**: Include comprehensive test coverage
|
||||||
|
3. **Document**: Provide clear usage examples
|
||||||
|
4. **Optimize**: Consider performance and resource usage
|
||||||
|
5. **Validate**: Test across different hardware targets
|
||||||
@@ -20,6 +20,7 @@ service Backend {
|
|||||||
rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
|
rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
|
||||||
rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
|
rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
|
||||||
rpc Status(HealthMessage) returns (StatusResponse) {}
|
rpc Status(HealthMessage) returns (StatusResponse) {}
|
||||||
|
rpc Detect(DetectOptions) returns (DetectResponse) {}
|
||||||
|
|
||||||
rpc StoresSet(StoresSetOptions) returns (Result) {}
|
rpc StoresSet(StoresSetOptions) returns (Result) {}
|
||||||
rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
|
rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
|
||||||
@@ -185,7 +186,6 @@ message ModelOptions {
|
|||||||
string MainGPU = 13;
|
string MainGPU = 13;
|
||||||
string TensorSplit = 14;
|
string TensorSplit = 14;
|
||||||
int32 Threads = 15;
|
int32 Threads = 15;
|
||||||
string LibrarySearchPath = 16;
|
|
||||||
float RopeFreqBase = 17;
|
float RopeFreqBase = 17;
|
||||||
float RopeFreqScale = 18;
|
float RopeFreqScale = 18;
|
||||||
float RMSNormEps = 19;
|
float RMSNormEps = 19;
|
||||||
@@ -242,7 +242,7 @@ message ModelOptions {
|
|||||||
|
|
||||||
string Type = 49;
|
string Type = 49;
|
||||||
|
|
||||||
bool FlashAttention = 56;
|
string FlashAttention = 56;
|
||||||
bool NoKVOffload = 57;
|
bool NoKVOffload = 57;
|
||||||
|
|
||||||
string ModelPath = 59;
|
string ModelPath = 59;
|
||||||
@@ -258,6 +258,8 @@ message ModelOptions {
|
|||||||
repeated GrammarTrigger GrammarTriggers = 65;
|
repeated GrammarTrigger GrammarTriggers = 65;
|
||||||
|
|
||||||
bool Reranking = 71;
|
bool Reranking = 71;
|
||||||
|
|
||||||
|
repeated string Overrides = 72;
|
||||||
}
|
}
|
||||||
|
|
||||||
message Result {
|
message Result {
|
||||||
@@ -274,6 +276,7 @@ message TranscriptRequest {
|
|||||||
string language = 3;
|
string language = 3;
|
||||||
uint32 threads = 4;
|
uint32 threads = 4;
|
||||||
bool translate = 5;
|
bool translate = 5;
|
||||||
|
bool diarize = 6;
|
||||||
}
|
}
|
||||||
|
|
||||||
message TranscriptResult {
|
message TranscriptResult {
|
||||||
@@ -303,19 +306,24 @@ message GenerateImageRequest {
|
|||||||
// Diffusers
|
// Diffusers
|
||||||
string EnableParameters = 10;
|
string EnableParameters = 10;
|
||||||
int32 CLIPSkip = 11;
|
int32 CLIPSkip = 11;
|
||||||
|
|
||||||
|
// Reference images for models that support them (e.g., Flux Kontext)
|
||||||
|
repeated string ref_images = 12;
|
||||||
}
|
}
|
||||||
|
|
||||||
message GenerateVideoRequest {
|
message GenerateVideoRequest {
|
||||||
string prompt = 1;
|
string prompt = 1;
|
||||||
string start_image = 2; // Path or base64 encoded image for the start frame
|
string negative_prompt = 2; // Negative prompt for video generation
|
||||||
string end_image = 3; // Path or base64 encoded image for the end frame
|
string start_image = 3; // Path or base64 encoded image for the start frame
|
||||||
int32 width = 4;
|
string end_image = 4; // Path or base64 encoded image for the end frame
|
||||||
int32 height = 5;
|
int32 width = 5;
|
||||||
int32 num_frames = 6; // Number of frames to generate
|
int32 height = 6;
|
||||||
int32 fps = 7; // Frames per second
|
int32 num_frames = 7; // Number of frames to generate
|
||||||
int32 seed = 8;
|
int32 fps = 8; // Frames per second
|
||||||
float cfg_scale = 9; // Classifier-free guidance scale
|
int32 seed = 9;
|
||||||
string dst = 10; // Output path for the generated video
|
float cfg_scale = 10; // Classifier-free guidance scale
|
||||||
|
int32 step = 11; // Number of inference steps
|
||||||
|
string dst = 12; // Output path for the generated video
|
||||||
}
|
}
|
||||||
|
|
||||||
message TTSRequest {
|
message TTSRequest {
|
||||||
@@ -375,3 +383,20 @@ message Message {
|
|||||||
string role = 1;
|
string role = 1;
|
||||||
string content = 2;
|
string content = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
message DetectOptions {
|
||||||
|
string src = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message Detection {
|
||||||
|
float x = 1;
|
||||||
|
float y = 2;
|
||||||
|
float width = 3;
|
||||||
|
float height = 4;
|
||||||
|
float confidence = 5;
|
||||||
|
string class_name = 6;
|
||||||
|
}
|
||||||
|
|
||||||
|
message DetectResponse {
|
||||||
|
repeated Detection Detections = 1;
|
||||||
|
}
|
||||||
|
|||||||
166
backend/cpp/llama-cpp/Makefile
Normal file
166
backend/cpp/llama-cpp/Makefile
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
|
||||||
|
LLAMA_VERSION?=4807e8f96a61b2adccebd5e57444c94d18de7264
|
||||||
|
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||||
|
|
||||||
|
CMAKE_ARGS?=
|
||||||
|
BUILD_TYPE?=
|
||||||
|
NATIVE?=false
|
||||||
|
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
||||||
|
TARGET?=--target grpc-server
|
||||||
|
JOBS?=$(shell nproc)
|
||||||
|
|
||||||
|
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
||||||
|
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
|
||||||
|
|
||||||
|
CURRENT_MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
|
||||||
|
ifeq ($(NATIVE),false)
|
||||||
|
CMAKE_ARGS+=-DGGML_NATIVE=OFF -DLLAMA_OPENSSL=OFF
|
||||||
|
endif
|
||||||
|
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
|
||||||
|
ifeq ($(BUILD_TYPE),cublas)
|
||||||
|
CMAKE_ARGS+=-DGGML_CUDA=ON
|
||||||
|
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||||
|
# to CMAKE_ARGS automatically
|
||||||
|
else ifeq ($(BUILD_TYPE),openblas)
|
||||||
|
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||||
|
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||||
|
else ifeq ($(BUILD_TYPE),clblas)
|
||||||
|
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||||
|
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
||||||
|
else ifeq ($(BUILD_TYPE),hipblas)
|
||||||
|
ROCM_HOME ?= /opt/rocm
|
||||||
|
ROCM_PATH ?= /opt/rocm
|
||||||
|
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||||
|
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||||
|
AMDGPU_TARGETS?=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201
|
||||||
|
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS)
|
||||||
|
else ifeq ($(BUILD_TYPE),vulkan)
|
||||||
|
CMAKE_ARGS+=-DGGML_VULKAN=1
|
||||||
|
else ifeq ($(OS),Darwin)
|
||||||
|
ifeq ($(BUILD_TYPE),)
|
||||||
|
BUILD_TYPE=metal
|
||||||
|
endif
|
||||||
|
ifneq ($(BUILD_TYPE),metal)
|
||||||
|
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||||
|
else
|
||||||
|
CMAKE_ARGS+=-DGGML_METAL=ON
|
||||||
|
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
||||||
|
CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
|
||||||
|
CMAKE_ARGS+=-DGGML_OPENMP=OFF
|
||||||
|
endif
|
||||||
|
TARGET+=--target ggml-metal
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_TYPE),sycl_f16)
|
||||||
|
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
||||||
|
-DCMAKE_C_COMPILER=icx \
|
||||||
|
-DCMAKE_CXX_COMPILER=icpx \
|
||||||
|
-DCMAKE_CXX_FLAGS="-fsycl" \
|
||||||
|
-DGGML_SYCL_F16=ON
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_TYPE),sycl_f32)
|
||||||
|
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
||||||
|
-DCMAKE_C_COMPILER=icx \
|
||||||
|
-DCMAKE_CXX_COMPILER=icpx \
|
||||||
|
-DCMAKE_CXX_FLAGS="-fsycl"
|
||||||
|
endif
|
||||||
|
|
||||||
|
INSTALLED_PACKAGES=$(CURDIR)/../grpc/installed_packages
|
||||||
|
INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
|
||||||
|
ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
|
||||||
|
-DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
|
||||||
|
-Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
|
||||||
|
-DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
|
||||||
|
-DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
|
||||||
|
build-llama-cpp-grpc-server:
|
||||||
|
# Conditionally build grpc for the llama backend to use if needed
|
||||||
|
ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
|
||||||
|
$(MAKE) -C ../../grpc build
|
||||||
|
_PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto \
|
||||||
|
_GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin \
|
||||||
|
PATH="${INSTALLED_PACKAGES}/bin:${PATH}" \
|
||||||
|
CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" \
|
||||||
|
LLAMA_VERSION=$(LLAMA_VERSION) \
|
||||||
|
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(VARIANT) grpc-server
|
||||||
|
else
|
||||||
|
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
|
||||||
|
LLAMA_VERSION=$(LLAMA_VERSION) $(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(VARIANT) grpc-server
|
||||||
|
endif
|
||||||
|
|
||||||
|
llama-cpp-avx2: llama.cpp
|
||||||
|
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build
|
||||||
|
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build purge
|
||||||
|
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
|
||||||
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-cpp-avx2-build" build-llama-cpp-grpc-server
|
||||||
|
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build/grpc-server llama-cpp-avx2
|
||||||
|
|
||||||
|
llama-cpp-avx512: llama.cpp
|
||||||
|
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build
|
||||||
|
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build purge
|
||||||
|
$(info ${GREEN}I llama-cpp build info:avx512${RESET})
|
||||||
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-cpp-avx512-build" build-llama-cpp-grpc-server
|
||||||
|
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build/grpc-server llama-cpp-avx512
|
||||||
|
|
||||||
|
llama-cpp-avx: llama.cpp
|
||||||
|
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build
|
||||||
|
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build purge
|
||||||
|
$(info ${GREEN}I llama-cpp build info:avx${RESET})
|
||||||
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
|
||||||
|
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build/grpc-server llama-cpp-avx
|
||||||
|
|
||||||
|
llama-cpp-fallback: llama.cpp
|
||||||
|
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build
|
||||||
|
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build purge
|
||||||
|
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
|
||||||
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
|
||||||
|
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback
|
||||||
|
|
||||||
|
llama-cpp-grpc: llama.cpp
|
||||||
|
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
|
||||||
|
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge
|
||||||
|
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
|
||||||
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
|
||||||
|
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc
|
||||||
|
|
||||||
|
llama-cpp-rpc-server: llama-cpp-grpc
|
||||||
|
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/llama.cpp/build/bin/rpc-server llama-cpp-rpc-server
|
||||||
|
|
||||||
|
llama.cpp:
|
||||||
|
mkdir -p llama.cpp
|
||||||
|
cd llama.cpp && \
|
||||||
|
git init && \
|
||||||
|
git remote add origin $(LLAMA_REPO) && \
|
||||||
|
git fetch origin && \
|
||||||
|
git checkout -b build $(LLAMA_VERSION) && \
|
||||||
|
git submodule update --init --recursive --depth 1 --single-branch
|
||||||
|
|
||||||
|
llama.cpp/tools/grpc-server: llama.cpp
|
||||||
|
mkdir -p llama.cpp/tools/grpc-server
|
||||||
|
bash prepare.sh
|
||||||
|
|
||||||
|
rebuild:
|
||||||
|
bash prepare.sh
|
||||||
|
rm -rf grpc-server
|
||||||
|
$(MAKE) grpc-server
|
||||||
|
|
||||||
|
package:
|
||||||
|
bash package.sh
|
||||||
|
|
||||||
|
purge:
|
||||||
|
rm -rf llama.cpp/build
|
||||||
|
rm -rf llama.cpp/tools/grpc-server
|
||||||
|
rm -rf grpc-server
|
||||||
|
|
||||||
|
clean: purge
|
||||||
|
rm -rf llama.cpp
|
||||||
|
|
||||||
|
grpc-server: llama.cpp llama.cpp/tools/grpc-server
|
||||||
|
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
|
||||||
|
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||||
|
+bash -c "source $(ONEAPI_VARS); \
|
||||||
|
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release -j $(JOBS) $(TARGET)"
|
||||||
|
else
|
||||||
|
+cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release -j $(JOBS) $(TARGET)
|
||||||
|
endif
|
||||||
|
cp llama.cpp/build/bin/grpc-server .
|
||||||
@@ -12,6 +12,7 @@
|
|||||||
|
|
||||||
#include "backend.pb.h"
|
#include "backend.pb.h"
|
||||||
#include "backend.grpc.pb.h"
|
#include "backend.grpc.pb.h"
|
||||||
|
#include "common.h"
|
||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
#include <grpcpp/ext/proto_server_reflection_plugin.h>
|
#include <grpcpp/ext/proto_server_reflection_plugin.h>
|
||||||
#include <grpcpp/grpcpp.h>
|
#include <grpcpp/grpcpp.h>
|
||||||
@@ -52,9 +53,9 @@ static void start_llama_server(server_context& ctx_server) {
|
|||||||
LOG_INF("%s: model loaded\n", __func__);
|
LOG_INF("%s: model loaded\n", __func__);
|
||||||
|
|
||||||
// print sample chat example to make it clear which template is used
|
// print sample chat example to make it clear which template is used
|
||||||
LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__,
|
// LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__,
|
||||||
common_chat_templates_source(ctx_server.chat_templates.get()),
|
// common_chat_templates_source(ctx_server.chat_templates.get()),
|
||||||
common_chat_format_example(ctx_server.chat_templates.get(), ctx_server.params_base.use_jinja).c_str());
|
// common_chat_format_example(ctx_server.chat_templates.get(), ctx_server.params_base.use_jinja).c_str(), ctx_server.params_base.default_template_kwargs);
|
||||||
|
|
||||||
// Reset the chat templates
|
// Reset the chat templates
|
||||||
// TODO: We should make this configurable by respecting the option that is already present in LocalAI for vLLM
|
// TODO: We should make this configurable by respecting the option that is already present in LocalAI for vLLM
|
||||||
@@ -230,6 +231,7 @@ static void params_parse(const backend::ModelOptions* request,
|
|||||||
params.cpuparams.n_threads = request->threads();
|
params.cpuparams.n_threads = request->threads();
|
||||||
params.n_gpu_layers = request->ngpulayers();
|
params.n_gpu_layers = request->ngpulayers();
|
||||||
params.n_batch = request->nbatch();
|
params.n_batch = request->nbatch();
|
||||||
|
params.n_ubatch = request->nbatch(); // fixes issue with reranking models being limited to 512 tokens (the default n_ubatch size); allows for setting the maximum input amount of tokens thereby avoiding this error "input is too large to process. increase the physical batch size"
|
||||||
// Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1
|
// Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1
|
||||||
//params.n_parallel = 1;
|
//params.n_parallel = 1;
|
||||||
const char *env_parallel = std::getenv("LLAMACPP_PARALLEL");
|
const char *env_parallel = std::getenv("LLAMACPP_PARALLEL");
|
||||||
@@ -260,6 +262,13 @@ static void params_parse(const backend::ModelOptions* request,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add kv_overrides
|
||||||
|
if (request->overrides_size() > 0) {
|
||||||
|
for (int i = 0; i < request->overrides_size(); i++) {
|
||||||
|
string_parse_kv_override(request->overrides(i).c_str(), params.kv_overrides);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: Add yarn
|
// TODO: Add yarn
|
||||||
|
|
||||||
if (!request->tensorsplit().empty()) {
|
if (!request->tensorsplit().empty()) {
|
||||||
@@ -296,7 +305,15 @@ static void params_parse(const backend::ModelOptions* request,
|
|||||||
}
|
}
|
||||||
params.use_mlock = request->mlock();
|
params.use_mlock = request->mlock();
|
||||||
params.use_mmap = request->mmap();
|
params.use_mmap = request->mmap();
|
||||||
params.flash_attn = request->flashattention();
|
|
||||||
|
if (request->flashattention() == "on" || request->flashattention() == "enabled") {
|
||||||
|
params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_ENABLED;
|
||||||
|
} else if (request->flashattention() == "off" || request->flashattention() == "disabled") {
|
||||||
|
params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_DISABLED;
|
||||||
|
} else if (request->flashattention() == "auto") {
|
||||||
|
params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_AUTO;
|
||||||
|
}
|
||||||
|
|
||||||
params.no_kv_offload = request->nokvoffload();
|
params.no_kv_offload = request->nokvoffload();
|
||||||
params.ctx_shift = false; // We control context-shifting in any case (and we disable it as it could just lead to infinite loops)
|
params.ctx_shift = false; // We control context-shifting in any case (and we disable it as it could just lead to infinite loops)
|
||||||
|
|
||||||
@@ -305,9 +322,11 @@ static void params_parse(const backend::ModelOptions* request,
|
|||||||
params.pooling_type = LLAMA_POOLING_TYPE_RANK;
|
params.pooling_type = LLAMA_POOLING_TYPE_RANK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (request->ropescaling() == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; }
|
if (request->ropescaling() == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; }
|
||||||
else if (request->ropescaling() == "yarn") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN; }
|
else if (request->ropescaling() == "yarn") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN; }
|
||||||
else { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_LINEAR; }
|
else if (request->ropescaling() == "linear") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_LINEAR; }
|
||||||
|
|
||||||
if ( request->yarnextfactor() != 0.0f ) {
|
if ( request->yarnextfactor() != 0.0f ) {
|
||||||
params.yarn_ext_factor = request->yarnextfactor();
|
params.yarn_ext_factor = request->yarnextfactor();
|
||||||
}
|
}
|
||||||
@@ -427,24 +446,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// process files
|
|
||||||
mtmd::bitmaps bitmaps;
|
|
||||||
const bool has_mtmd = ctx_server.mctx != nullptr;
|
const bool has_mtmd = ctx_server.mctx != nullptr;
|
||||||
{
|
|
||||||
if (!has_mtmd && !files.empty()) {
|
|
||||||
throw std::runtime_error("This server does not support multimodal");
|
|
||||||
}
|
|
||||||
for (auto & file : files) {
|
|
||||||
mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(ctx_server.mctx, file.data(), file.size()));
|
|
||||||
if (!bmp.ptr) {
|
|
||||||
throw std::runtime_error("Failed to load image/audio");
|
|
||||||
}
|
|
||||||
// calculate bitmap hash (for KV caching)
|
|
||||||
std::string hash = fnv_hash(bmp.data(), bmp.n_bytes());
|
|
||||||
bmp.set_id(hash.c_str());
|
|
||||||
bitmaps.entries.push_back(std::move(bmp));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// process prompt
|
// process prompt
|
||||||
std::vector<server_tokens> inputs;
|
std::vector<server_tokens> inputs;
|
||||||
@@ -454,32 +456,10 @@ public:
|
|||||||
|
|
||||||
if (has_mtmd) {
|
if (has_mtmd) {
|
||||||
// multimodal
|
// multimodal
|
||||||
std::string prompt_str = prompt.get<std::string>();
|
inputs.push_back(process_mtmd_prompt(ctx_server.mctx, prompt.get<std::string>(), files));
|
||||||
mtmd_input_text inp_txt = {
|
|
||||||
prompt_str.c_str(),
|
|
||||||
/* add_special */ true,
|
|
||||||
/* parse_special */ true,
|
|
||||||
};
|
|
||||||
mtmd::input_chunks chunks(mtmd_input_chunks_init());
|
|
||||||
auto bitmaps_c_ptr = bitmaps.c_ptr();
|
|
||||||
int32_t tokenized = mtmd_tokenize(ctx_server.mctx,
|
|
||||||
chunks.ptr.get(),
|
|
||||||
&inp_txt,
|
|
||||||
bitmaps_c_ptr.data(),
|
|
||||||
bitmaps_c_ptr.size());
|
|
||||||
if (tokenized != 0) {
|
|
||||||
throw std::runtime_error("Failed to tokenize prompt");
|
|
||||||
}
|
|
||||||
|
|
||||||
server_tokens tmp(chunks, true);
|
|
||||||
inputs.push_back(std::move(tmp));
|
|
||||||
} else {
|
} else {
|
||||||
// non-multimodal version
|
// Everything else, including multimodal completions.
|
||||||
auto tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, prompt, true, true);
|
inputs = tokenize_input_prompts(ctx_server.vocab, ctx_server.mctx, prompt, true, true);
|
||||||
for (auto & p : tokenized_prompts) {
|
|
||||||
auto tmp = server_tokens(p, ctx_server.mctx != nullptr);
|
|
||||||
inputs.push_back(std::move(tmp));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
tasks.reserve(inputs.size());
|
tasks.reserve(inputs.size());
|
||||||
@@ -620,23 +600,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// process files
|
// process files
|
||||||
mtmd::bitmaps bitmaps;
|
|
||||||
const bool has_mtmd = ctx_server.mctx != nullptr;
|
const bool has_mtmd = ctx_server.mctx != nullptr;
|
||||||
{
|
|
||||||
if (!has_mtmd && !files.empty()) {
|
|
||||||
throw std::runtime_error("This server does not support multimodal");
|
|
||||||
}
|
|
||||||
for (auto & file : files) {
|
|
||||||
mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(ctx_server.mctx, file.data(), file.size()));
|
|
||||||
if (!bmp.ptr) {
|
|
||||||
throw std::runtime_error("Failed to load image/audio");
|
|
||||||
}
|
|
||||||
// calculate bitmap hash (for KV caching)
|
|
||||||
std::string hash = fnv_hash(bmp.data(), bmp.n_bytes());
|
|
||||||
bmp.set_id(hash.c_str());
|
|
||||||
bitmaps.entries.push_back(std::move(bmp));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// process prompt
|
// process prompt
|
||||||
std::vector<server_tokens> inputs;
|
std::vector<server_tokens> inputs;
|
||||||
@@ -647,33 +611,10 @@ public:
|
|||||||
|
|
||||||
if (has_mtmd) {
|
if (has_mtmd) {
|
||||||
// multimodal
|
// multimodal
|
||||||
std::string prompt_str = prompt.get<std::string>();
|
inputs.push_back(process_mtmd_prompt(ctx_server.mctx, prompt.get<std::string>(), files));
|
||||||
mtmd_input_text inp_txt = {
|
|
||||||
prompt_str.c_str(),
|
|
||||||
/* add_special */ true,
|
|
||||||
/* parse_special */ true,
|
|
||||||
};
|
|
||||||
mtmd::input_chunks chunks(mtmd_input_chunks_init());
|
|
||||||
auto bitmaps_c_ptr = bitmaps.c_ptr();
|
|
||||||
int32_t tokenized = mtmd_tokenize(ctx_server.mctx,
|
|
||||||
chunks.ptr.get(),
|
|
||||||
&inp_txt,
|
|
||||||
bitmaps_c_ptr.data(),
|
|
||||||
bitmaps_c_ptr.size());
|
|
||||||
if (tokenized != 0) {
|
|
||||||
std::cout << "[PREDICT] Failed to tokenize prompt" << std::endl;
|
|
||||||
throw std::runtime_error("Failed to tokenize prompt");
|
|
||||||
}
|
|
||||||
|
|
||||||
server_tokens tmp(chunks, true);
|
|
||||||
inputs.push_back(std::move(tmp));
|
|
||||||
} else {
|
} else {
|
||||||
// non-multimodal version
|
// Everything else, including multimodal completions.
|
||||||
auto tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, prompt, true, true);
|
inputs = tokenize_input_prompts(ctx_server.vocab, ctx_server.mctx, prompt, true, true);
|
||||||
for (auto & p : tokenized_prompts) {
|
|
||||||
auto tmp = server_tokens(p, ctx_server.mctx != nullptr);
|
|
||||||
inputs.push_back(std::move(tmp));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
tasks.reserve(inputs.size());
|
tasks.reserve(inputs.size());
|
||||||
@@ -761,10 +702,10 @@ public:
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
// for the shape of input/content, see tokenize_input_prompts()
|
// for the shape of input/content, see tokenize_input_prompts()
|
||||||
json prompt = body.at("prompt");
|
json prompt = body.at("embeddings");
|
||||||
|
|
||||||
|
|
||||||
auto tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, prompt, true, true);
|
auto tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, ctx_server.mctx, prompt, true, true);
|
||||||
for (const auto & tokens : tokenized_prompts) {
|
for (const auto & tokens : tokenized_prompts) {
|
||||||
// this check is necessary for models that do not add BOS token to the input
|
// this check is necessary for models that do not add BOS token to the input
|
||||||
if (tokens.empty()) {
|
if (tokens.empty()) {
|
||||||
@@ -772,6 +713,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int embd_normalize = 2; // default to Euclidean/L2 norm
|
||||||
// create and queue the task
|
// create and queue the task
|
||||||
json responses = json::array();
|
json responses = json::array();
|
||||||
bool error = false;
|
bool error = false;
|
||||||
@@ -783,11 +725,10 @@ public:
|
|||||||
|
|
||||||
task.id = ctx_server.queue_tasks.get_new_id();
|
task.id = ctx_server.queue_tasks.get_new_id();
|
||||||
task.index = i;
|
task.index = i;
|
||||||
task.prompt_tokens = server_tokens(tokenized_prompts[i], ctx_server.mctx != nullptr);
|
task.prompt_tokens = std::move(tokenized_prompts[i]);
|
||||||
|
|
||||||
// OAI-compat
|
|
||||||
task.params.oaicompat = OAICOMPAT_TYPE_EMBEDDING;
|
|
||||||
|
|
||||||
|
task.params.oaicompat = OAICOMPAT_TYPE_NONE;
|
||||||
|
task.params.embd_normalize = embd_normalize;
|
||||||
tasks.push_back(std::move(task));
|
tasks.push_back(std::move(task));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -803,9 +744,8 @@ public:
|
|||||||
responses.push_back(res->to_json());
|
responses.push_back(res->to_json());
|
||||||
}
|
}
|
||||||
}, [&](const json & error_data) {
|
}, [&](const json & error_data) {
|
||||||
return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, error_data.value("content", ""));
|
error = true;
|
||||||
}, [&]() {
|
}, [&]() {
|
||||||
// NOTE: we should try to check when the writer is closed here
|
|
||||||
return false;
|
return false;
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -815,12 +755,36 @@ public:
|
|||||||
return grpc::Status(grpc::StatusCode::INTERNAL, "Error in receiving results");
|
return grpc::Status(grpc::StatusCode::INTERNAL, "Error in receiving results");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<float> embeddings = responses[0].value("embedding", std::vector<float>());
|
std::cout << "[DEBUG] Responses size: " << responses.size() << std::endl;
|
||||||
// loop the vector and set the embeddings results
|
|
||||||
for (int i = 0; i < embeddings.size(); i++) {
|
// Process the responses and extract embeddings
|
||||||
embeddingResult->add_embeddings(embeddings[i]);
|
for (const auto & response_elem : responses) {
|
||||||
|
// Check if the response has an "embedding" field
|
||||||
|
if (response_elem.contains("embedding")) {
|
||||||
|
json embedding_data = json_value(response_elem, "embedding", json::array());
|
||||||
|
|
||||||
|
if (embedding_data.is_array() && !embedding_data.empty()) {
|
||||||
|
for (const auto & embedding_vector : embedding_data) {
|
||||||
|
if (embedding_vector.is_array()) {
|
||||||
|
for (const auto & embedding_value : embedding_vector) {
|
||||||
|
embeddingResult->add_embeddings(embedding_value.get<float>());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Check if the response itself contains the embedding data directly
|
||||||
|
if (response_elem.is_array()) {
|
||||||
|
for (const auto & embedding_value : response_elem) {
|
||||||
|
embeddingResult->add_embeddings(embedding_value.get<float>());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
return grpc::Status::OK;
|
return grpc::Status::OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -838,9 +802,6 @@ public:
|
|||||||
return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, "\"documents\" must be a non-empty string array");
|
return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT, "\"documents\" must be a non-empty string array");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tokenize the query
|
|
||||||
llama_tokens tokenized_query = tokenize_input_prompts(ctx_server.vocab, request->query(), /* add_special */ false, true)[0];
|
|
||||||
|
|
||||||
// Create and queue the task
|
// Create and queue the task
|
||||||
json responses = json::array();
|
json responses = json::array();
|
||||||
bool error = false;
|
bool error = false;
|
||||||
@@ -852,14 +813,13 @@ public:
|
|||||||
documents.push_back(request->documents(i));
|
documents.push_back(request->documents(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
auto tokenized_docs = tokenize_input_prompts(ctx_server.vocab, documents, /* add_special */ false, true);
|
tasks.reserve(documents.size());
|
||||||
tasks.reserve(tokenized_docs.size());
|
for (size_t i = 0; i < documents.size(); i++) {
|
||||||
for (size_t i = 0; i < tokenized_docs.size(); i++) {
|
auto tmp = format_rerank(ctx_server.model, ctx_server.vocab, ctx_server.mctx, request->query(), documents[i]);
|
||||||
auto tmp = format_rerank(ctx_server.vocab, tokenized_query, tokenized_docs[i]);
|
|
||||||
server_task task = server_task(SERVER_TASK_TYPE_RERANK);
|
server_task task = server_task(SERVER_TASK_TYPE_RERANK);
|
||||||
task.id = ctx_server.queue_tasks.get_new_id();
|
task.id = ctx_server.queue_tasks.get_new_id();
|
||||||
task.index = i;
|
task.index = i;
|
||||||
task.prompt_tokens = server_tokens(tmp, ctx_server.mctx != nullptr);
|
task.prompt_tokens = std::move(tmp);
|
||||||
tasks.push_back(std::move(task));
|
tasks.push_back(std::move(task));
|
||||||
}
|
}
|
||||||
|
|
||||||
42
backend/cpp/llama-cpp/package.sh
Executable file
42
backend/cpp/llama-cpp/package.sh
Executable file
@@ -0,0 +1,42 @@
|
|||||||
|
#!/bin/bash

# Script to copy the appropriate libraries based on architecture
# This script is used in the final stage of the Dockerfile
#
# Layout produced next to this script:
#   package/            - every llama-cpp-* artifact plus run.sh
#   package/lib/ld.so   - the dynamic loader of the build host
#   package/lib/*.so.*  - the C/C++ runtime libraries listed in RUNTIME_LIBS
#
# The architecture is detected by probing for the well-known loader path of
# x86_64 and aarch64; the script exits with status 1 when neither exists.

set -e

# Absolute directory holding this script. "$0" is quoted so that a checkout
# path containing whitespace or glob characters is handled correctly.
CURDIR=$(dirname "$(realpath "$0")")
PACKAGE_DIR="$CURDIR/package"
LIB_DIR="$PACKAGE_DIR/lib"

# Shared objects copied from the system library directory, in copy order.
RUNTIME_LIBS="libc.so.6 libgcc_s.so.1 libstdc++.so.6 libm.so.6 libgomp.so.1"

# Create lib directory
mkdir -p "$LIB_DIR"

# Only the directory prefix is quoted: the trailing glob must still expand.
cp -avrf "$CURDIR"/llama-cpp-* "$PACKAGE_DIR/"
cp -rfv "$CURDIR/run.sh" "$PACKAGE_DIR/"

# Detect architecture and pick the matching loader / library directory
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
    # x86_64 architecture
    echo "Detected x86_64 architecture, copying x86_64 libraries..."
    LOADER=/lib64/ld-linux-x86-64.so.2
    SYSTEM_LIB_DIR=/lib/x86_64-linux-gnu
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
    # ARM64 architecture
    echo "Detected ARM64 architecture, copying ARM64 libraries..."
    LOADER=/lib/ld-linux-aarch64.so.1
    SYSTEM_LIB_DIR=/lib/aarch64-linux-gnu
else
    # Diagnostics belong on stderr so they are not lost in captured stdout.
    echo "Error: Could not detect architecture" >&2
    exit 1
fi

# -L dereferences symlinks so the real files (not dangling links) are shipped.
cp -arfLv "$LOADER" "$LIB_DIR/ld.so"
for lib in $RUNTIME_LIBS; do
    cp -arfLv "$SYSTEM_LIB_DIR/$lib" "$LIB_DIR/$lib"
done

echo "Packaging completed successfully"
ls -liah "$PACKAGE_DIR/"
ls -liah "$LIB_DIR/"
|
||||||
62
backend/cpp/llama-cpp/run.sh
Executable file
62
backend/cpp/llama-cpp/run.sh
Executable file
@@ -0,0 +1,62 @@
|
|||||||
|
#!/bin/bash
# Launcher for the llama-cpp backend.
#
# Picks the most capable llama-cpp-* binary that sits next to this script,
# based on the CPU flags listed in /proc/cpuinfo, and replaces the current
# process with it (all arguments are forwarded unchanged).
#
# Selection order (later matches override earlier ones):
#   llama-cpp-fallback -> llama-cpp-avx -> llama-cpp-avx2 -> llama-cpp-avx512
# When LLAMACPP_GRPC_SERVERS is non-empty and llama-cpp-grpc exists, that
# binary wins over any CPU-specific variant.
set -ex

# Get the absolute current dir where the script is located
CURDIR=$(dirname "$(realpath "$0")")

cd /

CPUINFO=/proc/cpuinfo

# has_cpu_flag FLAG
# Succeeds when FLAG appears in $CPUINFO delimited by whitespace or by the
# start/end of a line. POSIX character classes are used instead of "\s" so
# the check does not depend on GNU grep, and a flag that happens to be the
# last word on its line is still recognised. Fails when $CPUINFO is not
# readable (e.g. on macOS).
has_cpu_flag() {
    [ -r "$CPUINFO" ] && grep -q -E -e "(^|[[:space:]])$1([[:space:]]|\$)" "$CPUINFO"
}

# select_variant FLAG LABEL BINARY_NAME
# When the CPU advertises FLAG, report it and switch BINARY to BINARY_NAME,
# provided that binary was actually shipped alongside this script.
select_variant() {
    if has_cpu_flag "$1"; then
        echo "CPU: $2 found OK"
        if [ -e "$CURDIR/$3" ]; then
            BINARY=$3
        fi
    fi
}

echo "CPU info:"
if [ -r "$CPUINFO" ]; then
    # Piping into head keeps the exit status at 0 when grep finds nothing,
    # so "set -e" does not abort on CPUs without these fields.
    grep -e "model[[:space:]]name" "$CPUINFO" | head -1
    grep -e "flags" "$CPUINFO" | head -1
fi

BINARY=llama-cpp-fallback

select_variant avx AVX llama-cpp-avx
select_variant avx2 AVX2 llama-cpp-avx2
# Check avx 512
select_variant avx512f AVX512F llama-cpp-avx512

if [ -n "$LLAMACPP_GRPC_SERVERS" ]; then
    if [ -e "$CURDIR/llama-cpp-grpc" ]; then
        BINARY=llama-cpp-grpc
    fi
fi

# Extend ld library path with the dir where this script is located/lib.
# The ${VAR:+:$VAR} form avoids a trailing ":" when the variable is unset or
# empty: an empty path entry makes the loader search the current directory,
# which is "/" at this point.
if [ "$(uname)" = "Darwin" ]; then
    export DYLD_LIBRARY_PATH="$CURDIR/lib${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"
    #export DYLD_FALLBACK_LIBRARY_PATH=$CURDIR/lib:$DYLD_FALLBACK_LIBRARY_PATH
else
    export LD_LIBRARY_PATH="$CURDIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
fi

# If there is a lib/ld.so, use it
if [ -f "$CURDIR/lib/ld.so" ]; then
    echo "Using lib/ld.so"
    echo "Using binary: $BINARY"
    exec "$CURDIR/lib/ld.so" "$CURDIR/$BINARY" "$@"
fi

echo "Using binary: $BINARY"
exec "$CURDIR/$BINARY" "$@"

# We should never reach this point, however just in case we do, run fallback
exec "$CURDIR/llama-cpp-fallback" "$@"
|
||||||
@@ -1,87 +0,0 @@
|
|||||||
|
|
||||||
LLAMA_VERSION?=
|
|
||||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
|
||||||
|
|
||||||
CMAKE_ARGS?=
|
|
||||||
BUILD_TYPE?=
|
|
||||||
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
|
||||||
TARGET?=--target grpc-server
|
|
||||||
|
|
||||||
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
|
|
||||||
|
|
||||||
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
|
|
||||||
ifeq ($(BUILD_TYPE),cublas)
|
|
||||||
CMAKE_ARGS+=-DGGML_CUDA=ON
|
|
||||||
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
|
||||||
# to CMAKE_ARGS automatically
|
|
||||||
else ifeq ($(BUILD_TYPE),openblas)
|
|
||||||
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
|
||||||
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
|
||||||
else ifeq ($(BUILD_TYPE),clblas)
|
|
||||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
|
||||||
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
|
||||||
else ifeq ($(BUILD_TYPE),hipblas)
|
|
||||||
CMAKE_ARGS+=-DGGML_HIP=ON
|
|
||||||
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
|
|
||||||
# But if it's OSX without metal, disable it here
|
|
||||||
else ifeq ($(OS),Darwin)
|
|
||||||
ifneq ($(BUILD_TYPE),metal)
|
|
||||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
|
||||||
else
|
|
||||||
CMAKE_ARGS+=-DGGML_METAL=ON
|
|
||||||
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
|
||||||
TARGET+=--target ggml-metal
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),sycl_f16)
|
|
||||||
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
|
||||||
-DCMAKE_C_COMPILER=icx \
|
|
||||||
-DCMAKE_CXX_COMPILER=icpx \
|
|
||||||
-DCMAKE_CXX_FLAGS="-fsycl" \
|
|
||||||
-DGGML_SYCL_F16=ON
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),sycl_f32)
|
|
||||||
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
|
||||||
-DCMAKE_C_COMPILER=icx \
|
|
||||||
-DCMAKE_CXX_COMPILER=icpx \
|
|
||||||
-DCMAKE_CXX_FLAGS="-fsycl"
|
|
||||||
endif
|
|
||||||
|
|
||||||
llama.cpp:
|
|
||||||
mkdir -p llama.cpp
|
|
||||||
cd llama.cpp && \
|
|
||||||
git init && \
|
|
||||||
git remote add origin $(LLAMA_REPO) && \
|
|
||||||
git fetch origin && \
|
|
||||||
git checkout -b build $(LLAMA_VERSION) && \
|
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
|
||||||
|
|
||||||
llama.cpp/tools/grpc-server: llama.cpp
|
|
||||||
mkdir -p llama.cpp/tools/grpc-server
|
|
||||||
bash prepare.sh
|
|
||||||
|
|
||||||
rebuild:
|
|
||||||
bash prepare.sh
|
|
||||||
rm -rf grpc-server
|
|
||||||
$(MAKE) grpc-server
|
|
||||||
|
|
||||||
purge:
|
|
||||||
rm -rf llama.cpp/build
|
|
||||||
rm -rf llama.cpp/tools/grpc-server
|
|
||||||
rm -rf grpc-server
|
|
||||||
|
|
||||||
clean: purge
|
|
||||||
rm -rf llama.cpp
|
|
||||||
|
|
||||||
# Configure and build llama.cpp with CMake, then copy the resulting
# grpc-server binary next to this Makefile. For SYCL build types the whole
# build runs inside a bash shell that has sourced $(ONEAPI_VARS) first.
# The leading '+' marks the build lines as recursive-make style commands.
grpc-server: llama.cpp llama.cpp/tools/grpc-server
	@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
ifeq (,$(findstring sycl,$(BUILD_TYPE)))
	+mkdir -p llama.cpp/build && cd llama.cpp/build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)
else
	+bash -c "source $(ONEAPI_VARS); \
	mkdir -p llama.cpp/build && cd llama.cpp/build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
endif
	cp llama.cpp/build/bin/grpc-server .
|
|
||||||
51
backend/go/bark-cpp/Makefile
Normal file
51
backend/go/bark-cpp/Makefile
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
# Headers and libraries are both resolved from this backend's directory.
INCLUDE_PATH := $(abspath ./)
LIBRARY_PATH := $(abspath ./)

AR ?= ar
GOCMD = go
BUILD_TYPE ?=
CMAKE_ARGS ?= -DGGML_NATIVE=OFF

# bark.cpp: repository and pinned revision
BARKCPP_REPO ?= https://github.com/PABannier/bark.cpp.git
BARKCPP_VERSION ?= 5d5be84f089ab9ea53b7a793f088d3fbf7247495

# Include path: this directory plus the bark.cpp checkout under sources/.
CXXFLAGS = -I.
CXXFLAGS += -I$(INCLUDE_PATH)/sources/bark.cpp/examples
CXXFLAGS += -I$(INCLUDE_PATH)/sources/bark.cpp/encodec.cpp/ggml/include
CXXFLAGS += -I$(INCLUDE_PATH)/sources/bark.cpp/spm-headers
CXXFLAGS += -I$(INCLUDE_PATH)/sources/bark.cpp
# The binding is compiled as optimised, position-independent C++17.
CXXFLAGS += -O3 -DNDEBUG -std=c++17 -fPIC
# warnings
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function

LDFLAGS = -L$(LIBRARY_PATH) -L$(LIBRARY_PATH)/sources/bark.cpp/build/examples -lbark -lstdc++ -lm
|
||||||
|
|
||||||
|
## bark.cpp
|
||||||
|
# Clone bark.cpp (with submodules), move the checkout to the pinned revision
# and re-sync the submodules shallowly for that revision.
sources/bark.cpp:
	git clone --recursive $(BARKCPP_REPO) $@ && \
	git -C $@ checkout $(BARKCPP_VERSION) && \
	git -C $@ submodule update --init --recursive --depth 1 --single-branch
|
||||||
|
|
||||||
|
# Configure bark.cpp out-of-tree in sources/bark.cpp/build with $(CMAKE_ARGS)
# and build it in Release configuration.
sources/bark.cpp/build/libbark.a: sources/bark.cpp
	mkdir -p $(@D) && \
	cd $(@D) && \
	cmake $(CMAKE_ARGS) .. && \
	cmake --build . --config Release
|
||||||
|
|
||||||
|
# Compile the C++ side of the Go binding.
#  - gobark.cpp is a real prerequisite so edits to it trigger a recompile.
#  - sources/bark.cpp is an order-only prerequisite: CXXFLAGS points into that
#    checkout, so it must exist before compiling (matters with `make -j`), but
#    changes to the directory timestamp must not force a rebuild.
# Linker flags are not passed: this is a compile-only (-c) step.
gobark.o: gobark.cpp | sources/bark.cpp
	$(CXX) $(CXXFLAGS) -c gobark.cpp -o gobark.o
|
||||||
|
|
||||||
|
# Start from the archive produced by the bark.cpp build and add the binding
# object to the local copy.
libbark.a: sources/bark.cpp/build/libbark.a gobark.o
	cp $(INCLUDE_PATH)/sources/bark.cpp/build/libbark.a ./
	$(AR) rcs $@ gobark.o
|
||||||
|
|
||||||
|
# Build the Go backend binary. The environment points cgo at this directory
# for both headers (C_INCLUDE_PATH) and libraries (LIBRARY_PATH).
bark-cpp: libbark.a
	CGO_LDFLAGS="$(CGO_LDFLAGS)" \
	C_INCLUDE_PATH="$(CURDIR)" \
	LIBRARY_PATH=$(CURDIR) \
	$(GOCMD) build -v -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $@ ./
|
||||||
|
|
||||||
|
# These targets name actions, not files. package.sh creates a ./package
# directory next to this Makefile; without .PHONY, make would see that
# directory on the next run and consider `package` already up to date.
.PHONY: package build clean

# Stage the binary, launcher and runtime libraries into ./package.
package:
	bash package.sh

# Build the binary, then package it. Packaging runs as a sub-make from the
# recipe so that it cannot start before bark-cpp exists under `make -j`.
build: bark-cpp
	$(MAKE) package

clean:
	rm -f gobark.o libbark.a
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
// #cgo CXXFLAGS: -I${SRCDIR}/../../../sources/bark.cpp/ -I${SRCDIR}/../../../sources/bark.cpp/encodec.cpp -I${SRCDIR}/../../../sources/bark.cpp/examples -I${SRCDIR}/../../../sources/bark.cpp/spm-headers
|
// #cgo CXXFLAGS: -I${SRCDIR}/sources/bark.cpp/ -I${SRCDIR}/sources/bark.cpp/encodec.cpp -I${SRCDIR}/sources/bark.cpp/encodec.cpp/ggml/include -I${SRCDIR}/sources/bark.cpp/examples -I${SRCDIR}/sources/bark.cpp/spm-headers
|
||||||
// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../sources/bark.cpp/build/examples -L${SRCDIR}/../../../sources/bark.cpp/build/encodec.cpp/ -lbark -lencodec -lcommon
|
// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/sources/bark.cpp/build/examples -L${SRCDIR}/sources/bark.cpp/build/encodec.cpp/ggml/src/ -L${SRCDIR}/sources/bark.cpp/build/encodec.cpp/ -lbark -lencodec -lcommon -lggml -lgomp
|
||||||
// #include <gobark.h>
|
// #include <gobark.h>
|
||||||
// #include <stdlib.h>
|
// #include <stdlib.h>
|
||||||
import "C"
|
import "C"
|
||||||
41
backend/go/bark-cpp/package.sh
Executable file
41
backend/go/bark-cpp/package.sh
Executable file
@@ -0,0 +1,41 @@
|
|||||||
|
#!/bin/bash

# Script to copy the appropriate libraries based on architecture
# This script is used in the final stage of the Dockerfile
#
# Stages the bark-cpp binary, run.sh and a fixed set of system runtime
# libraries (plus the dynamic loader, stored as lib/ld.so) under ./package.

set -e

# Directory containing this script; every path below is relative to it.
# All expansions are quoted so a checkout path containing spaces still works.
CURDIR=$(dirname "$(realpath "$0")")

# Create lib directory
mkdir -p "$CURDIR/package/lib"

cp -avrf "$CURDIR/bark-cpp" "$CURDIR/package/"
cp -rfv "$CURDIR/run.sh" "$CURDIR/package/"

# Detect architecture from the presence of its dynamic loader and remember
# where the loader and the matching multiarch library directory live.
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
    # x86_64 architecture
    echo "Detected x86_64 architecture, copying x86_64 libraries..."
    loader="/lib64/ld-linux-x86-64.so.2"
    lib_dir="/lib/x86_64-linux-gnu"
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
    # ARM64 architecture
    echo "Detected ARM64 architecture, copying ARM64 libraries..."
    loader="/lib/ld-linux-aarch64.so.1"
    lib_dir="/lib/aarch64-linux-gnu"
else
    echo "Error: Could not detect architecture" >&2
    exit 1
fi

# The loader is stored under a fixed name so run.sh can find it.
cp -arfLv "$loader" "$CURDIR/package/lib/ld.so"

# Same library list for every architecture; -L dereferences symlinks so real
# files end up in the package.
for lib in libc.so.6 libgcc_s.so.1 libstdc++.so.6 libm.so.6 libgomp.so.1; do
    cp -arfLv "$lib_dir/$lib" "$CURDIR/package/lib/$lib"
done

echo "Packaging completed successfully"
ls -liah "$CURDIR/package/"
ls -liah "$CURDIR/package/lib/"
|
||||||
13
backend/go/bark-cpp/run.sh
Executable file
13
backend/go/bark-cpp/run.sh
Executable file
@@ -0,0 +1,13 @@
|
|||||||
|
#!/bin/bash
set -ex

# Launcher for the packaged bark-cpp backend: runs the binary that sits next
# to this script, preferring the bundled dynamic loader when one is present.

# Directory containing this script (quoted so paths with spaces work).
CURDIR=$(dirname "$(realpath "$0")")

# Put the bundled libraries first. The previous value is appended only when it
# is non-empty: a trailing ':' would add an empty search-path entry, which the
# dynamic loader interprets as the current working directory.
export LD_LIBRARY_PATH="$CURDIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# If there is a lib/ld.so, use it
if [ -f "$CURDIR/lib/ld.so" ]; then
    echo "Using lib/ld.so"
    exec "$CURDIR/lib/ld.so" "$CURDIR/bark-cpp" "$@"
fi

exec "$CURDIR/bark-cpp" "$@"
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
# Headers and libraries are both resolved relative to this directory; the
# bark.cpp checkout lives three levels up under sources/.
INCLUDE_PATH := $(abspath ./)
LIBRARY_PATH := $(abspath ./)

AR?=ar

BUILD_TYPE?=

# The binding is compiled as optimised, position-independent C++17.
CXXFLAGS = -I.
CXXFLAGS += -I$(INCLUDE_PATH)/../../../sources/bark.cpp/examples
CXXFLAGS += -I$(INCLUDE_PATH)/../../../sources/bark.cpp/spm-headers
CXXFLAGS += -I$(INCLUDE_PATH)/../../../sources/bark.cpp
CXXFLAGS += -O3 -DNDEBUG -std=c++17 -fPIC
# warnings
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function

LDFLAGS = -L$(LIBRARY_PATH) -L$(LIBRARY_PATH)/../../../sources/bark.cpp/build/examples -lbark -lstdc++ -lm

gobark.o:
	$(CXX) $(CXXFLAGS) gobark.cpp -o $@ -c $(LDFLAGS)

# Copy the prebuilt bark archive, then add the binding object and the three
# ggml objects from the bark.cpp CMake build tree to it.
libbark.a: gobark.o
	cp $(INCLUDE_PATH)/../../../sources/bark.cpp/build/libbark.a ./
	$(AR) rcs $@ gobark.o \
		$(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml.c.o \
		$(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-alloc.c.o \
		$(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-backend.c.o

clean:
	rm -f gobark.o libbark.a
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
#!/bin/bash
set -ex

# Replace this shell with the backend binary from the current directory.
# The caller's arguments are forwarded unchanged; without "$@" anything passed
# to this script would be dropped.
exec ./bark-cpp "$@"
|
|
||||||
9
backend/go/huggingface/Makefile
Normal file
9
backend/go/huggingface/Makefile
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
GOCMD=go

# `package` and `build` name actions, not files. package.sh creates a
# ./package directory next to this Makefile; without .PHONY, make would see
# that directory on the next run and consider `package` already up to date.
.PHONY: package build

# Pure-Go backend binary (cgo disabled).
huggingface:
	CGO_ENABLED=0 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o huggingface ./

# Stage the binary and run.sh into ./package.
package:
	bash package.sh

# Build, then package. Packaging runs as a sub-make from the recipe so it
# cannot start before the binary exists under `make -j`.
build: huggingface
	$(MAKE) package
|
||||||
12
backend/go/huggingface/package.sh
Executable file
12
backend/go/huggingface/package.sh
Executable file
@@ -0,0 +1,12 @@
|
|||||||
|
#!/bin/bash

# Stage the huggingface backend binary and its run.sh launcher into ./package.
# No system libraries are copied by this script.
# This script is used in the final stage of the Dockerfile

set -e

# Directory containing this script; quoted so paths with spaces work.
CURDIR=$(dirname "$(realpath "$0")")

mkdir -p "$CURDIR/package"
cp -avrf "$CURDIR/huggingface" "$CURDIR/package/"
cp -rfv "$CURDIR/run.sh" "$CURDIR/package/"
|
||||||
6
backend/go/huggingface/run.sh
Executable file
6
backend/go/huggingface/run.sh
Executable file
@@ -0,0 +1,6 @@
|
|||||||
|
#!/bin/bash
set -ex

# Launcher: replace this shell with the huggingface binary located next to
# this script, forwarding all arguments unchanged.

# Directory containing this script; quoted so paths with spaces work.
CURDIR=$(dirname "$(realpath "$0")")

exec "$CURDIR/huggingface" "$@"
|
||||||
@@ -1,135 +0,0 @@
|
|||||||
# Headers and libraries are both resolved relative to this directory; the
# stablediffusion-ggml.cpp checkout lives four levels up under sources/.
INCLUDE_PATH := $(abspath ./)
LIBRARY_PATH := $(abspath ./)

AR ?= ar
CMAKE_ARGS ?=
BUILD_TYPE ?=
ONEAPI_VARS ?= /opt/intel/oneapi/setvars.sh

# The binding is compiled as optimised, position-independent C++17.
CXXFLAGS = -I.
CXXFLAGS += -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty
CXXFLAGS += -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include
CXXFLAGS += -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp
CXXFLAGS += -O3 -DNDEBUG -std=c++17 -fPIC

GOCMD ?= go
CGO_LDFLAGS ?=
# Kept separate so the parent makefile overwriting CGO_LDFLAGS (needed for
# hipblas) does not discard the SYCL link flags.
CGO_LDFLAGS_SYCL =
GO_TAGS ?=
LD_FLAGS ?=

# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
CMAKE_ARGS += -DBUILD_SHARED_LIBS=OFF

# Backend selection: the first matching BUILD_TYPE wins; Darwin is only
# considered when none of the GPU/BLAS build types matched.
ifeq ($(BUILD_TYPE),cublas)
    CMAKE_ARGS += -DSD_CUDA=ON
else ifeq ($(BUILD_TYPE),openblas)
    CMAKE_ARGS += -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
else ifeq ($(BUILD_TYPE),clblas)
    # clblas (openCL): CLBlast_DIR is a placeholder path
    CMAKE_ARGS += -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),hipblas)
    CMAKE_ARGS += -DSD_HIPBLAS=ON
else ifeq ($(OS),Darwin)
    ifeq ($(BUILD_TYPE),metal)
        CMAKE_ARGS += -DSD_METAL=ON
        CMAKE_ARGS += -DGGML_METAL_EMBED_LIBRARY=ON
        TARGET += --target ggml-metal
    else
        # OSX without metal: disable it explicitly
        CMAKE_ARGS += -DSD_METAL=OFF
    endif
endif

# SYCL builds use the Intel compilers for CMake, for the binding and for cgo,
# and link against oneDNN / MKL. The f16 variant only adds GGML_SYCL_F16.
ifeq ($(BUILD_TYPE),sycl_f16)
    CMAKE_ARGS += -DGGML_SYCL=ON
    CMAKE_ARGS += -DCMAKE_C_COMPILER=icx
    CMAKE_ARGS += -DCMAKE_CXX_COMPILER=icpx
    CMAKE_ARGS += -DSD_SYCL=ON
    CMAKE_ARGS += -DGGML_SYCL_F16=ON
    CC = icx
    CXX = icpx
    CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
    CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
    CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
    CGO_CXXFLAGS += $(shell pkg-config --cflags mkl-static-lp64-gomp )
endif

ifeq ($(BUILD_TYPE),sycl_f32)
    CMAKE_ARGS += -DGGML_SYCL=ON
    CMAKE_ARGS += -DCMAKE_C_COMPILER=icx
    CMAKE_ARGS += -DCMAKE_CXX_COMPILER=icpx
    CMAKE_ARGS += -DSD_SYCL=ON
    CC = icx
    CXX = icpx
    CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
    CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
    CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
    CGO_CXXFLAGS += $(shell pkg-config --cflags mkl-static-lp64-gomp )
endif

# warnings (currently disabled)
# CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
|
|
||||||
|
|
||||||
# Find all .a archives in GGML_ARCHIVE_DIR
# (ggml can have different backends cpu, cuda, etc., each backend generates a .a archive)
# The list is computed when the makefile is parsed, so this rule is only
# meaningful once the ggml build has produced its archives.
GGML_ARCHIVE_DIR := build/ggml/src/
ALL_ARCHIVES := $(shell find $(GGML_ARCHIVE_DIR) -type f -name '*.a')

# Name of the single merged library
COMBINED_LIB := libggmlall.a

# Rule to merge all the .a files into one.
#  - merge-tmp is removed up front so objects left behind by an earlier failed
#    run are never merged.
#  - $(AR) is used throughout so an overridden archiver is honoured.
#  - `|| exit 1` aborts on the first archive that fails to extract; a shell
#    `for` loop otherwise only reports the status of its last iteration.
#  - ranlib makes sure the merged archive has a proper index.
$(COMBINED_LIB): $(ALL_ARCHIVES)
	@echo "Merging all .a into $(COMBINED_LIB)"
	rm -rf $@ merge-tmp
	mkdir -p merge-tmp
	for a in $(ALL_ARCHIVES); do \
		( cd merge-tmp && $(AR) x ../$$a ) || exit 1; \
	done
	( cd merge-tmp && $(AR) rcs ../$@ *.o )
	ranlib $@
	rm -rf merge-tmp
|
|
||||||
|
|
||||||
# Configure and build stablediffusion-ggml.cpp out-of-tree in ./build, then
# re-invoke make for $(COMBINED_LIB). For SYCL build types the CMake steps run
# inside a bash shell that has sourced $(ONEAPI_VARS) first.
build/libstable-diffusion.a:
	@echo "Building SD with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
ifeq (,$(findstring sycl,$(BUILD_TYPE)))
	mkdir -p $(@D) && \
	cd $(@D) && \
	cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \
	cmake --build . --config Release
else
	+bash -c "source $(ONEAPI_VARS); \
	mkdir -p $(@D) && \
	cd $(@D) && \
	cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \
	cmake --build . --config Release"
endif
	$(MAKE) $(COMBINED_LIB)
|
|
||||||
|
|
||||||
# Compile the C++ side of the Go binding; SYCL build types compile inside a
# shell that has sourced $(ONEAPI_VARS).
gosd.o:
ifeq (,$(findstring sycl,$(BUILD_TYPE)))
	$(CXX) $(CXXFLAGS) gosd.cpp -o $@ -c
else
	+bash -c "source $(ONEAPI_VARS); \
	$(CXX) $(CXXFLAGS) gosd.cpp -o $@ -c"
endif
|
|
||||||
|
|
||||||
# Copy the stable-diffusion archive to libsd.a and add the binding object.
# build/libstable-diffusion.a is listed as a prerequisite because the recipe
# copies it: without the dependency the cp fails (or picks up a stale archive)
# whenever that library has not been built yet.
libsd.a: build/libstable-diffusion.a gosd.o
	cp $(INCLUDE_PATH)/build/libstable-diffusion.a ./libsd.a
	$(AR) rcs libsd.a gosd.o
|
|
||||||
|
|
||||||
# Build the Go backend into backend-assets/grpc. cgo gets the caller's
# CGO_LDFLAGS plus the SYCL-specific link flags, and looks for headers and
# libraries in this directory. The binary is compressed when UPX is set.
stablediffusion-ggml:
	CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_SYCL)" \
	C_INCLUDE_PATH="$(INCLUDE_PATH)" \
	LIBRARY_PATH="$(LIBRARY_PATH)" \
	CC="$(CC)" \
	CXX="$(CXX)" \
	CGO_CXXFLAGS="$(CGO_CXXFLAGS)" \
	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o ../../../../backend-assets/grpc/stablediffusion-ggml ./
ifneq (,$(UPX))
	$(UPX) ../../../../backend-assets/grpc/stablediffusion-ggml
endif

# Remove the binding object, both archives and the CMake build tree.
clean:
	rm -rf gosd.o libsd.a build $(COMBINED_LIB)
|
|
||||||
@@ -1,231 +0,0 @@
|
|||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include <iostream>
|
|
||||||
#include <random>
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
#include "gosd.h"
|
|
||||||
|
|
||||||
// #include "preprocessing.hpp"
|
|
||||||
#include "flux.hpp"
|
|
||||||
#include "stable-diffusion.h"
|
|
||||||
|
|
||||||
#define STB_IMAGE_IMPLEMENTATION
|
|
||||||
#define STB_IMAGE_STATIC
|
|
||||||
#include "stb_image.h"
|
|
||||||
|
|
||||||
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
|
||||||
#define STB_IMAGE_WRITE_STATIC
|
|
||||||
#include "stb_image_write.h"
|
|
||||||
|
|
||||||
#define STB_IMAGE_RESIZE_IMPLEMENTATION
|
|
||||||
#define STB_IMAGE_RESIZE_STATIC
|
|
||||||
#include "stb_image_resize.h"
|
|
||||||
|
|
||||||
// Names of the sampler method, same order as enum sample_method in stable-diffusion.h
|
|
||||||
const char* sample_method_str[] = {
|
|
||||||
"euler_a",
|
|
||||||
"euler",
|
|
||||||
"heun",
|
|
||||||
"dpm2",
|
|
||||||
"dpm++2s_a",
|
|
||||||
"dpm++2m",
|
|
||||||
"dpm++2mv2",
|
|
||||||
"ipndm",
|
|
||||||
"ipndm_v",
|
|
||||||
"lcm",
|
|
||||||
"ddim_trailing",
|
|
||||||
"tcd",
|
|
||||||
};
|
|
||||||
|
|
||||||
// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
|
|
||||||
const char* schedule_str[] = {
|
|
||||||
"default",
|
|
||||||
"discrete",
|
|
||||||
"karras",
|
|
||||||
"exponential",
|
|
||||||
"ays",
|
|
||||||
"gits",
|
|
||||||
};
|
|
||||||
|
|
||||||
sd_ctx_t* sd_c;
|
|
||||||
|
|
||||||
sample_method_t sample_method;
|
|
||||||
|
|
||||||
int load_model(char *model, char* options[], int threads, int diff) {
|
|
||||||
fprintf (stderr, "Loading model!\n");
|
|
||||||
|
|
||||||
char *stableDiffusionModel = "";
|
|
||||||
if (diff == 1 ) {
|
|
||||||
stableDiffusionModel = model;
|
|
||||||
model = "";
|
|
||||||
}
|
|
||||||
|
|
||||||
// decode options. Options are in form optname:optvale, or if booleans only optname.
|
|
||||||
char *clip_l_path = "";
|
|
||||||
char *clip_g_path = "";
|
|
||||||
char *t5xxl_path = "";
|
|
||||||
char *vae_path = "";
|
|
||||||
char *scheduler = "";
|
|
||||||
char *sampler = "";
|
|
||||||
|
|
||||||
// If options is not NULL, parse options
|
|
||||||
for (int i = 0; options[i] != NULL; i++) {
|
|
||||||
char *optname = strtok(options[i], ":");
|
|
||||||
char *optval = strtok(NULL, ":");
|
|
||||||
if (optval == NULL) {
|
|
||||||
optval = "true";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!strcmp(optname, "clip_l_path")) {
|
|
||||||
clip_l_path = optval;
|
|
||||||
}
|
|
||||||
if (!strcmp(optname, "clip_g_path")) {
|
|
||||||
clip_g_path = optval;
|
|
||||||
}
|
|
||||||
if (!strcmp(optname, "t5xxl_path")) {
|
|
||||||
t5xxl_path = optval;
|
|
||||||
}
|
|
||||||
if (!strcmp(optname, "vae_path")) {
|
|
||||||
vae_path = optval;
|
|
||||||
}
|
|
||||||
if (!strcmp(optname, "scheduler")) {
|
|
||||||
scheduler = optval;
|
|
||||||
}
|
|
||||||
if (!strcmp(optname, "sampler")) {
|
|
||||||
sampler = optval;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int sample_method_found = -1;
|
|
||||||
for (int m = 0; m < N_SAMPLE_METHODS; m++) {
|
|
||||||
if (!strcmp(sampler, sample_method_str[m])) {
|
|
||||||
sample_method_found = m;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (sample_method_found == -1) {
|
|
||||||
fprintf(stderr, "Invalid sample method, default to EULER_A!\n");
|
|
||||||
sample_method_found = EULER_A;
|
|
||||||
}
|
|
||||||
sample_method = (sample_method_t)sample_method_found;
|
|
||||||
|
|
||||||
int schedule_found = -1;
|
|
||||||
for (int d = 0; d < N_SCHEDULES; d++) {
|
|
||||||
if (!strcmp(scheduler, schedule_str[d])) {
|
|
||||||
schedule_found = d;
|
|
||||||
fprintf (stderr, "Found scheduler: %s\n", scheduler);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (schedule_found == -1) {
|
|
||||||
fprintf (stderr, "Invalid scheduler! using DEFAULT\n");
|
|
||||||
schedule_found = DEFAULT;
|
|
||||||
}
|
|
||||||
|
|
||||||
schedule_t schedule = (schedule_t)schedule_found;
|
|
||||||
|
|
||||||
fprintf (stderr, "Creating context\n");
|
|
||||||
sd_ctx_t* sd_ctx = new_sd_ctx(model,
|
|
||||||
clip_l_path,
|
|
||||||
clip_g_path,
|
|
||||||
t5xxl_path,
|
|
||||||
stableDiffusionModel,
|
|
||||||
vae_path,
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
false,
|
|
||||||
false,
|
|
||||||
false,
|
|
||||||
threads,
|
|
||||||
SD_TYPE_COUNT,
|
|
||||||
STD_DEFAULT_RNG,
|
|
||||||
schedule,
|
|
||||||
false,
|
|
||||||
false,
|
|
||||||
false,
|
|
||||||
false);
|
|
||||||
|
|
||||||
if (sd_ctx == NULL) {
|
|
||||||
fprintf (stderr, "failed loading model (generic error)\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
fprintf (stderr, "Created context: OK\n");
|
|
||||||
|
|
||||||
sd_c = sd_ctx;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed , char *dst, float cfg_scale) {
|
|
||||||
|
|
||||||
sd_image_t* results;
|
|
||||||
|
|
||||||
std::vector<int> skip_layers = {7, 8, 9};
|
|
||||||
|
|
||||||
fprintf (stderr, "Generating image\n");
|
|
||||||
|
|
||||||
results = txt2img(sd_c,
|
|
||||||
text,
|
|
||||||
negativeText,
|
|
||||||
-1, //clip_skip
|
|
||||||
cfg_scale, // sfg_scale
|
|
||||||
3.5f,
|
|
||||||
0, // eta
|
|
||||||
width,
|
|
||||||
height,
|
|
||||||
sample_method,
|
|
||||||
steps,
|
|
||||||
seed,
|
|
||||||
1,
|
|
||||||
NULL,
|
|
||||||
0.9f,
|
|
||||||
20.f,
|
|
||||||
false,
|
|
||||||
"",
|
|
||||||
skip_layers.data(),
|
|
||||||
skip_layers.size(),
|
|
||||||
0,
|
|
||||||
0.01,
|
|
||||||
0.2);
|
|
||||||
|
|
||||||
if (results == NULL) {
|
|
||||||
fprintf (stderr, "NO results\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (results[0].data == NULL) {
|
|
||||||
fprintf (stderr, "Results with no data\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
fprintf (stderr, "Writing PNG\n");
|
|
||||||
|
|
||||||
fprintf (stderr, "DST: %s\n", dst);
|
|
||||||
fprintf (stderr, "Width: %d\n", results[0].width);
|
|
||||||
fprintf (stderr, "Height: %d\n", results[0].height);
|
|
||||||
fprintf (stderr, "Channel: %d\n", results[0].channel);
|
|
||||||
fprintf (stderr, "Data: %p\n", results[0].data);
|
|
||||||
|
|
||||||
stbi_write_png(dst, results[0].width, results[0].height, results[0].channel,
|
|
||||||
results[0].data, 0, NULL);
|
|
||||||
fprintf (stderr, "Saved resulting image to '%s'\n", dst);
|
|
||||||
|
|
||||||
// TODO: free results. Why does it crash?
|
|
||||||
|
|
||||||
free(results[0].data);
|
|
||||||
results[0].data = NULL;
|
|
||||||
free(results);
|
|
||||||
fprintf (stderr, "gen_image is done", dst);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Releases the global context created by load_model, if any, and clears the
// pointer so later calls cannot use a freed context. Always returns 0; the
// function is declared int, so falling off the end without a return value
// would be undefined behaviour.
int unload() {
    if (sd_c != NULL) {
        free_sd_ctx(sd_c);
        sd_c = NULL;
    }
    return 0;
}
|
|
||||||
|
|
||||||
@@ -1,96 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include
|
|
||||||
// #cgo LDFLAGS: -L${SRCDIR}/ -lsd -lstdc++ -lm -lggmlall -lgomp
|
|
||||||
// #include <gosd.h>
|
|
||||||
// #include <stdlib.h>
|
|
||||||
import "C"
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"strings"
|
|
||||||
"unsafe"
|
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
"github.com/mudler/LocalAI/pkg/utils"
|
|
||||||
)
|
|
||||||
|
|
||||||
type SDGGML struct {
|
|
||||||
base.SingleThread
|
|
||||||
threads int
|
|
||||||
sampleMethod string
|
|
||||||
cfgScale float32
|
|
||||||
}
|
|
||||||
|
|
||||||
func (sd *SDGGML) Load(opts *pb.ModelOptions) error {
|
|
||||||
|
|
||||||
sd.threads = int(opts.Threads)
|
|
||||||
|
|
||||||
modelFile := C.CString(opts.ModelFile)
|
|
||||||
defer C.free(unsafe.Pointer(modelFile))
|
|
||||||
|
|
||||||
var options **C.char
|
|
||||||
// prepare the options array to pass to C
|
|
||||||
|
|
||||||
size := C.size_t(unsafe.Sizeof((*C.char)(nil)))
|
|
||||||
length := C.size_t(len(opts.Options))
|
|
||||||
options = (**C.char)(C.malloc(length * size))
|
|
||||||
view := (*[1 << 30]*C.char)(unsafe.Pointer(options))[0:len(opts.Options):len(opts.Options)]
|
|
||||||
|
|
||||||
var diffusionModel int
|
|
||||||
|
|
||||||
var oo []string
|
|
||||||
for _, op := range opts.Options {
|
|
||||||
if op == "diffusion_model" {
|
|
||||||
diffusionModel = 1
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// If it's an option path, we resolve absolute path from the model path
|
|
||||||
if strings.Contains(op, ":") && strings.Contains(op, "path") {
|
|
||||||
data := strings.Split(op, ":")
|
|
||||||
data[1] = filepath.Join(opts.ModelPath, data[1])
|
|
||||||
if err := utils.VerifyPath(data[1], opts.ModelPath); err == nil {
|
|
||||||
oo = append(oo, strings.Join(data, ":"))
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
oo = append(oo, op)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fmt.Fprintf(os.Stderr, "Options: %+v\n", oo)
|
|
||||||
|
|
||||||
for i, x := range oo {
|
|
||||||
view[i] = C.CString(x)
|
|
||||||
}
|
|
||||||
|
|
||||||
sd.cfgScale = opts.CFGScale
|
|
||||||
|
|
||||||
ret := C.load_model(modelFile, options, C.int(opts.Threads), C.int(diffusionModel))
|
|
||||||
if ret != 0 {
|
|
||||||
return fmt.Errorf("could not load model")
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (sd *SDGGML) GenerateImage(opts *pb.GenerateImageRequest) error {
|
|
||||||
t := C.CString(opts.PositivePrompt)
|
|
||||||
defer C.free(unsafe.Pointer(t))
|
|
||||||
|
|
||||||
dst := C.CString(opts.Dst)
|
|
||||||
defer C.free(unsafe.Pointer(dst))
|
|
||||||
|
|
||||||
negative := C.CString(opts.NegativePrompt)
|
|
||||||
defer C.free(unsafe.Pointer(negative))
|
|
||||||
|
|
||||||
ret := C.gen_image(t, negative, C.int(opts.Width), C.int(opts.Height), C.int(opts.Step), C.int(opts.Seed), dst, C.float(sd.cfgScale))
|
|
||||||
if ret != 0 {
|
|
||||||
return fmt.Errorf("inference failed")
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
/* C interface of the stable-diffusion binding, callable from cgo. */
#ifdef __cplusplus
extern "C" {
#endif

/* Creates the model context; options is an array of "name:value" strings
 * that the implementation reads until a NULL entry. Returns 0 on success. */
int load_model(char *model, char **options, int threads, int diffusionModel);

/* Generates one image and writes it to dst. Returns 0 on success. */
int gen_image(char *text, char *negativeText, int width, int height,
              int steps, int seed, char *dst, float cfg_scale);

#ifdef __cplusplus
}
#endif
|
|
||||||
@@ -1,20 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
|
||||||
import (
|
|
||||||
"flag"
|
|
||||||
|
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
flag.Parse()
|
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, &SDGGML{}); err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
9
backend/go/local-store/Makefile
Normal file
9
backend/go/local-store/Makefile
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
GOCMD=go

# `package` and `build` name actions, not files. package.sh creates a
# ./package directory next to this Makefile; without .PHONY, make would see
# that directory on the next run and consider `package` already up to date.
.PHONY: package build

# Pure-Go backend binary (cgo disabled).
local-store:
	CGO_ENABLED=0 $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o local-store ./

# Stage the binary and run.sh into ./package.
package:
	bash package.sh

# Build, then package. Packaging runs as a sub-make from the recipe so it
# cannot start before the binary exists under `make -j`.
build: local-store
	$(MAKE) package
|
||||||
12
backend/go/local-store/package.sh
Executable file
12
backend/go/local-store/package.sh
Executable file
@@ -0,0 +1,12 @@
|
|||||||
|
#!/bin/bash

# Stage the local-store backend binary and its run.sh launcher into ./package.
# No system libraries are copied by this script.
# This script is used in the final stage of the Dockerfile

set -e

# Directory containing this script; quoted so paths with spaces work.
CURDIR=$(dirname "$(realpath "$0")")

mkdir -p "$CURDIR/package"
cp -avrf "$CURDIR/local-store" "$CURDIR/package/"
cp -rfv "$CURDIR/run.sh" "$CURDIR/package/"
|
||||||
6
backend/go/local-store/run.sh
Executable file
6
backend/go/local-store/run.sh
Executable file
@@ -0,0 +1,6 @@
|
|||||||
|
#!/bin/bash
set -ex

# Launcher: replace this shell with the local-store binary located next to
# this script, forwarding all arguments unchanged.

# Directory containing this script; quoted so paths with spaces work.
CURDIR=$(dirname "$(realpath "$0")")

exec "$CURDIR/local-store" "$@"
|
||||||
@@ -4,6 +4,7 @@ package main
|
|||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||||
import (
|
import (
|
||||||
"container/heap"
|
"container/heap"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
"slices"
|
"slices"
|
||||||
@@ -99,6 +100,9 @@ func sortIntoKeySlicese(keys []*pb.StoresKey) [][]float32 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *Store) Load(opts *pb.ModelOptions) error {
|
func (s *Store) Load(opts *pb.ModelOptions) error {
|
||||||
|
if opts.Model != "" {
|
||||||
|
return errors.New("not implemented")
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -315,7 +319,7 @@ func isNormalized(k []float32) bool {
|
|||||||
|
|
||||||
for _, v := range k {
|
for _, v := range k {
|
||||||
v64 := float64(v)
|
v64 := float64(v)
|
||||||
sum += v64*v64
|
sum += v64 * v64
|
||||||
}
|
}
|
||||||
|
|
||||||
s := math.Sqrt(sum)
|
s := math.Sqrt(sum)
|
||||||
37
backend/go/piper/Makefile
Normal file
37
backend/go/piper/Makefile
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
|
||||||
|
# go-piper: repository and pinned revision
PIPER_REPO ?= https://github.com/mudler/go-piper
PIPER_VERSION ?= e10ca041a885d4a8f3871d52924b47792d5e5aa0

CURRENT_DIR = $(abspath ./)
GOCMD = go

# Header search path handed to cgo: piper's C++ sources plus the include
# directories of the fi/pi/si build trees inside the go-piper checkout.
PIPER_CGO_CXXFLAGS += -I$(CURRENT_DIR)/sources/go-piper/piper/src/cpp
PIPER_CGO_CXXFLAGS += -I$(CURRENT_DIR)/sources/go-piper/piper/build/fi/include
PIPER_CGO_CXXFLAGS += -I$(CURRENT_DIR)/sources/go-piper/piper/build/pi/include
PIPER_CGO_CXXFLAGS += -I$(CURRENT_DIR)/sources/go-piper/piper/build/si/include

# Matching library directories, followed by the libraries linked from them.
PIPER_CGO_LDFLAGS += -L$(CURRENT_DIR)/sources/go-piper/piper/build/fi/lib
PIPER_CGO_LDFLAGS += -L$(CURRENT_DIR)/sources/go-piper/piper/build/pi/lib
PIPER_CGO_LDFLAGS += -L$(CURRENT_DIR)/sources/go-piper/piper/build/si/lib
PIPER_CGO_LDFLAGS += -lfmt -lspdlog -lucd
|
||||||
|
|
||||||
|
## go-piper
|
||||||
|
# Materialise the pinned go-piper revision under sources/go-piper: initialise
# an empty repository, fetch from $(PIPER_REPO), check out $(PIPER_VERSION)
# and pull the submodules shallowly.
sources/go-piper:
	mkdir -p $@
	git -C $@ init && \
	git -C $@ remote add origin $(PIPER_REPO) && \
	git -C $@ fetch origin && \
	git -C $@ checkout $(PIPER_VERSION) && \
	git -C $@ submodule update --init --recursive --depth 1 --single-branch
|
||||||
|
|
||||||
|
# Delegate to go-piper's own Makefile for the binding archive and the extra
# targets requested alongside it.
sources/go-piper/libpiper_binding.a: sources/go-piper
	$(MAKE) -C $< libpiper_binding.a example/main piper.o

# Copy the espeak-ng data tree out of the go-piper build into ./espeak-ng-data.
espeak-ng-data: sources/go-piper sources/go-piper/libpiper_binding.a
	mkdir -p $@
	@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. $@
|
||||||
|
|
||||||
|
# Point the Go module at the local go-piper checkout, then build the backend
# with the piper include/link flags and LIBRARY_PATH set to that checkout.
piper: sources/go-piper sources/go-piper/libpiper_binding.a espeak-ng-data
	$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURRENT_DIR)/sources/go-piper
	CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" \
	CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" \
	LIBRARY_PATH=$(CURRENT_DIR)/sources/go-piper \
	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $@ ./
|
||||||
|
|
||||||
|
# `package` and `build` name actions, not files. package.sh creates a
# ./package directory next to this Makefile; without .PHONY, make would see
# that directory on the next run and consider `package` already up to date.
.PHONY: package build

# Stage the binary, espeak-ng data, launcher and libraries into ./package.
package:
	bash package.sh

# Build, then package. Packaging runs as a sub-make from the recipe so it
# cannot start before the binary exists under `make -j`.
build: piper
	$(MAKE) package
|
||||||
54
backend/go/piper/package.sh
Executable file
54
backend/go/piper/package.sh
Executable file
@@ -0,0 +1,54 @@
|
|||||||
|
#!/bin/bash

# Assemble a self-contained ./package directory for the piper backend:
# the piper binary, the espeak-ng data, the run.sh launcher, the
# piper-phonemize libraries, and the dynamic loader plus core runtime
# libraries found on the machine running this script.
# This script is used in the final stage of the Dockerfile.

set -e

CURDIR=$(dirname "$(realpath "$0")")
PKG_DIR="$CURDIR/package"
LIB_DIR="$PKG_DIR/lib"

# Create lib directory
mkdir -p "$LIB_DIR"

cp -avrf "$CURDIR/piper" "$PKG_DIR/"
cp -avrf "$CURDIR/espeak-ng-data" "$PKG_DIR/"
cp -rfv "$CURDIR/run.sh" "$PKG_DIR/"
cp -rfLv "$CURDIR"/sources/go-piper/piper-phonemize/pi/lib/* "$LIB_DIR/"

# Detect the architecture from whichever dynamic loader is present and
# remember where that architecture keeps its system libraries.
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
    # x86_64 architecture
    echo "Detected x86_64 architecture, copying x86_64 libraries..."
    LOADER=/lib64/ld-linux-x86-64.so.2
    SYS_LIB_DIR=/lib/x86_64-linux-gnu
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
    # ARM64 architecture
    echo "Detected ARM64 architecture, copying ARM64 libraries..."
    LOADER=/lib/ld-linux-aarch64.so.1
    SYS_LIB_DIR=/lib/aarch64-linux-gnu
else
    echo "Error: Could not detect architecture" >&2
    exit 1
fi

# The loader is stored under the fixed name ld.so, which run.sh looks for.
cp -arfLv "$LOADER" "$LIB_DIR/ld.so"

# Same library set for both architectures; -L stores real files rather
# than symlinks.
for lib in \
    libc.so.6 \
    libgcc_s.so.1 \
    libstdc++.so.6 \
    libm.so.6 \
    libgomp.so.1 \
    libdl.so.2 \
    librt.so.1 \
    libpthread.so.0; do
    cp -arfLv "$SYS_LIB_DIR/$lib" "$LIB_DIR/$lib"
done

echo "Packaging completed successfully"
ls -liah "$PKG_DIR/"
ls -liah "$LIB_DIR/"
|
||||||
@@ -23,7 +23,7 @@ func (sd *Piper) Load(opts *pb.ModelOptions) error {
|
|||||||
}
|
}
|
||||||
var err error
|
var err error
|
||||||
// Note: the Model here is a path to a directory containing the model files
|
// Note: the Model here is a path to a directory containing the model files
|
||||||
sd.piper, err = New(opts.LibrarySearchPath)
|
sd.piper, err = New(os.Getenv("ESPEAK_NG_DATA"))
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
15
backend/go/piper/run.sh
Executable file
15
backend/go/piper/run.sh
Executable file
@@ -0,0 +1,15 @@
|
|||||||
|
#!/bin/bash
# Launcher for the packaged piper backend: exports the espeak-ng data
# location and the bundled library directory, then replaces this shell with
# the piper binary, forwarding all arguments.
set -ex

CURDIR=$(dirname "$(realpath "$0")")

# Read by the backend via os.Getenv("ESPEAK_NG_DATA").
export ESPEAK_NG_DATA="$CURDIR/espeak-ng-data"

# Put the bundled libraries first. The previous value is appended only when
# it is non-empty: a trailing ':' would add an empty entry, which the dynamic
# loader interprets as the current working directory.
export LD_LIBRARY_PATH="$CURDIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# If there is a lib/ld.so, use it
if [ -f "$CURDIR/lib/ld.so" ]; then
    echo "Using lib/ld.so"
    exec "$CURDIR/lib/ld.so" "$CURDIR/piper" "$@"
fi

exec "$CURDIR/piper" "$@"
|
||||||
47
backend/go/silero-vad/Makefile
Normal file
47
backend/go/silero-vad/Makefile
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
|
||||||
|
# Build the silero-vad backend against a prebuilt ONNX Runtime release
# downloaded from GitHub.

CURRENT_DIR=$(abspath ./)
GOCMD=go

ONNX_VERSION?=1.20.0
ONNX_ARCH?=x64
ONNX_OS?=linux

# OS is not assigned anywhere else in this Makefile; default it from uname so
# the Darwin branches below also take effect when make is invoked directly in
# this directory. A value inherited from the environment or the command line
# still wins.
OS?=$(shell uname -s)

# Query the machine type once instead of once per conditional.
UNAME_M:=$(shell uname -m)

# Detect if we are running on arm64
ifneq (,$(findstring aarch64,$(UNAME_M)))
  ONNX_ARCH=aarch64
endif

# The macOS release archives use different OS/arch names.
ifeq ($(OS),Darwin)
  ONNX_OS=osx
  ifneq (,$(findstring aarch64,$(UNAME_M))$(findstring arm64,$(UNAME_M)))
    ONNX_ARCH=arm64
  else
    ONNX_ARCH=x86_64
  endif
endif

# Base name shared by the release tarball and the directory it unpacks to.
ONNX_PKG=onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)

# Download and unpack the release, flattening it into sources/onnxruntime.
# -f makes curl fail on an HTTP error instead of saving the error page.
sources/onnxruntime:
	mkdir -p sources/onnxruntime
	curl -fL https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/$(ONNX_PKG).tgz -o sources/onnxruntime/$(ONNX_PKG).tgz
	cd sources/onnxruntime && tar -xvf $(ONNX_PKG).tgz && rm $(ONNX_PKG).tgz
	cd sources/onnxruntime && mv $(ONNX_PKG)/* ./

# Copy the runtime libraries and rename the versioned library to the
# unversioned/soname form.
backend-assets/lib/libonnxruntime.so.1: sources/onnxruntime
	mkdir -p backend-assets/lib
	cp -rfLv sources/onnxruntime/lib/* backend-assets/lib/
ifeq ($(OS),Darwin)
	mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
else
	mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
endif

silero-vad: backend-assets/lib/libonnxruntime.so.1
	CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURRENT_DIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURRENT_DIR)/backend-assets/lib \
	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o silero-vad ./

# package.sh creates a directory called `package`; without .PHONY make would
# consider the `package` target up to date after the first run and skip it.
.PHONY: package build

package:
	bash package.sh

build: silero-vad package
|
||||||
53
backend/go/silero-vad/package.sh
Executable file
53
backend/go/silero-vad/package.sh
Executable file
@@ -0,0 +1,53 @@
|
|||||||
|
#!/bin/bash

# Assemble a self-contained ./package directory for the silero-vad backend:
# the silero-vad binary, the run.sh launcher, the libraries from
# backend-assets/lib, and the dynamic loader plus core runtime libraries
# found on the machine running this script.
# This script is used in the final stage of the Dockerfile.

set -e

CURDIR=$(dirname "$(realpath "$0")")
PKG_DIR="$CURDIR/package"
LIB_DIR="$PKG_DIR/lib"

# Create lib directory
mkdir -p "$LIB_DIR"

cp -avrf "$CURDIR/silero-vad" "$PKG_DIR/"
cp -avrf "$CURDIR/run.sh" "$PKG_DIR/"
cp -rfLv "$CURDIR"/backend-assets/lib/* "$LIB_DIR/"

# Detect the architecture from whichever dynamic loader is present and
# remember where that architecture keeps its system libraries.
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
    # x86_64 architecture
    echo "Detected x86_64 architecture, copying x86_64 libraries..."
    LOADER=/lib64/ld-linux-x86-64.so.2
    SYS_LIB_DIR=/lib/x86_64-linux-gnu
elif [ -f "/lib/ld-linux-aarch64.so.1" ]; then
    # ARM64 architecture
    echo "Detected ARM64 architecture, copying ARM64 libraries..."
    LOADER=/lib/ld-linux-aarch64.so.1
    SYS_LIB_DIR=/lib/aarch64-linux-gnu
else
    echo "Error: Could not detect architecture" >&2
    exit 1
fi

# The loader is stored under the fixed name ld.so, which run.sh looks for.
cp -arfLv "$LOADER" "$LIB_DIR/ld.so"

# Same library set for both architectures; -L stores real files rather
# than symlinks.
for lib in \
    libc.so.6 \
    libgcc_s.so.1 \
    libstdc++.so.6 \
    libm.so.6 \
    libgomp.so.1 \
    libdl.so.2 \
    librt.so.1 \
    libpthread.so.0; do
    cp -arfLv "$SYS_LIB_DIR/$lib" "$LIB_DIR/$lib"
done

echo "Packaging completed successfully"
ls -liah "$PKG_DIR/"
ls -liah "$LIB_DIR/"
|
||||||
14
backend/go/silero-vad/run.sh
Executable file
14
backend/go/silero-vad/run.sh
Executable file
@@ -0,0 +1,14 @@
|
|||||||
|
#!/bin/bash
# Launcher for the packaged silero-vad backend: exports the bundled library
# directory, then replaces this shell with the silero-vad binary, forwarding
# all arguments.
set -ex

CURDIR=$(dirname "$(realpath "$0")")

# Put the bundled libraries first. The previous value is appended only when
# it is non-empty: a trailing ':' would add an empty entry, which the dynamic
# loader interprets as the current working directory.
export LD_LIBRARY_PATH="$CURDIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# If there is a lib/ld.so, use it
if [ -f "$CURDIR/lib/ld.so" ]; then
    echo "Using lib/ld.so"
    exec "$CURDIR/lib/ld.so" "$CURDIR/silero-vad" "$@"
fi

exec "$CURDIR/silero-vad" "$@"
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user