mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-24 08:38:02 -04:00
Compare commits
628 Commits
feat/realt
...
libmtmd
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cd4c0b8aa6 | ||
|
|
7437d0c9ca | ||
|
|
029f97c2a2 | ||
|
|
3be71be696 | ||
|
|
6adb019f8f | ||
|
|
fcaa0a2f01 | ||
|
|
fd17a3312c | ||
|
|
12d0fe610b | ||
|
|
11c67d16b8 | ||
|
|
63f7c86c4d | ||
|
|
ac89bf77bf | ||
|
|
0395cc02fb | ||
|
|
616972fca0 | ||
|
|
942fbff62d | ||
|
|
2612a0c910 | ||
|
|
2dcb6d7247 | ||
|
|
6978eec69f | ||
|
|
2fcfe54466 | ||
|
|
4e7506a3be | ||
|
|
2a46217f90 | ||
|
|
31ff9dbd52 | ||
|
|
9483abef03 | ||
|
|
ce3e8b3e31 | ||
|
|
f3bb84c9a7 | ||
|
|
ecb1297582 | ||
|
|
73fc702b3c | ||
|
|
e3af62ae1a | ||
|
|
dc21604741 | ||
|
|
5433f1a70e | ||
|
|
d5e032bdcd | ||
|
|
de786f6586 | ||
|
|
8b9bc4aa6e | ||
|
|
e6cea7d28e | ||
|
|
7d7d56f2ce | ||
|
|
1caae91ab6 | ||
|
|
e90f2cb0ca | ||
|
|
5a4291fadd | ||
|
|
91ef58ee5a | ||
|
|
a86e8c78f1 | ||
|
|
adb24214c6 | ||
|
|
f03a0430aa | ||
|
|
73bc12abc0 | ||
|
|
7fa437bbcc | ||
|
|
4a27c99928 | ||
|
|
6ce94834b6 | ||
|
|
84a26458dc | ||
|
|
7aa377b6a9 | ||
|
|
64e66dda4a | ||
|
|
a085f61fdc | ||
|
|
21bdfe5fa4 | ||
|
|
7ebd7b2454 | ||
|
|
6984749ea1 | ||
|
|
c0a206bc7a | ||
|
|
01bbb31fb3 | ||
|
|
72111c597d | ||
|
|
b2f9fc870b | ||
|
|
1fc6d469ac | ||
|
|
05848b2027 | ||
|
|
1da0644aa3 | ||
|
|
c087cd1377 | ||
|
|
c621412f6a | ||
|
|
5a8b1892cd | ||
|
|
5b20426863 | ||
|
|
5c6cd50ed6 | ||
|
|
bace6516f1 | ||
|
|
3baadf6f27 | ||
|
|
8804c701b8 | ||
|
|
7b3ceb19bb | ||
|
|
e7f3effea1 | ||
|
|
61694a2ffb | ||
|
|
573a3f104c | ||
|
|
0e8af53a5b | ||
|
|
960ffa808c | ||
|
|
92719568e5 | ||
|
|
163939af71 | ||
|
|
399f1241dc | ||
|
|
58c9ade2e8 | ||
|
|
6e1c93d84f | ||
|
|
4076ea0494 | ||
|
|
26cbf77c0d | ||
|
|
640790d628 | ||
|
|
4132adea2f | ||
|
|
2b2d907a3a | ||
|
|
6e8f4f584b | ||
|
|
662cfc2b48 | ||
|
|
a25d355d66 | ||
|
|
6d1cfdbefc | ||
|
|
5ecc478968 | ||
|
|
aef5c4291b | ||
|
|
c059f912b9 | ||
|
|
bc1e059259 | ||
|
|
38dc07793a | ||
|
|
da6ef0967d | ||
|
|
7a011e60bd | ||
|
|
e13dd5b09f | ||
|
|
86ee303bd6 | ||
|
|
978ee96fd3 | ||
|
|
3ad5691db6 | ||
|
|
0027681090 | ||
|
|
8cba990edc | ||
|
|
88857696d4 | ||
|
|
23f347e687 | ||
|
|
b6e3dc5f02 | ||
|
|
69667521e2 | ||
|
|
2a92effc5d | ||
|
|
a65e012aa2 | ||
|
|
8e9b41d05f | ||
|
|
078da5c2f0 | ||
|
|
c5af5d139c | ||
|
|
2c9279a542 | ||
|
|
a67d22f5f2 | ||
|
|
dc7c51dcc7 | ||
|
|
98df65c7aa | ||
|
|
1559b6b522 | ||
|
|
a0244e3fb4 | ||
|
|
d66396201a | ||
|
|
9628860c0e | ||
|
|
cae9bf1308 | ||
|
|
5bb5da0760 | ||
|
|
867973a850 | ||
|
|
701cd6b6d5 | ||
|
|
7f61d397d5 | ||
|
|
1ae0b896fa | ||
|
|
3937407cb3 | ||
|
|
0e34ae4f3f | ||
|
|
a38b99ecb6 | ||
|
|
a4a4358182 | ||
|
|
4bc39c2db3 | ||
|
|
cc3df759f8 | ||
|
|
378161060c | ||
|
|
f2f788fe60 | ||
|
|
9fa8ed6b1e | ||
|
|
7fc37c5e29 | ||
|
|
4bc4b1e8bc | ||
|
|
e495b89f18 | ||
|
|
ba09eaea1b | ||
|
|
61cc76c455 | ||
|
|
8abecb4a18 | ||
|
|
8b3f76d8e6 | ||
|
|
4e0497f1a6 | ||
|
|
ba88c9f451 | ||
|
|
a598285825 | ||
|
|
cb7a172897 | ||
|
|
771be28dfb | ||
|
|
7d6b3eb42d | ||
|
|
0bb33fab55 | ||
|
|
e3bf7f77f7 | ||
|
|
bd1707d339 | ||
|
|
0474804541 | ||
|
|
72693b3917 | ||
|
|
a03b70010f | ||
|
|
e3717e5c1a | ||
|
|
c8f6858218 | ||
|
|
06d7cc43ae | ||
|
|
f2147cb850 | ||
|
|
75bb9f4c28 | ||
|
|
a2ef4b1e07 | ||
|
|
161c9fe2db | ||
|
|
7547463f81 | ||
|
|
32e4dfd47b | ||
|
|
f67e5dec68 | ||
|
|
297d54acea | ||
|
|
56f44d448c | ||
|
|
0f0fafacd9 | ||
|
|
4f239bac89 | ||
|
|
04d74ac648 | ||
|
|
18c3dc33ee | ||
|
|
508cfa7369 | ||
|
|
1f94cddbae | ||
|
|
21ae7b4cd4 | ||
|
|
bef22ab547 | ||
|
|
eb04e8cdcf | ||
|
|
17e533a086 | ||
|
|
4fc68409ff | ||
|
|
e587044449 | ||
|
|
1f09db5161 | ||
|
|
05b744f086 | ||
|
|
89ca4bc02d | ||
|
|
e626aa48a4 | ||
|
|
752b5e0339 | ||
|
|
637d72d6e3 | ||
|
|
f3bfec580a | ||
|
|
165c1ddff3 | ||
|
|
fb83238e9e | ||
|
|
700bfa41c7 | ||
|
|
25bdc350df | ||
|
|
1b899e1a68 | ||
|
|
3bf13f8c69 | ||
|
|
7a00729374 | ||
|
|
d484028532 | ||
|
|
0eb7fc2c41 | ||
|
|
a69e30e0c9 | ||
|
|
9c018e6bff | ||
|
|
281e818047 | ||
|
|
270f0e2157 | ||
|
|
673e59e76c | ||
|
|
5a8a2adb44 | ||
|
|
a7317d23bf | ||
|
|
2bab9b5fe2 | ||
|
|
081be3ba7d | ||
|
|
25e6f21322 | ||
|
|
b4df1c9cf3 | ||
|
|
4fbd6609f2 | ||
|
|
7387932f89 | ||
|
|
59c37e67b2 | ||
|
|
c09d227647 | ||
|
|
547d322b28 | ||
|
|
a6f0bb410f | ||
|
|
710f624ecd | ||
|
|
5018452be7 | ||
|
|
ece239966f | ||
|
|
3b8bc7e64c | ||
|
|
fc73b2b430 | ||
|
|
901dba6063 | ||
|
|
b88a7a4550 | ||
|
|
106e40845f | ||
|
|
0064bec8f5 | ||
|
|
9e6dbb0b5a | ||
|
|
d26e61388b | ||
|
|
31a7084c75 | ||
|
|
128612a6fc | ||
|
|
6af3f46bc3 | ||
|
|
d2cf8ef070 | ||
|
|
259ad3cfe6 | ||
|
|
18b320d577 | ||
|
|
89e151f035 | ||
|
|
22060f6410 | ||
|
|
7ee3288460 | ||
|
|
cbbc954a8c | ||
|
|
2c425e9c69 | ||
|
|
c59975ab05 | ||
|
|
05f7004487 | ||
|
|
2f9203cd2a | ||
|
|
f09b33f2ef | ||
|
|
65470b0ab1 | ||
|
|
9a23fe662b | ||
|
|
6d7ac09e96 | ||
|
|
c2a39e3639 | ||
|
|
ae625a4d00 | ||
|
|
7f3a029596 | ||
|
|
b34cf00819 | ||
|
|
d4a10b4300 | ||
|
|
9c74d74f7b | ||
|
|
679ee7bea4 | ||
|
|
77d7dc62c4 | ||
|
|
699519d1fe | ||
|
|
8faf39d34e | ||
|
|
5d261a6fcd | ||
|
|
22d5727089 | ||
|
|
c965197d6f | ||
|
|
994a6c4939 | ||
|
|
f926d2a72b | ||
|
|
ddeb9ed93e | ||
|
|
c7e99c7b59 | ||
|
|
6fabc92e56 | ||
|
|
4645b3c919 | ||
|
|
134fe2705c | ||
|
|
3cca32ba7e | ||
|
|
c069e61b26 | ||
|
|
7fa159e164 | ||
|
|
5f92025617 | ||
|
|
333e1bc732 | ||
|
|
e90b97c144 | ||
|
|
747eeb1d46 | ||
|
|
5d2c53abc0 | ||
|
|
0b1e721242 | ||
|
|
8c76a9ce99 | ||
|
|
338321af5b | ||
|
|
2774a92484 | ||
|
|
1a6bfb41a1 | ||
|
|
314981eaf8 | ||
|
|
d7266c633d | ||
|
|
eb4d5f2b95 | ||
|
|
c63b449ad6 | ||
|
|
dd4a778c2c | ||
|
|
a0896d21d6 | ||
|
|
0e697f951a | ||
|
|
fa4bb9082d | ||
|
|
8ff7b15441 | ||
|
|
dd45f85a20 | ||
|
|
decdd9e522 | ||
|
|
31a21d4a2c | ||
|
|
2c129843a7 | ||
|
|
ce71a0bcfb | ||
|
|
0a32c38317 | ||
|
|
36f596f260 | ||
|
|
953552545b | ||
|
|
835e55b1de | ||
|
|
dcd2921eaa | ||
|
|
5e6459fd18 | ||
|
|
50ddb3eb59 | ||
|
|
5eebfee4b5 | ||
|
|
567919ea90 | ||
|
|
27a3997530 | ||
|
|
192ba2c657 | ||
|
|
92abac9ca8 | ||
|
|
04ebbbd73a | ||
|
|
55305e0d95 | ||
|
|
67623639e4 | ||
|
|
cc76def342 | ||
|
|
4967fa5928 | ||
|
|
2b98e4ec56 | ||
|
|
fa1d058ee2 | ||
|
|
a49a588bfa | ||
|
|
ca7dda61c6 | ||
|
|
ffedddd76d | ||
|
|
766c76ae8e | ||
|
|
3096ff33e9 | ||
|
|
90a7451da4 | ||
|
|
529a4b9ee8 | ||
|
|
0567e104eb | ||
|
|
ecbeacd022 | ||
|
|
2772960e41 | ||
|
|
1b694191e2 | ||
|
|
69578a5f8f | ||
|
|
7d96cfe72b | ||
|
|
423514a5a5 | ||
|
|
12568c7d6d | ||
|
|
8d16a0a536 | ||
|
|
87ca801f00 | ||
|
|
e4ecbb6c30 | ||
|
|
b1a67de2b9 | ||
|
|
71a23910fe | ||
|
|
0ede31f9cf | ||
|
|
9f5dcf2d1e | ||
|
|
e878556e98 | ||
|
|
b096928172 | ||
|
|
db7442ae67 | ||
|
|
b6cd430e08 | ||
|
|
478e50cda2 | ||
|
|
1db2b9943c | ||
|
|
ac41aa8b67 | ||
|
|
156a98e2e7 | ||
|
|
d88ec1209e | ||
|
|
fde8dbfc80 | ||
|
|
879dc73eba | ||
|
|
1dfc52de16 | ||
|
|
1331129485 | ||
|
|
1cd98062e5 | ||
|
|
9791d9b77a | ||
|
|
8956452a45 | ||
|
|
f3659fa49c | ||
|
|
585f2be793 | ||
|
|
d13f160222 | ||
|
|
db5495b9d7 | ||
|
|
3def1ae232 | ||
|
|
c6ebead8e5 | ||
|
|
cff4a950e0 | ||
|
|
e4fa894153 | ||
|
|
69caccfa82 | ||
|
|
ab50c13160 | ||
|
|
56d4e82b14 | ||
|
|
09b5bd48bc | ||
|
|
957dcfb6a9 | ||
|
|
67f7bffd18 | ||
|
|
de81b42b49 | ||
|
|
06eb7e9fa7 | ||
|
|
45bc1ac566 | ||
|
|
02aafeff75 | ||
|
|
6b46c52789 | ||
|
|
d732e261a4 | ||
|
|
807c574e91 | ||
|
|
bb171a39b3 | ||
|
|
941a4fc50e | ||
|
|
afe65bd7bf | ||
|
|
6f9762049c | ||
|
|
122970d70d | ||
|
|
8664b1c7a2 | ||
|
|
c92166f38a | ||
|
|
d616058b12 | ||
|
|
a7b4001b75 | ||
|
|
ff85f01459 | ||
|
|
695f81a08b | ||
|
|
326be287da | ||
|
|
0404d98190 | ||
|
|
0a8ec1eb22 | ||
|
|
d860932dcd | ||
|
|
1cb137bd2d | ||
|
|
3c279e5568 | ||
|
|
fb55e3df57 | ||
|
|
de46fb6e2e | ||
|
|
d7a0e3c5ea | ||
|
|
0533ea817d | ||
|
|
755e4fb5f4 | ||
|
|
e4fdde158f | ||
|
|
6d0712fa6d | ||
|
|
bbbb28e3ca | ||
|
|
3bf2e9d065 | ||
|
|
1461fd8777 | ||
|
|
054860539a | ||
|
|
c87870b18e | ||
|
|
5ad2be9c45 | ||
|
|
61a24746a1 | ||
|
|
d557eb9361 | ||
|
|
a9a1a361a9 | ||
|
|
12d070af80 | ||
|
|
8d40557bc8 | ||
|
|
5a5f3a899a | ||
|
|
a2d1f133c8 | ||
|
|
0ae6420c31 | ||
|
|
3a3e05cf18 | ||
|
|
6a20388e25 | ||
|
|
06c836a937 | ||
|
|
049a13fe78 | ||
|
|
30bf6c962f | ||
|
|
a72b3a23c3 | ||
|
|
e9971b168a | ||
|
|
5b59b5e0c1 | ||
|
|
8cfd712428 | ||
|
|
21f7faa80d | ||
|
|
a6a0121118 | ||
|
|
ba66aa33c5 | ||
|
|
8fc024a770 | ||
|
|
52aa9d08aa | ||
|
|
4c9379c39e | ||
|
|
0ff2c39364 | ||
|
|
1af7e5dc49 | ||
|
|
af3bb64e42 | ||
|
|
77281f836e | ||
|
|
550275811d | ||
|
|
c27ce6c54d | ||
|
|
ac4991b069 | ||
|
|
25bee71bb8 | ||
|
|
b993780a3b | ||
|
|
ea0c9f1168 | ||
|
|
08311f275a | ||
|
|
4de0f2f737 | ||
|
|
42ae807c41 | ||
|
|
94593ba4c3 | ||
|
|
6a6e1a0ea9 | ||
|
|
5b19af99ff | ||
|
|
28fb8e607a | ||
|
|
bb85b6ef00 | ||
|
|
b9b5a635ca | ||
|
|
131ea5b627 | ||
|
|
fac70e9642 | ||
|
|
7e76ea40fb | ||
|
|
de09ae42ef | ||
|
|
6424f0666d | ||
|
|
f3ae94ca70 | ||
|
|
09c9f67a02 | ||
|
|
c264ca542d | ||
|
|
bbf30d416d | ||
|
|
27617a1b06 | ||
|
|
e84081769e | ||
|
|
20119fc580 | ||
|
|
09941c0bfb | ||
|
|
cabe0f4993 | ||
|
|
1977c7f190 | ||
|
|
061e7c4eae | ||
|
|
5313e660f6 | ||
|
|
9e32fda304 | ||
|
|
83202cae54 | ||
|
|
d96addfa9d | ||
|
|
a715fe588d | ||
|
|
2ac4a86bb4 | ||
|
|
8670d480a6 | ||
|
|
af0b4ff237 | ||
|
|
e694764065 | ||
|
|
f3c27e0381 | ||
|
|
bf44319d0d | ||
|
|
5b133a640b | ||
|
|
0030a3fe75 | ||
|
|
0a748b009e | ||
|
|
257e951def | ||
|
|
fbd82a2dd0 | ||
|
|
5db321dad2 | ||
|
|
f5638a6354 | ||
|
|
5f64cc6328 | ||
|
|
28b10e8804 | ||
|
|
3277f5095d | ||
|
|
fe3ced2919 | ||
|
|
45e37a07bb | ||
|
|
e57b750ca3 | ||
|
|
49df492268 | ||
|
|
516cd660f1 | ||
|
|
8fd3ace9a1 | ||
|
|
099469cb05 | ||
|
|
6be8c0c618 | ||
|
|
3cddf24747 | ||
|
|
c330360785 | ||
|
|
8cd51570e5 | ||
|
|
0e7aa5cd15 | ||
|
|
e06a5f49de | ||
|
|
fb2f847507 | ||
|
|
e01acc88c9 | ||
|
|
7a5912908a | ||
|
|
4b1b942a7f | ||
|
|
230fe0098f | ||
|
|
cc163429dc | ||
|
|
f670e0a91c | ||
|
|
731674eee7 | ||
|
|
cc1f6f913f | ||
|
|
7f90ff7aec | ||
|
|
8d45670e41 | ||
|
|
e4b8ddb6a1 | ||
|
|
a801561f81 | ||
|
|
16ced07102 | ||
|
|
d35595372d | ||
|
|
81be192279 | ||
|
|
28a1310890 | ||
|
|
2a702e9ca4 | ||
|
|
3ecaea1b6e | ||
|
|
7daf5ac3e3 | ||
|
|
7bc80c17f8 | ||
|
|
1996ceb293 | ||
|
|
0bc3dc43da | ||
|
|
3324c4e6cb | ||
|
|
7329db4e78 | ||
|
|
464686aee6 | ||
|
|
bfa3d4ccff | ||
|
|
6a91288c8c | ||
|
|
96cb407ee0 | ||
|
|
5a19094d3a | ||
|
|
e3b943ffcb | ||
|
|
df30d6a482 | ||
|
|
c3c27b7e3d | ||
|
|
431716d4d6 | ||
|
|
d290fd159f | ||
|
|
051faaf771 | ||
|
|
41a2dfb0d9 | ||
|
|
ed0094c3d0 | ||
|
|
52fadeded1 | ||
|
|
a37fa8d9c4 | ||
|
|
03974a4dd4 | ||
|
|
1d6afbd65d | ||
|
|
d79f02ea09 | ||
|
|
ba2f426e3e | ||
|
|
732042e5c6 | ||
|
|
f1763aabf2 | ||
|
|
e0d90b173b | ||
|
|
ff07612bfa | ||
|
|
7badaf78a0 | ||
|
|
af41436f1b | ||
|
|
cd5489ce47 | ||
|
|
60ec2cf751 | ||
|
|
244f4b564f | ||
|
|
f1d6d65417 | ||
|
|
72e52c4f6a | ||
|
|
1656e1a88e | ||
|
|
7f62b418a4 | ||
|
|
1f4e66d638 | ||
|
|
a37b2c765c | ||
|
|
b4b67e00bd | ||
|
|
91e1ff5a95 | ||
|
|
d9204ea3b5 | ||
|
|
3d0fbcb4f7 | ||
|
|
03f3df9a82 | ||
|
|
fff35d5528 | ||
|
|
539e94db73 | ||
|
|
0f4f62cf3c | ||
|
|
e7cffd7afa | ||
|
|
26d790a2b6 | ||
|
|
5cf838c08d | ||
|
|
4db8f5cbce | ||
|
|
3b6b37a81b | ||
|
|
8f5aa2d9de | ||
|
|
a6bc8aa7c7 | ||
|
|
4ab107bc1a | ||
|
|
4c3710a531 | ||
|
|
901b06284a | ||
|
|
8eef5a2c5e | ||
|
|
e9cace137b | ||
|
|
9409c99738 | ||
|
|
4d44ebc2f2 | ||
|
|
9a1182fa01 | ||
|
|
66e9ef3f33 | ||
|
|
8282414583 | ||
|
|
d1d7ce83d4 | ||
|
|
5177837ab0 | ||
|
|
f9e368b7c4 | ||
|
|
eef80b9880 | ||
|
|
073eaec729 | ||
|
|
318225f631 | ||
|
|
89429a439b | ||
|
|
200fe358f0 | ||
|
|
e426ab7c23 | ||
|
|
715071b68d | ||
|
|
a05737c7e4 | ||
|
|
e8eb0b2c50 | ||
|
|
e15d29aba2 | ||
|
|
10675ac28e | ||
|
|
0ec25b8b07 | ||
|
|
e81ceff681 | ||
|
|
6831719e1e | ||
|
|
b264a91b3f | ||
|
|
1a08948e63 | ||
|
|
14a1e02f44 | ||
|
|
2f09aa1b85 | ||
|
|
a396040886 | ||
|
|
aeb1dca52e | ||
|
|
83a8d90c52 | ||
|
|
adebd557ce | ||
|
|
0c0e015b38 | ||
|
|
390bb3f58b | ||
|
|
30739d94a4 | ||
|
|
83e2dd5dff | ||
|
|
f496d0113b | ||
|
|
a752183fb5 | ||
|
|
296b97925f | ||
|
|
d0cc3047dc | ||
|
|
032a33de49 | ||
|
|
1e9bf19c8d | ||
|
|
4bd8434ae0 | ||
|
|
958f6eb722 | ||
|
|
96306a39a0 | ||
|
|
895cd7c76a | ||
|
|
cbdbe59f16 | ||
|
|
ee7904f170 | ||
|
|
a761e01944 | ||
|
|
96f8ec0402 | ||
|
|
8027fdf1c7 | ||
|
|
212c8e1a6d | ||
|
|
78533d7230 | ||
|
|
b5eeb5c5ab | ||
|
|
b147ad0596 | ||
|
|
7d0ac1ea3f | ||
|
|
d08d97bebf | ||
|
|
acb2eb23c8 | ||
|
|
de4aa9fb1d | ||
|
|
560ba6f25e | ||
|
|
8131ddd878 | ||
|
|
26c3deb673 | ||
|
|
6d20497d45 | ||
|
|
482c6b8be4 | ||
|
|
5bba5edf45 | ||
|
|
792b866727 | ||
|
|
f053f7bde2 |
@@ -1,23 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: musicgen
|
|
||||||
type: http
|
|
||||||
seq: 1
|
|
||||||
}
|
|
||||||
|
|
||||||
post {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/sound-generation
|
|
||||||
body: json
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
|
|
||||||
headers {
|
|
||||||
Content-Type: application/json
|
|
||||||
}
|
|
||||||
|
|
||||||
body:json {
|
|
||||||
{
|
|
||||||
"model_id": "facebook/musicgen-small",
|
|
||||||
"text": "Exciting 80s Newscast Interstitial",
|
|
||||||
"duration_seconds": 8
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: backend monitor
|
|
||||||
type: http
|
|
||||||
seq: 4
|
|
||||||
}
|
|
||||||
|
|
||||||
get {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/monitor
|
|
||||||
body: json
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
|
|
||||||
body:json {
|
|
||||||
{
|
|
||||||
"model": "{{DEFAULT_MODEL}}"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: backend-shutdown
|
|
||||||
type: http
|
|
||||||
seq: 3
|
|
||||||
}
|
|
||||||
|
|
||||||
post {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/shutdown
|
|
||||||
body: json
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
|
|
||||||
headers {
|
|
||||||
Content-Type: application/json
|
|
||||||
}
|
|
||||||
|
|
||||||
body:json {
|
|
||||||
{
|
|
||||||
"model": "{{DEFAULT_MODEL}}"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
{
|
|
||||||
"version": "1",
|
|
||||||
"name": "LocalAI Test Requests",
|
|
||||||
"type": "collection"
|
|
||||||
}
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
vars {
|
|
||||||
HOST: localhost
|
|
||||||
PORT: 8080
|
|
||||||
DEFAULT_MODEL: gpt-3.5-turbo
|
|
||||||
PROTOCOL: http://
|
|
||||||
}
|
|
||||||
@@ -1,11 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: get models list
|
|
||||||
type: http
|
|
||||||
seq: 2
|
|
||||||
}
|
|
||||||
|
|
||||||
get {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models
|
|
||||||
body: none
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: Generate image
|
|
||||||
type: http
|
|
||||||
seq: 1
|
|
||||||
}
|
|
||||||
|
|
||||||
post {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/images/generations
|
|
||||||
body: json
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
|
|
||||||
headers {
|
|
||||||
Content-Type: application/json
|
|
||||||
}
|
|
||||||
|
|
||||||
body:json {
|
|
||||||
{
|
|
||||||
"prompt": "<positive prompt>|<negative prompt>",
|
|
||||||
"model": "model-name",
|
|
||||||
"step": 51,
|
|
||||||
"size": "1024x1024",
|
|
||||||
"image": ""
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: -completions
|
|
||||||
type: http
|
|
||||||
seq: 4
|
|
||||||
}
|
|
||||||
|
|
||||||
post {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/completions
|
|
||||||
body: json
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
|
|
||||||
headers {
|
|
||||||
Content-Type: application/json
|
|
||||||
}
|
|
||||||
|
|
||||||
body:json {
|
|
||||||
{
|
|
||||||
"model": "{{DEFAULT_MODEL}}",
|
|
||||||
"prompt": "function downloadFile(string url, string outputPath) {",
|
|
||||||
"max_tokens": 256,
|
|
||||||
"temperature": 0.5
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,23 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: -edits
|
|
||||||
type: http
|
|
||||||
seq: 5
|
|
||||||
}
|
|
||||||
|
|
||||||
post {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/edits
|
|
||||||
body: json
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
|
|
||||||
headers {
|
|
||||||
Content-Type: application/json
|
|
||||||
}
|
|
||||||
|
|
||||||
body:json {
|
|
||||||
{
|
|
||||||
"model": "{{DEFAULT_MODEL}}",
|
|
||||||
"input": "What day of the wek is it?",
|
|
||||||
"instruction": "Fix the spelling mistakes"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: -embeddings
|
|
||||||
type: http
|
|
||||||
seq: 6
|
|
||||||
}
|
|
||||||
|
|
||||||
post {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/embeddings
|
|
||||||
body: json
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
|
|
||||||
headers {
|
|
||||||
Content-Type: application/json
|
|
||||||
}
|
|
||||||
|
|
||||||
body:json {
|
|
||||||
{
|
|
||||||
"model": "{{DEFAULT_MODEL}}",
|
|
||||||
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,30 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: chat completion -simple- 1 message-
|
|
||||||
type: http
|
|
||||||
seq: 4
|
|
||||||
}
|
|
||||||
|
|
||||||
post {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
|
||||||
body: json
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
|
|
||||||
headers {
|
|
||||||
Content-Type: application/json
|
|
||||||
}
|
|
||||||
|
|
||||||
body:json {
|
|
||||||
{
|
|
||||||
"model": "{{DEFAULT_MODEL}}",
|
|
||||||
"messages": [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "How could one use friction to cook an egg?"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"max_tokens": 256,
|
|
||||||
"temperature": 0.2,
|
|
||||||
"grammar": ""
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: chat-completions -long-
|
|
||||||
type: http
|
|
||||||
seq: 5
|
|
||||||
}
|
|
||||||
|
|
||||||
post {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
|
||||||
body: json
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
|
|
||||||
headers {
|
|
||||||
Content-Type: application/json
|
|
||||||
}
|
|
||||||
|
|
||||||
body:json {
|
|
||||||
{
|
|
||||||
"model": "{{DEFAULT_MODEL}}",
|
|
||||||
"messages": [{"role": "system", "content": "You are a helpful, intelligent pirate assistant. Always answer as helpfully as possible, while being a pirate and using pirate language. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."},
|
|
||||||
{"role": "user", "content": "How could one use electricity to cook an egg?"},
|
|
||||||
{"role": "assistant",
|
|
||||||
"content": "Shiver me timbers! Using electricity to cook an egg? Well, matey, I reckon that be a right curious idea! *adjusts spectacles* Now, I ain't sure if this be possible, but I'll do me best to help ye out.\n\nFirst things first, ye gotta understand that electricity be a powerful force, and it can't just cook an egg on its own. Ye see, electricity be like a mighty wind that can make things happen, but it needs somethin' to work with. So, if ye want to use electricity to cook an egg, ye gotta find a way to harness that power and make it do the cookin'.\n\nNow, I know what ye might be thinkin': \"How do I harness the power of electricity to cook an egg?\" Well, matey, there be a few ways to do it. One way be to use a special device called an \"electric frying pan.\" This be a pan that has a built-in heating element that gets hot when ye plug it into a wall socket. When the element gets hot, ye can crack an egg into the pan and watch as it cook"
|
|
||||||
},
|
|
||||||
{"role": "user", "content": "I don't have one of those, just a raw wire and plenty of power! How do we get it done?"}],
|
|
||||||
"max_tokens": 1024,
|
|
||||||
"temperature": 0.5
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: chat-completions -stream-
|
|
||||||
type: http
|
|
||||||
seq: 6
|
|
||||||
}
|
|
||||||
|
|
||||||
post {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
|
||||||
body: json
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
|
|
||||||
headers {
|
|
||||||
Content-Type: application/json
|
|
||||||
}
|
|
||||||
|
|
||||||
body:json {
|
|
||||||
{
|
|
||||||
"model": "{{DEFAULT_MODEL}}",
|
|
||||||
"messages": [{"role": "user", "content": "Explain how I can set sail on the ocean using only power generated by seagulls?"}],
|
|
||||||
"max_tokens": 256,
|
|
||||||
"temperature": 0.9,
|
|
||||||
"stream": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: add model gallery
|
|
||||||
type: http
|
|
||||||
seq: 10
|
|
||||||
}
|
|
||||||
|
|
||||||
post {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
|
||||||
body: json
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
|
|
||||||
headers {
|
|
||||||
Content-Type: application/json
|
|
||||||
}
|
|
||||||
|
|
||||||
body:json {
|
|
||||||
{
|
|
||||||
"url": "file:///home/dave/projects/model-gallery/huggingface/TheBloke__CodeLlama-7B-Instruct-GGML.yaml",
|
|
||||||
"name": "test"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: delete model gallery
|
|
||||||
type: http
|
|
||||||
seq: 11
|
|
||||||
}
|
|
||||||
|
|
||||||
delete {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
|
||||||
body: json
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
|
|
||||||
headers {
|
|
||||||
Content-Type: application/json
|
|
||||||
}
|
|
||||||
|
|
||||||
body:json {
|
|
||||||
{
|
|
||||||
"name": "test"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,11 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: list MODELS in galleries
|
|
||||||
type: http
|
|
||||||
seq: 7
|
|
||||||
}
|
|
||||||
|
|
||||||
get {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/available
|
|
||||||
body: none
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
@@ -1,11 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: list model GALLERIES
|
|
||||||
type: http
|
|
||||||
seq: 8
|
|
||||||
}
|
|
||||||
|
|
||||||
get {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
|
||||||
body: none
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
@@ -1,11 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: model delete
|
|
||||||
type: http
|
|
||||||
seq: 7
|
|
||||||
}
|
|
||||||
|
|
||||||
post {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
|
||||||
body: none
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: model gallery apply -gist-
|
|
||||||
type: http
|
|
||||||
seq: 12
|
|
||||||
}
|
|
||||||
|
|
||||||
post {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
|
|
||||||
body: json
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
|
|
||||||
headers {
|
|
||||||
Content-Type: application/json
|
|
||||||
}
|
|
||||||
|
|
||||||
body:json {
|
|
||||||
{
|
|
||||||
"id": "TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q2_K.bin"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: model gallery apply
|
|
||||||
type: http
|
|
||||||
seq: 9
|
|
||||||
}
|
|
||||||
|
|
||||||
post {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
|
|
||||||
body: json
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
|
|
||||||
headers {
|
|
||||||
Content-Type: application/json
|
|
||||||
}
|
|
||||||
|
|
||||||
body:json {
|
|
||||||
{
|
|
||||||
"id": "dave@TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q3_K_S.bin",
|
|
||||||
"name": "codellama7b"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Binary file not shown.
@@ -1,16 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: transcribe
|
|
||||||
type: http
|
|
||||||
seq: 1
|
|
||||||
}
|
|
||||||
|
|
||||||
post {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/audio/transcriptions
|
|
||||||
body: multipartForm
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
|
|
||||||
body:multipart-form {
|
|
||||||
file: @file(transcription/gb1.ogg)
|
|
||||||
model: whisper-1
|
|
||||||
}
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: -tts
|
|
||||||
type: http
|
|
||||||
seq: 2
|
|
||||||
}
|
|
||||||
|
|
||||||
post {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/tts
|
|
||||||
body: json
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
|
|
||||||
headers {
|
|
||||||
Content-Type: application/json
|
|
||||||
}
|
|
||||||
|
|
||||||
body:json {
|
|
||||||
{
|
|
||||||
"model": "{{DEFAULT_MODEL}}",
|
|
||||||
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,23 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: musicgen
|
|
||||||
type: http
|
|
||||||
seq: 2
|
|
||||||
}
|
|
||||||
|
|
||||||
post {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/tts
|
|
||||||
body: json
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
|
|
||||||
headers {
|
|
||||||
Content-Type: application/json
|
|
||||||
}
|
|
||||||
|
|
||||||
body:json {
|
|
||||||
{
|
|
||||||
"backend": "transformers-musicgen",
|
|
||||||
"model": "facebook/musicgen-small",
|
|
||||||
"input": "80s Synths playing Jazz"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -7,7 +7,7 @@ services:
|
|||||||
args:
|
args:
|
||||||
- FFMPEG=true
|
- FFMPEG=true
|
||||||
- IMAGE_TYPE=extras
|
- IMAGE_TYPE=extras
|
||||||
- GO_TAGS=stablediffusion p2p tts
|
- GO_TAGS=p2p tts
|
||||||
env_file:
|
env_file:
|
||||||
- ../.env
|
- ../.env
|
||||||
ports:
|
ports:
|
||||||
|
|||||||
11
.env
11
.env
@@ -29,6 +29,9 @@
|
|||||||
## Enable/Disable single backend (useful if only one GPU is available)
|
## Enable/Disable single backend (useful if only one GPU is available)
|
||||||
# LOCALAI_SINGLE_ACTIVE_BACKEND=true
|
# LOCALAI_SINGLE_ACTIVE_BACKEND=true
|
||||||
|
|
||||||
|
# Forces shutdown of the backends if busy (only if LOCALAI_SINGLE_ACTIVE_BACKEND is set)
|
||||||
|
# LOCALAI_FORCE_BACKEND_SHUTDOWN=true
|
||||||
|
|
||||||
## Specify a build type. Available: cublas, openblas, clblas.
|
## Specify a build type. Available: cublas, openblas, clblas.
|
||||||
## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
|
## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
|
||||||
## OpenBLAS: This is an open-source implementation of the BLAS library that aims to provide highly optimized code for various platforms. It includes support for multi-threading and can be compiled to use hardware-specific features for additional performance. OpenBLAS can run on many kinds of hardware, including CPUs from Intel, AMD, and ARM.
|
## OpenBLAS: This is an open-source implementation of the BLAS library that aims to provide highly optimized code for various platforms. It includes support for multi-threading and can be compiled to use hardware-specific features for additional performance. OpenBLAS can run on many kinds of hardware, including CPUs from Intel, AMD, and ARM.
|
||||||
@@ -38,12 +41,12 @@
|
|||||||
## Uncomment and set to true to enable rebuilding from source
|
## Uncomment and set to true to enable rebuilding from source
|
||||||
# REBUILD=true
|
# REBUILD=true
|
||||||
|
|
||||||
## Enable go tags, available: stablediffusion, tts
|
## Enable go tags, available: p2p, tts
|
||||||
## stablediffusion: image generation with stablediffusion
|
## p2p: enable distributed inferencing
|
||||||
## tts: enables text-to-speech with go-piper
|
## tts: enables text-to-speech with go-piper
|
||||||
## (requires REBUILD=true)
|
## (requires REBUILD=true)
|
||||||
#
|
#
|
||||||
# GO_TAGS=stablediffusion
|
# GO_TAGS=p2p
|
||||||
|
|
||||||
## Path where to store generated images
|
## Path where to store generated images
|
||||||
# LOCALAI_IMAGE_PATH=/tmp/generated/images
|
# LOCALAI_IMAGE_PATH=/tmp/generated/images
|
||||||
@@ -73,7 +76,7 @@
|
|||||||
|
|
||||||
### Define a list of GRPC Servers for llama-cpp workers to distribute the load
|
### Define a list of GRPC Servers for llama-cpp workers to distribute the load
|
||||||
# https://github.com/ggerganov/llama.cpp/pull/6829
|
# https://github.com/ggerganov/llama.cpp/pull/6829
|
||||||
# https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md
|
# https://github.com/ggerganov/llama.cpp/blob/master/tools/rpc/README.md
|
||||||
# LLAMACPP_GRPC_SERVERS=""
|
# LLAMACPP_GRPC_SERVERS=""
|
||||||
|
|
||||||
### Enable to run parallel requests
|
### Enable to run parallel requests
|
||||||
|
|||||||
12
.github/dependabot.yml
vendored
12
.github/dependabot.yml
vendored
@@ -29,10 +29,6 @@ updates:
|
|||||||
schedule:
|
schedule:
|
||||||
# Check for updates to GitHub Actions every weekday
|
# Check for updates to GitHub Actions every weekday
|
||||||
interval: "weekly"
|
interval: "weekly"
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/autogptq"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
- package-ecosystem: "pip"
|
||||||
directory: "/backend/python/bark"
|
directory: "/backend/python/bark"
|
||||||
schedule:
|
schedule:
|
||||||
@@ -81,14 +77,6 @@ updates:
|
|||||||
directory: "/backend/python/transformers"
|
directory: "/backend/python/transformers"
|
||||||
schedule:
|
schedule:
|
||||||
interval: "weekly"
|
interval: "weekly"
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/transformers-musicgen"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/vall-e-x"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
- package-ecosystem: "pip"
|
||||||
directory: "/backend/python/vllm"
|
directory: "/backend/python/vllm"
|
||||||
schedule:
|
schedule:
|
||||||
|
|||||||
2
.github/labeler.yml
vendored
2
.github/labeler.yml
vendored
@@ -1,4 +1,4 @@
|
|||||||
enhancements:
|
enhancement:
|
||||||
- head-branch: ['^feature', 'feature']
|
- head-branch: ['^feature', 'feature']
|
||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
|
|||||||
4
.github/workflows/bump_deps.yaml
vendored
4
.github/workflows/bump_deps.yaml
vendored
@@ -9,10 +9,10 @@ jobs:
|
|||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- repository: "ggerganov/llama.cpp"
|
- repository: "ggml-org/llama.cpp"
|
||||||
variable: "CPPLLAMA_VERSION"
|
variable: "CPPLLAMA_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
- repository: "ggerganov/whisper.cpp"
|
- repository: "ggml-org/whisper.cpp"
|
||||||
variable: "WHISPER_CPP_VERSION"
|
variable: "WHISPER_CPP_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
- repository: "PABannier/bark.cpp"
|
- repository: "PABannier/bark.cpp"
|
||||||
|
|||||||
2
.github/workflows/dependabot_auto.yml
vendored
2
.github/workflows/dependabot_auto.yml
vendored
@@ -14,7 +14,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Dependabot metadata
|
- name: Dependabot metadata
|
||||||
id: metadata
|
id: metadata
|
||||||
uses: dependabot/fetch-metadata@v2.2.0
|
uses: dependabot/fetch-metadata@v2.4.0
|
||||||
with:
|
with:
|
||||||
github-token: "${{ secrets.GITHUB_TOKEN }}"
|
github-token: "${{ secrets.GITHUB_TOKEN }}"
|
||||||
skip-commit-verification: true
|
skip-commit-verification: true
|
||||||
|
|||||||
6
.github/workflows/deploy-explorer.yaml
vendored
6
.github/workflows/deploy-explorer.yaml
vendored
@@ -33,7 +33,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
CGO_ENABLED=0 make build-api
|
CGO_ENABLED=0 make build-api
|
||||||
- name: rm
|
- name: rm
|
||||||
uses: appleboy/ssh-action@v1.2.0
|
uses: appleboy/ssh-action@v1.2.2
|
||||||
with:
|
with:
|
||||||
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
||||||
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
||||||
@@ -42,7 +42,7 @@ jobs:
|
|||||||
script: |
|
script: |
|
||||||
sudo rm -rf local-ai/ || true
|
sudo rm -rf local-ai/ || true
|
||||||
- name: copy file via ssh
|
- name: copy file via ssh
|
||||||
uses: appleboy/scp-action@v0.1.7
|
uses: appleboy/scp-action@v1.0.0
|
||||||
with:
|
with:
|
||||||
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
||||||
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
||||||
@@ -53,7 +53,7 @@ jobs:
|
|||||||
rm: true
|
rm: true
|
||||||
target: ./local-ai
|
target: ./local-ai
|
||||||
- name: restarting
|
- name: restarting
|
||||||
uses: appleboy/ssh-action@v1.2.0
|
uses: appleboy/ssh-action@v1.2.2
|
||||||
with:
|
with:
|
||||||
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
||||||
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
||||||
|
|||||||
9
.github/workflows/generate_grpc_cache.yaml
vendored
9
.github/workflows/generate_grpc_cache.yaml
vendored
@@ -2,9 +2,10 @@ name: 'generate and publish GRPC docker caches'
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
push:
|
|
||||||
branches:
|
schedule:
|
||||||
- master
|
# daily at midnight
|
||||||
|
- cron: '0 0 * * *'
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||||
@@ -16,7 +17,7 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- grpc-base-image: ubuntu:22.04
|
- grpc-base-image: ubuntu:22.04
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'arc-runner-set'
|
||||||
platforms: 'linux/amd64,linux/arm64'
|
platforms: 'linux/amd64,linux/arm64'
|
||||||
runs-on: ${{matrix.runs-on}}
|
runs-on: ${{matrix.runs-on}}
|
||||||
steps:
|
steps:
|
||||||
|
|||||||
2
.github/workflows/generate_intel_image.yaml
vendored
2
.github/workflows/generate_intel_image.yaml
vendored
@@ -15,7 +15,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- base-image: intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04
|
- base-image: intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
runs-on: ${{matrix.runs-on}}
|
runs-on: ${{matrix.runs-on}}
|
||||||
|
|||||||
50
.github/workflows/image-pr.yml
vendored
50
.github/workflows/image-pr.yml
vendored
@@ -33,6 +33,7 @@ jobs:
|
|||||||
# Pushing with all jobs in parallel
|
# Pushing with all jobs in parallel
|
||||||
# eats the bandwidth of all the nodes
|
# eats the bandwidth of all the nodes
|
||||||
max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }}
|
max-parallel: ${{ github.event_name != 'pull_request' && 4 || 8 }}
|
||||||
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
# This is basically covered by the AIO test
|
# This is basically covered by the AIO test
|
||||||
@@ -56,26 +57,35 @@ jobs:
|
|||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
# - build-type: 'hipblas'
|
- build-type: 'hipblas'
|
||||||
# platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
# tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
# tag-suffix: '-hipblas'
|
tag-suffix: '-hipblas'
|
||||||
# ffmpeg: 'false'
|
ffmpeg: 'false'
|
||||||
# image-type: 'extras'
|
image-type: 'extras'
|
||||||
# base-image: "rocm/dev-ubuntu-22.04:6.1"
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
# grpc-base-image: "ubuntu:22.04"
|
grpc-base-image: "ubuntu:22.04"
|
||||||
# runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
# makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
# - build-type: 'sycl_f16'
|
- build-type: 'sycl_f16'
|
||||||
# platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
# tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
# base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||||
# grpc-base-image: "ubuntu:22.04"
|
grpc-base-image: "ubuntu:22.04"
|
||||||
# tag-suffix: 'sycl-f16-ffmpeg'
|
tag-suffix: 'sycl-f16-ffmpeg'
|
||||||
# ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
# image-type: 'extras'
|
image-type: 'extras'
|
||||||
# runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
# makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'vulkan'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-vulkan-ffmpeg-core'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
base-image: "ubuntu:22.04"
|
||||||
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
# core-image-build:
|
# core-image-build:
|
||||||
# uses: ./.github/workflows/image_build.yml
|
# uses: ./.github/workflows/image_build.yml
|
||||||
# with:
|
# with:
|
||||||
|
|||||||
243
.github/workflows/image.yml
vendored
243
.github/workflows/image.yml
vendored
@@ -45,13 +45,13 @@ jobs:
|
|||||||
- build-type: 'hipblas'
|
- build-type: 'hipblas'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
tag-suffix: '-hipblas-ffmpeg'
|
tag-suffix: '-hipblas-extras'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'extras'
|
image-type: 'extras'
|
||||||
aio: "-aio-gpu-hipblas"
|
aio: "-aio-gpu-hipblas"
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
grpc-base-image: "ubuntu:22.04"
|
grpc-base-image: "ubuntu:22.04"
|
||||||
latest-image: 'latest-gpu-hipblas'
|
latest-image: 'latest-gpu-hipblas-extras'
|
||||||
latest-image-aio: 'latest-aio-gpu-hipblas'
|
latest-image-aio: 'latest-aio-gpu-hipblas'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
@@ -59,32 +59,13 @@ jobs:
|
|||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-hipblas'
|
tag-suffix: '-hipblas'
|
||||||
ffmpeg: 'false'
|
|
||||||
image-type: 'extras'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'hipblas'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
tag-suffix: '-hipblas-ffmpeg-core'
|
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
grpc-base-image: "ubuntu:22.04"
|
grpc-base-image: "ubuntu:22.04"
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'hipblas'
|
latest-image: 'latest-gpu-hipblas'
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
tag-suffix: '-hipblas-core'
|
|
||||||
ffmpeg: 'false'
|
|
||||||
image-type: 'core'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
self-hosted-jobs:
|
self-hosted-jobs:
|
||||||
uses: ./.github/workflows/image_build.yml
|
uses: ./.github/workflows/image_build.yml
|
||||||
with:
|
with:
|
||||||
@@ -114,110 +95,58 @@ jobs:
|
|||||||
max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
|
max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
# Extra images
|
|
||||||
- build-type: ''
|
|
||||||
#platforms: 'linux/amd64,linux/arm64'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'auto'
|
|
||||||
tag-suffix: ''
|
|
||||||
ffmpeg: ''
|
|
||||||
image-type: 'extras'
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: ''
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'auto'
|
|
||||||
tag-suffix: '-ffmpeg'
|
|
||||||
ffmpeg: 'true'
|
|
||||||
image-type: 'extras'
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "11"
|
cuda-major-version: "11"
|
||||||
cuda-minor-version: "7"
|
cuda-minor-version: "7"
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-cublas-cuda11'
|
tag-suffix: '-cublas-cuda11-extras'
|
||||||
ffmpeg: ''
|
|
||||||
image-type: 'extras'
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "12"
|
|
||||||
cuda-minor-version: "0"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
tag-suffix: '-cublas-cuda12'
|
|
||||||
ffmpeg: ''
|
|
||||||
image-type: 'extras'
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "11"
|
|
||||||
cuda-minor-version: "7"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'auto'
|
|
||||||
tag-suffix: '-cublas-cuda11-ffmpeg'
|
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'extras'
|
image-type: 'extras'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
aio: "-aio-gpu-nvidia-cuda-11"
|
aio: "-aio-gpu-nvidia-cuda-11"
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-11'
|
latest-image: 'latest-gpu-nvidia-cuda-11-extras'
|
||||||
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
|
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "12"
|
cuda-major-version: "12"
|
||||||
cuda-minor-version: "0"
|
cuda-minor-version: "0"
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-cublas-cuda12-ffmpeg'
|
tag-suffix: '-cublas-cuda12-extras'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'extras'
|
image-type: 'extras'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
aio: "-aio-gpu-nvidia-cuda-12"
|
aio: "-aio-gpu-nvidia-cuda-12"
|
||||||
latest-image: 'latest-gpu-nvidia-cuda-12'
|
latest-image: 'latest-gpu-nvidia-cuda-12-extras'
|
||||||
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
|
latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: ''
|
|
||||||
#platforms: 'linux/amd64,linux/arm64'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'auto'
|
|
||||||
tag-suffix: ''
|
|
||||||
ffmpeg: ''
|
|
||||||
image-type: 'extras'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'sycl_f16'
|
- build-type: 'sycl_f16'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'false'
|
||||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||||
grpc-base-image: "ubuntu:22.04"
|
grpc-base-image: "ubuntu:22.04"
|
||||||
tag-suffix: '-sycl-f16-ffmpeg'
|
tag-suffix: '-sycl-f16-extras'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'extras'
|
image-type: 'extras'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
aio: "-aio-gpu-intel-f16"
|
aio: "-aio-gpu-intel-f16"
|
||||||
latest-image: 'latest-gpu-intel-f16'
|
latest-image: 'latest-gpu-intel-f16-extras'
|
||||||
latest-image-aio: 'latest-aio-gpu-intel-f16'
|
latest-image-aio: 'latest-aio-gpu-intel-f16'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'sycl_f32'
|
- build-type: 'sycl_f32'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'false'
|
||||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||||
grpc-base-image: "ubuntu:22.04"
|
grpc-base-image: "ubuntu:22.04"
|
||||||
tag-suffix: '-sycl-f32-ffmpeg'
|
tag-suffix: '-sycl-f32-extras'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'extras'
|
image-type: 'extras'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
aio: "-aio-gpu-intel-f32"
|
aio: "-aio-gpu-intel-f32"
|
||||||
latest-image: 'latest-gpu-intel-f32'
|
latest-image: 'latest-gpu-intel-f32-extras'
|
||||||
latest-image-aio: 'latest-aio-gpu-intel-f32'
|
latest-image-aio: 'latest-aio-gpu-intel-f32'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
# Core images
|
# Core images
|
||||||
@@ -226,41 +155,23 @@ jobs:
|
|||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||||
grpc-base-image: "ubuntu:22.04"
|
grpc-base-image: "ubuntu:22.04"
|
||||||
tag-suffix: '-sycl-f16-core'
|
tag-suffix: '-sycl-f16'
|
||||||
ffmpeg: 'false'
|
ffmpeg: 'true'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
latest-image: 'latest-gpu-intel-f16'
|
||||||
- build-type: 'sycl_f32'
|
- build-type: 'sycl_f32'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
||||||
grpc-base-image: "ubuntu:22.04"
|
grpc-base-image: "ubuntu:22.04"
|
||||||
tag-suffix: '-sycl-f32-core'
|
tag-suffix: '-sycl-f32'
|
||||||
ffmpeg: 'false'
|
|
||||||
image-type: 'core'
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'sycl_f16'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
tag-suffix: '-sycl-f16-ffmpeg-core'
|
|
||||||
ffmpeg: 'true'
|
|
||||||
image-type: 'core'
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'sycl_f32'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
tag-suffix: '-sycl-f32-ffmpeg-core'
|
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
latest-image: 'latest-gpu-intel-f32'
|
||||||
|
|
||||||
core-image-build:
|
core-image-build:
|
||||||
uses: ./.github/workflows/image_build.yml
|
uses: ./.github/workflows/image_build.yml
|
||||||
@@ -293,7 +204,7 @@ jobs:
|
|||||||
- build-type: ''
|
- build-type: ''
|
||||||
platforms: 'linux/amd64,linux/arm64'
|
platforms: 'linux/amd64,linux/arm64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
tag-suffix: '-ffmpeg-core'
|
tag-suffix: ''
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
@@ -308,97 +219,75 @@ jobs:
|
|||||||
cuda-minor-version: "7"
|
cuda-minor-version: "7"
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-cublas-cuda11-core'
|
tag-suffix: '-cublas-cuda11'
|
||||||
ffmpeg: ''
|
|
||||||
image-type: 'core'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
|
||||||
skip-drivers: 'false'
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "12"
|
|
||||||
cuda-minor-version: "0"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
tag-suffix: '-cublas-cuda12-core'
|
|
||||||
ffmpeg: ''
|
|
||||||
image-type: 'core'
|
|
||||||
base-image: "ubuntu:22.04"
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
|
||||||
skip-drivers: 'false'
|
|
||||||
- build-type: 'cublas'
|
|
||||||
cuda-major-version: "11"
|
|
||||||
cuda-minor-version: "7"
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
tag-suffix: '-cublas-cuda11-ffmpeg-core'
|
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
skip-drivers: 'false'
|
skip-drivers: 'false'
|
||||||
|
latest-image: 'latest-gpu-nvidia-cuda-12'
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "12"
|
cuda-major-version: "12"
|
||||||
cuda-minor-version: "0"
|
cuda-minor-version: "0"
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-cublas-cuda12-ffmpeg-core'
|
tag-suffix: '-cublas-cuda12'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
skip-drivers: 'false'
|
skip-drivers: 'false'
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
|
latest-image: 'latest-gpu-nvidia-cuda-12'
|
||||||
- build-type: 'vulkan'
|
- build-type: 'vulkan'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
tag-suffix: '-vulkan-ffmpeg-core'
|
tag-suffix: '-vulkan'
|
||||||
latest-image: 'latest-vulkan-ffmpeg-core'
|
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
skip-drivers: 'false'
|
skip-drivers: 'false'
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
# parallel-builds:
|
latest-image: 'latest-gpu-vulkan'
|
||||||
# uses: ./.github/workflows/image_build.yml
|
gh-runner:
|
||||||
# with:
|
uses: ./.github/workflows/image_build.yml
|
||||||
# tag-latest: ${{ matrix.tag-latest }}
|
with:
|
||||||
# tag-suffix: ${{ matrix.tag-suffix }}
|
tag-latest: ${{ matrix.tag-latest }}
|
||||||
# ffmpeg: ${{ matrix.ffmpeg }}
|
tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
# image-type: ${{ matrix.image-type }}
|
ffmpeg: ${{ matrix.ffmpeg }}
|
||||||
# build-type: ${{ matrix.build-type }}
|
image-type: ${{ matrix.image-type }}
|
||||||
# cuda-major-version: ${{ matrix.cuda-major-version }}
|
build-type: ${{ matrix.build-type }}
|
||||||
# cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||||
# platforms: ${{ matrix.platforms }}
|
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
# runs-on: ${{ matrix.runs-on }}
|
platforms: ${{ matrix.platforms }}
|
||||||
# aio: ${{ matrix.aio }}
|
runs-on: ${{ matrix.runs-on }}
|
||||||
# base-image: ${{ matrix.base-image }}
|
aio: ${{ matrix.aio }}
|
||||||
# grpc-base-image: ${{ matrix.grpc-base-image }}
|
base-image: ${{ matrix.base-image }}
|
||||||
# makeflags: ${{ matrix.makeflags }}
|
grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||||
# latest-image: ${{ matrix.latest-image }}
|
makeflags: ${{ matrix.makeflags }}
|
||||||
# latest-image-aio: ${{ matrix.latest-image-aio }}
|
latest-image: ${{ matrix.latest-image }}
|
||||||
# skip-drivers: ${{ matrix.skip-drivers }}
|
latest-image-aio: ${{ matrix.latest-image-aio }}
|
||||||
# secrets:
|
skip-drivers: ${{ matrix.skip-drivers }}
|
||||||
# dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
secrets:
|
||||||
# dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
# quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||||
# quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||||
# strategy:
|
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||||
# matrix:
|
strategy:
|
||||||
# include:
|
matrix:
|
||||||
# - build-type: 'cublas'
|
include:
|
||||||
# cuda-major-version: "12"
|
- build-type: 'cublas'
|
||||||
# cuda-minor-version: "0"
|
cuda-major-version: "12"
|
||||||
# platforms: 'linux/arm64'
|
cuda-minor-version: "0"
|
||||||
# tag-latest: 'false'
|
platforms: 'linux/arm64'
|
||||||
# tag-suffix: '-nvidia-l4t-arm64-core'
|
tag-latest: 'false'
|
||||||
# latest-image: 'latest-nvidia-l4t-arm64-core'
|
tag-suffix: '-nvidia-l4t-arm64'
|
||||||
# ffmpeg: 'true'
|
latest-image: 'latest-nvidia-l4t-arm64'
|
||||||
# image-type: 'core'
|
ffmpeg: 'true'
|
||||||
# base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
image-type: 'core'
|
||||||
# runs-on: 'self-hosted'
|
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||||
# makeflags: "--jobs=4 --output-sync=target"
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
# skip-drivers: 'true'
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
|
skip-drivers: 'true'
|
||||||
5
.github/workflows/image_build.yml
vendored
5
.github/workflows/image_build.yml
vendored
@@ -310,6 +310,11 @@ jobs:
|
|||||||
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
|
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
|
||||||
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
|
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
|
||||||
|
|
||||||
|
- name: Cleanup
|
||||||
|
run: |
|
||||||
|
docker builder prune -f
|
||||||
|
docker system prune --force --volumes --all
|
||||||
|
|
||||||
- name: Latest tag
|
- name: Latest tag
|
||||||
# run this on branches, when it is a tag and there is a latest-image defined
|
# run this on branches, when it is a tag and there is a latest-image defined
|
||||||
if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
|
if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
|
||||||
|
|||||||
14
.github/workflows/notify-models.yaml
vendored
14
.github/workflows/notify-models.yaml
vendored
@@ -8,7 +8,7 @@ jobs:
|
|||||||
notify-discord:
|
notify-discord:
|
||||||
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
|
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
|
||||||
env:
|
env:
|
||||||
MODEL_NAME: hermes-2-theta-llama-3-8b
|
MODEL_NAME: gemma-3-12b-it
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
@@ -16,9 +16,9 @@ jobs:
|
|||||||
fetch-depth: 0 # needed to checkout all branches for this Action to work
|
fetch-depth: 0 # needed to checkout all branches for this Action to work
|
||||||
- uses: mudler/localai-github-action@v1
|
- uses: mudler/localai-github-action@v1
|
||||||
with:
|
with:
|
||||||
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
model: 'gemma-3-12b-it' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
||||||
# Check the PR diff using the current branch and the base branch of the PR
|
# Check the PR diff using the current branch and the base branch of the PR
|
||||||
- uses: GrantBirki/git-diff-action@v2.7.0
|
- uses: GrantBirki/git-diff-action@v2.8.0
|
||||||
id: git-diff-action
|
id: git-diff-action
|
||||||
with:
|
with:
|
||||||
json_diff_file_output: diff.json
|
json_diff_file_output: diff.json
|
||||||
@@ -79,7 +79,7 @@ jobs:
|
|||||||
args: ${{ steps.summarize.outputs.message }}
|
args: ${{ steps.summarize.outputs.message }}
|
||||||
- name: Setup tmate session if fails
|
- name: Setup tmate session if fails
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
uses: mxschmitt/action-tmate@v3.22
|
||||||
with:
|
with:
|
||||||
detached: true
|
detached: true
|
||||||
connect-timeout-seconds: 180
|
connect-timeout-seconds: 180
|
||||||
@@ -87,7 +87,7 @@ jobs:
|
|||||||
notify-twitter:
|
notify-twitter:
|
||||||
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
|
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
|
||||||
env:
|
env:
|
||||||
MODEL_NAME: hermes-2-theta-llama-3-8b
|
MODEL_NAME: gemma-3-12b-it
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
@@ -99,7 +99,7 @@ jobs:
|
|||||||
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
|
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
|
||||||
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
|
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
|
||||||
# Check the PR diff using the current branch and the base branch of the PR
|
# Check the PR diff using the current branch and the base branch of the PR
|
||||||
- uses: GrantBirki/git-diff-action@v2.7.0
|
- uses: GrantBirki/git-diff-action@v2.8.0
|
||||||
id: git-diff-action
|
id: git-diff-action
|
||||||
with:
|
with:
|
||||||
json_diff_file_output: diff.json
|
json_diff_file_output: diff.json
|
||||||
@@ -161,7 +161,7 @@ jobs:
|
|||||||
TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
|
TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
|
||||||
- name: Setup tmate session if fails
|
- name: Setup tmate session if fails
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
uses: mxschmitt/action-tmate@v3.22
|
||||||
with:
|
with:
|
||||||
detached: true
|
detached: true
|
||||||
connect-timeout-seconds: 180
|
connect-timeout-seconds: 180
|
||||||
|
|||||||
2
.github/workflows/notify-releases.yaml
vendored
2
.github/workflows/notify-releases.yaml
vendored
@@ -14,7 +14,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- uses: mudler/localai-github-action@v1
|
- uses: mudler/localai-github-action@v1
|
||||||
with:
|
with:
|
||||||
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
model: 'gemma-3-12b-it' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
||||||
- name: Summarize
|
- name: Summarize
|
||||||
id: summarize
|
id: summarize
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
51
.github/workflows/release.yaml
vendored
51
.github/workflows/release.yaml
vendored
@@ -36,6 +36,7 @@ jobs:
|
|||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
|
sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
|
||||||
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
|
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
|
||||||
|
make install-go-tools
|
||||||
- name: Install CUDA Dependencies
|
- name: Install CUDA Dependencies
|
||||||
run: |
|
run: |
|
||||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
|
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
|
||||||
@@ -123,7 +124,7 @@ jobs:
|
|||||||
release/*
|
release/*
|
||||||
- name: Setup tmate session if tests fail
|
- name: Setup tmate session if tests fail
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
uses: mxschmitt/action-tmate@v3.22
|
||||||
with:
|
with:
|
||||||
detached: true
|
detached: true
|
||||||
connect-timeout-seconds: 180
|
connect-timeout-seconds: 180
|
||||||
@@ -151,6 +152,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
|
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
|
||||||
|
make install-go-tools
|
||||||
- name: Intel Dependencies
|
- name: Intel Dependencies
|
||||||
run: |
|
run: |
|
||||||
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
|
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
|
||||||
@@ -232,45 +234,12 @@ jobs:
|
|||||||
release/*
|
release/*
|
||||||
- name: Setup tmate session if tests fail
|
- name: Setup tmate session if tests fail
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
uses: mxschmitt/action-tmate@v3.22
|
||||||
with:
|
with:
|
||||||
detached: true
|
detached: true
|
||||||
connect-timeout-seconds: 180
|
connect-timeout-seconds: 180
|
||||||
limit-access-to-actor: true
|
limit-access-to-actor: true
|
||||||
build-stablediffusion:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- uses: actions/setup-go@v5
|
|
||||||
with:
|
|
||||||
go-version: '1.21.x'
|
|
||||||
cache: false
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
|
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
|
||||||
- name: Build stablediffusion
|
|
||||||
run: |
|
|
||||||
export PATH=$PATH:$GOPATH/bin
|
|
||||||
make backend-assets/grpc/stablediffusion
|
|
||||||
mkdir -p release && cp backend-assets/grpc/stablediffusion release
|
|
||||||
env:
|
|
||||||
GO_TAGS: stablediffusion
|
|
||||||
- uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
name: stablediffusion
|
|
||||||
path: release/
|
|
||||||
- name: Release
|
|
||||||
uses: softprops/action-gh-release@v2
|
|
||||||
if: startsWith(github.ref, 'refs/tags/')
|
|
||||||
with:
|
|
||||||
files: |
|
|
||||||
release/*
|
|
||||||
|
|
||||||
build-macOS-x86_64:
|
build-macOS-x86_64:
|
||||||
runs-on: macos-13
|
runs-on: macos-13
|
||||||
@@ -286,8 +255,7 @@ jobs:
|
|||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
brew install protobuf grpc
|
brew install protobuf grpc
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
|
make install-go-tools
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
|
|
||||||
- name: Build
|
- name: Build
|
||||||
id: build
|
id: build
|
||||||
run: |
|
run: |
|
||||||
@@ -308,7 +276,7 @@ jobs:
|
|||||||
release/*
|
release/*
|
||||||
- name: Setup tmate session if tests fail
|
- name: Setup tmate session if tests fail
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
uses: mxschmitt/action-tmate@v3.22
|
||||||
with:
|
with:
|
||||||
detached: true
|
detached: true
|
||||||
connect-timeout-seconds: 180
|
connect-timeout-seconds: 180
|
||||||
@@ -328,8 +296,7 @@ jobs:
|
|||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
brew install protobuf grpc libomp llvm
|
brew install protobuf grpc libomp llvm
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
make install-go-tools
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
|
||||||
- name: Build
|
- name: Build
|
||||||
id: build
|
id: build
|
||||||
run: |
|
run: |
|
||||||
@@ -350,7 +317,7 @@ jobs:
|
|||||||
release/*
|
release/*
|
||||||
- name: Setup tmate session if tests fail
|
- name: Setup tmate session if tests fail
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
uses: mxschmitt/action-tmate@v3.22
|
||||||
with:
|
with:
|
||||||
detached: true
|
detached: true
|
||||||
connect-timeout-seconds: 180
|
connect-timeout-seconds: 180
|
||||||
|
|||||||
2
.github/workflows/secscan.yaml
vendored
2
.github/workflows/secscan.yaml
vendored
@@ -18,7 +18,7 @@ jobs:
|
|||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
- name: Run Gosec Security Scanner
|
- name: Run Gosec Security Scanner
|
||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
uses: securego/gosec@v2.22.0
|
uses: securego/gosec@v2.22.4
|
||||||
with:
|
with:
|
||||||
# we let the report trigger content trigger a failure using the GitHub Security features.
|
# we let the report trigger content trigger a failure using the GitHub Security features.
|
||||||
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
||||||
|
|||||||
155
.github/workflows/test-extra.yml
vendored
155
.github/workflows/test-extra.yml
vendored
@@ -35,30 +35,6 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
make --jobs=5 --output-sync=target -C backend/python/transformers
|
make --jobs=5 --output-sync=target -C backend/python/transformers
|
||||||
make --jobs=5 --output-sync=target -C backend/python/transformers test
|
make --jobs=5 --output-sync=target -C backend/python/transformers test
|
||||||
|
|
||||||
tests-sentencetransformers:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install build-essential ffmpeg
|
|
||||||
# Install UV
|
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
|
||||||
sudo apt-get install -y libopencv-dev
|
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
|
||||||
|
|
||||||
- name: Test sentencetransformers
|
|
||||||
run: |
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
|
|
||||||
|
|
||||||
|
|
||||||
tests-rerankers:
|
tests-rerankers:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
@@ -102,78 +78,47 @@ jobs:
|
|||||||
make --jobs=5 --output-sync=target -C backend/python/diffusers
|
make --jobs=5 --output-sync=target -C backend/python/diffusers
|
||||||
make --jobs=5 --output-sync=target -C backend/python/diffusers test
|
make --jobs=5 --output-sync=target -C backend/python/diffusers test
|
||||||
|
|
||||||
tests-parler-tts:
|
#tests-vllm:
|
||||||
runs-on: ubuntu-latest
|
# runs-on: ubuntu-latest
|
||||||
steps:
|
# steps:
|
||||||
- name: Clone
|
# - name: Clone
|
||||||
uses: actions/checkout@v4
|
# uses: actions/checkout@v4
|
||||||
with:
|
# with:
|
||||||
submodules: true
|
# submodules: true
|
||||||
- name: Dependencies
|
# - name: Dependencies
|
||||||
run: |
|
# run: |
|
||||||
sudo apt-get update
|
# sudo apt-get update
|
||||||
sudo apt-get install build-essential ffmpeg
|
# sudo apt-get install -y build-essential ffmpeg
|
||||||
# Install UV
|
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
# sudo apt-get install -y libopencv-dev
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
# # Install UV
|
||||||
sudo apt-get install -y libopencv-dev
|
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
# pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||||
|
# - name: Test vllm backend
|
||||||
|
# run: |
|
||||||
|
# make --jobs=5 --output-sync=target -C backend/python/vllm
|
||||||
|
# make --jobs=5 --output-sync=target -C backend/python/vllm test
|
||||||
|
# tests-transformers-musicgen:
|
||||||
|
# runs-on: ubuntu-latest
|
||||||
|
# steps:
|
||||||
|
# - name: Clone
|
||||||
|
# uses: actions/checkout@v4
|
||||||
|
# with:
|
||||||
|
# submodules: true
|
||||||
|
# - name: Dependencies
|
||||||
|
# run: |
|
||||||
|
# sudo apt-get update
|
||||||
|
# sudo apt-get install build-essential ffmpeg
|
||||||
|
# # Install UV
|
||||||
|
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
|
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
|
# sudo apt-get install -y libopencv-dev
|
||||||
|
# pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||||
|
|
||||||
- name: Test parler-tts
|
# - name: Test transformers-musicgen
|
||||||
run: |
|
# run: |
|
||||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts
|
# make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
|
||||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts test
|
# make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
|
||||||
- name: Setup tmate session if tests fail
|
|
||||||
if: ${{ failure() }}
|
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
|
||||||
with:
|
|
||||||
detached: true
|
|
||||||
connect-timeout-seconds: 180
|
|
||||||
limit-access-to-actor: true
|
|
||||||
|
|
||||||
tests-openvoice:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install build-essential ffmpeg
|
|
||||||
# Install UV
|
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
|
||||||
sudo apt-get install -y libopencv-dev
|
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
|
||||||
|
|
||||||
- name: Test openvoice
|
|
||||||
run: |
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/openvoice
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/openvoice test
|
|
||||||
|
|
||||||
tests-transformers-musicgen:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install build-essential ffmpeg
|
|
||||||
# Install UV
|
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
|
||||||
sudo apt-get install -y libopencv-dev
|
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
|
||||||
|
|
||||||
- name: Test transformers-musicgen
|
|
||||||
run: |
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
|
|
||||||
|
|
||||||
# tests-bark:
|
# tests-bark:
|
||||||
# runs-on: ubuntu-latest
|
# runs-on: ubuntu-latest
|
||||||
@@ -260,26 +205,6 @@ jobs:
|
|||||||
# run: |
|
# run: |
|
||||||
# make --jobs=5 --output-sync=target -C backend/python/vllm
|
# make --jobs=5 --output-sync=target -C backend/python/vllm
|
||||||
# make --jobs=5 --output-sync=target -C backend/python/vllm test
|
# make --jobs=5 --output-sync=target -C backend/python/vllm test
|
||||||
tests-vallex:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install build-essential ffmpeg
|
|
||||||
# Install UV
|
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
|
||||||
sudo apt-get install -y libopencv-dev
|
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
|
||||||
- name: Test vall-e-x
|
|
||||||
run: |
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/vall-e-x
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/vall-e-x test
|
|
||||||
|
|
||||||
tests-coqui:
|
tests-coqui:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|||||||
20
.github/workflows/test.yml
vendored
20
.github/workflows/test.yml
vendored
@@ -71,7 +71,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
|
sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
|
||||||
sudo apt-get install -y libgmock-dev
|
sudo apt-get install -y libgmock-dev clang
|
||||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||||
@@ -96,19 +96,17 @@ jobs:
|
|||||||
|
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
|
go install github.com/GeertJohan/go.rice/rice@latest
|
||||||
|
|
||||||
# The python3-grpc-tools package in 22.04 is too old
|
# The python3-grpc-tools package in 22.04 is too old
|
||||||
pip install --user grpcio-tools
|
pip install --user grpcio-tools
|
||||||
|
|
||||||
sudo rm -rfv /usr/bin/conda || true
|
make -C backend/python/transformers
|
||||||
PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
|
|
||||||
|
|
||||||
# Pre-build piper before we start tests in order to have shared libraries in place
|
# Pre-build piper before we start tests in order to have shared libraries in place
|
||||||
make sources/go-piper && \
|
make sources/go-piper && \
|
||||||
GO_TAGS="tts" make -C sources/go-piper piper.o && \
|
GO_TAGS="tts" make -C sources/go-piper piper.o && \
|
||||||
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
|
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/
|
||||||
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
|
|
||||||
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
|
||||||
env:
|
env:
|
||||||
CUDA_VERSION: 12-4
|
CUDA_VERSION: 12-4
|
||||||
- name: Cache grpc
|
- name: Cache grpc
|
||||||
@@ -130,10 +128,10 @@ jobs:
|
|||||||
cd grpc && cd cmake/build && sudo make --jobs 5 install
|
cd grpc && cd cmake/build && sudo make --jobs 5 install
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
|
PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
|
||||||
- name: Setup tmate session if tests fail
|
- name: Setup tmate session if tests fail
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
uses: mxschmitt/action-tmate@v3.22
|
||||||
with:
|
with:
|
||||||
detached: true
|
detached: true
|
||||||
connect-timeout-seconds: 180
|
connect-timeout-seconds: 180
|
||||||
@@ -186,6 +184,7 @@ jobs:
|
|||||||
rm protoc.zip
|
rm protoc.zip
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
|
go install github.com/GeertJohan/go.rice/rice@latest
|
||||||
PATH="$PATH:$HOME/go/bin" make protogen-go
|
PATH="$PATH:$HOME/go/bin" make protogen-go
|
||||||
- name: Build images
|
- name: Build images
|
||||||
run: |
|
run: |
|
||||||
@@ -197,7 +196,7 @@ jobs:
|
|||||||
make run-e2e-aio
|
make run-e2e-aio
|
||||||
- name: Setup tmate session if tests fail
|
- name: Setup tmate session if tests fail
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
uses: mxschmitt/action-tmate@v3.22
|
||||||
with:
|
with:
|
||||||
detached: true
|
detached: true
|
||||||
connect-timeout-seconds: 180
|
connect-timeout-seconds: 180
|
||||||
@@ -225,6 +224,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
|
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
|
||||||
pip install --user --no-cache-dir grpcio-tools
|
pip install --user --no-cache-dir grpcio-tools
|
||||||
|
go install github.com/GeertJohan/go.rice/rice@latest
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
export C_INCLUDE_PATH=/usr/local/include
|
export C_INCLUDE_PATH=/usr/local/include
|
||||||
@@ -235,7 +235,7 @@ jobs:
|
|||||||
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
|
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
|
||||||
- name: Setup tmate session if tests fail
|
- name: Setup tmate session if tests fail
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
uses: mxschmitt/action-tmate@v3.22
|
||||||
with:
|
with:
|
||||||
detached: true
|
detached: true
|
||||||
connect-timeout-seconds: 180
|
connect-timeout-seconds: 180
|
||||||
|
|||||||
2
.vscode/launch.json
vendored
2
.vscode/launch.json
vendored
@@ -26,7 +26,7 @@
|
|||||||
"LOCALAI_P2P": "true",
|
"LOCALAI_P2P": "true",
|
||||||
"LOCALAI_FEDERATED": "true"
|
"LOCALAI_FEDERATED": "true"
|
||||||
},
|
},
|
||||||
"buildFlags": ["-tags", "stablediffusion p2p tts", "-v"],
|
"buildFlags": ["-tags", "p2p tts", "-v"],
|
||||||
"envFile": "${workspaceFolder}/.env",
|
"envFile": "${workspaceFolder}/.env",
|
||||||
"cwd": "${workspaceRoot}"
|
"cwd": "${workspaceRoot}"
|
||||||
}
|
}
|
||||||
|
|||||||
83
Dockerfile
83
Dockerfile
@@ -15,8 +15,7 @@ ARG TARGETARCH
|
|||||||
ARG TARGETVARIANT
|
ARG TARGETVARIANT
|
||||||
|
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
|
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
|
||||||
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
@@ -25,6 +24,7 @@ RUN apt-get update && \
|
|||||||
ca-certificates \
|
ca-certificates \
|
||||||
curl libssl-dev \
|
curl libssl-dev \
|
||||||
git \
|
git \
|
||||||
|
git-lfs \
|
||||||
unzip upx-ucl && \
|
unzip upx-ucl && \
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
@@ -46,9 +46,10 @@ EOT
|
|||||||
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
|
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
|
||||||
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
|
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
|
||||||
|
|
||||||
# Install grpc compilers
|
# Install grpc compilers and rice
|
||||||
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
|
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af && \
|
||||||
|
go install github.com/GeertJohan/go.rice/rice@latest
|
||||||
|
|
||||||
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
|
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
|
||||||
RUN update-ca-certificates
|
RUN update-ca-certificates
|
||||||
@@ -69,14 +70,10 @@ ENV PATH=/opt/rocm/bin:${PATH}
|
|||||||
# OpenBLAS requirements and stable diffusion
|
# OpenBLAS requirements and stable diffusion
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
libopenblas-dev \
|
libopenblas-dev && \
|
||||||
libopencv-dev && \
|
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Set up OpenCV
|
|
||||||
RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
|
||||||
|
|
||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
|
|
||||||
###################################
|
###################################
|
||||||
@@ -251,7 +248,7 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
|
|||||||
|
|
||||||
FROM requirements-drivers AS builder-base
|
FROM requirements-drivers AS builder-base
|
||||||
|
|
||||||
ARG GO_TAGS="stablediffusion tts p2p"
|
ARG GO_TAGS="tts p2p"
|
||||||
ARG GRPC_BACKENDS
|
ARG GRPC_BACKENDS
|
||||||
ARG MAKEFLAGS
|
ARG MAKEFLAGS
|
||||||
ARG LD_FLAGS="-s -w"
|
ARG LD_FLAGS="-s -w"
|
||||||
@@ -285,35 +282,12 @@ RUN <<EOT bash
|
|||||||
fi
|
fi
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
|
|
||||||
###################################
|
|
||||||
###################################
|
|
||||||
|
|
||||||
# This first portion of builder holds the layers specifically used to build backend-assets/grpc/stablediffusion
|
|
||||||
# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
|
|
||||||
FROM builder-base AS builder-sd
|
|
||||||
|
|
||||||
# stablediffusion does not tolerate a newer version of abseil, copy only over enough elements to build it
|
|
||||||
COPY Makefile .
|
|
||||||
COPY go.mod .
|
|
||||||
COPY go.sum .
|
|
||||||
COPY backend/backend.proto ./backend/backend.proto
|
|
||||||
COPY backend/go/image/stablediffusion ./backend/go/image/stablediffusion
|
|
||||||
COPY pkg/grpc ./pkg/grpc
|
|
||||||
COPY pkg/stablediffusion ./pkg/stablediffusion
|
|
||||||
RUN git init
|
|
||||||
RUN make sources/go-stable-diffusion
|
|
||||||
RUN touch prepare-sources
|
|
||||||
|
|
||||||
# Actually build the backend
|
|
||||||
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make backend-assets/grpc/stablediffusion
|
|
||||||
|
|
||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
|
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
|
||||||
# Adjustments to the build process should likely be made here.
|
# Adjustments to the build process should likely be made here.
|
||||||
FROM builder-sd AS builder
|
FROM builder-base AS builder
|
||||||
|
|
||||||
# Install the pre-built GRPC
|
# Install the pre-built GRPC
|
||||||
COPY --from=grpc /opt/grpc /usr/local
|
COPY --from=grpc /opt/grpc /usr/local
|
||||||
@@ -327,11 +301,10 @@ COPY .git .
|
|||||||
RUN make prepare
|
RUN make prepare
|
||||||
|
|
||||||
## Build the binary
|
## Build the binary
|
||||||
## If it's CUDA or hipblas, we want to skip some of the llama-compat backends to save space
|
## If we're on arm64 AND using cublas/hipblas, skip some of the llama-compat backends to save space
|
||||||
## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
|
## Otherwise just run the normal build
|
||||||
## (both will use CUDA or hipblas for the actual computation)
|
RUN if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
||||||
RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
|
||||||
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
|
|
||||||
else \
|
else \
|
||||||
make build; \
|
make build; \
|
||||||
fi
|
fi
|
||||||
@@ -353,8 +326,6 @@ ARG FFMPEG
|
|||||||
|
|
||||||
COPY --from=grpc /opt/grpc /usr/local
|
COPY --from=grpc /opt/grpc /usr/local
|
||||||
|
|
||||||
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion /build/backend-assets/grpc/stablediffusion
|
|
||||||
|
|
||||||
COPY .devcontainer-scripts /.devcontainer-scripts
|
COPY .devcontainer-scripts /.devcontainer-scripts
|
||||||
|
|
||||||
# Add FFmpeg
|
# Add FFmpeg
|
||||||
@@ -427,36 +398,28 @@ COPY --from=builder /build/local-ai ./
|
|||||||
# Copy shared libraries for piper
|
# Copy shared libraries for piper
|
||||||
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
|
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
|
||||||
|
|
||||||
# do not let stablediffusion rebuild (requires an older version of absl)
|
|
||||||
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
|
|
||||||
|
|
||||||
# Change the shell to bash so we can use [[ tests below
|
# Change the shell to bash so we can use [[ tests below
|
||||||
SHELL ["/bin/bash", "-c"]
|
SHELL ["/bin/bash", "-c"]
|
||||||
# We try to strike a balance between individual layer size (as that affects total push time) and total image size
|
# We try to strike a balance between individual layer size (as that affects total push time) and total image size
|
||||||
# Splitting the backends into more groups with fewer items results in a larger image, but a smaller size for the largest layer
|
# Splitting the backends into more groups with fewer items results in a larger image, but a smaller size for the largest layer
|
||||||
# Splitting the backends into fewer groups with more items results in a smaller image, but a larger size for the largest layer
|
# Splitting the backends into fewer groups with more items results in a smaller image, but a larger size for the largest layer
|
||||||
|
|
||||||
|
RUN if [[ ( "${IMAGE_TYPE}" == "extras ")]]; then \
|
||||||
|
apt-get -qq -y install espeak-ng \
|
||||||
|
; fi
|
||||||
|
|
||||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/coqui \
|
make -C backend/python/coqui \
|
||||||
; fi && \
|
; fi && \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "parler-tts" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
if [[ ( "${EXTRA_BACKENDS}" =~ "faster-whisper" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/parler-tts \
|
make -C backend/python/faster-whisper \
|
||||||
; fi && \
|
; fi && \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/diffusers \
|
make -C backend/python/diffusers \
|
||||||
; fi && \
|
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
|
||||||
make -C backend/python/transformers-musicgen \
|
|
||||||
; fi
|
; fi
|
||||||
|
|
||||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/vall-e-x \
|
make -C backend/python/kokoro \
|
||||||
; fi && \
|
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
|
||||||
make -C backend/python/openvoice \
|
|
||||||
; fi && \
|
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
|
||||||
make -C backend/python/sentencetransformers \
|
|
||||||
; fi && \
|
; fi && \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/exllama2 \
|
make -C backend/python/exllama2 \
|
||||||
@@ -468,17 +431,11 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$I
|
|||||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/vllm \
|
make -C backend/python/vllm \
|
||||||
; fi && \
|
; fi && \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "autogptq" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
|
||||||
make -C backend/python/autogptq \
|
|
||||||
; fi && \
|
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "bark" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
if [[ ( "${EXTRA_BACKENDS}" =~ "bark" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/bark \
|
make -C backend/python/bark \
|
||||||
; fi && \
|
; fi && \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/rerankers \
|
make -C backend/python/rerankers \
|
||||||
; fi && \
|
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "mamba" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
|
||||||
make -C backend/python/mamba \
|
|
||||||
; fi
|
; fi
|
||||||
|
|
||||||
# Make sure the models directory exists
|
# Make sure the models directory exists
|
||||||
|
|||||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
|||||||
MIT License
|
MIT License
|
||||||
|
|
||||||
Copyright (c) 2023-2024 Ettore Di Giacinto (mudler@localai.io)
|
Copyright (c) 2023-2025 Ettore Di Giacinto (mudler@localai.io)
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
|||||||
286
Makefile
286
Makefile
@@ -6,33 +6,26 @@ BINARY_NAME=local-ai
|
|||||||
DETECT_LIBS?=true
|
DETECT_LIBS?=true
|
||||||
|
|
||||||
# llama.cpp versions
|
# llama.cpp versions
|
||||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
CPPLLAMA_VERSION?=e5c834f718a32b7584f142799bbf508fddb9021c
|
||||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
|
||||||
CPPLLAMA_VERSION?=504af20ee4eae72080a56d59d744f6774f7901ce
|
|
||||||
|
|
||||||
# whisper.cpp version
|
# whisper.cpp version
|
||||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
|
||||||
WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
|
WHISPER_CPP_VERSION?=e41bc5c61ae66af6be2bd7011769bb821a83e8ae
|
||||||
|
|
||||||
# go-piper version
|
# go-piper version
|
||||||
PIPER_REPO?=https://github.com/mudler/go-piper
|
PIPER_REPO?=https://github.com/mudler/go-piper
|
||||||
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
|
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
|
||||||
|
|
||||||
# stablediffusion version
|
|
||||||
STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
|
|
||||||
STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
|
|
||||||
|
|
||||||
# tinydream version
|
|
||||||
TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
|
|
||||||
TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
|
|
||||||
|
|
||||||
# bark.cpp
|
# bark.cpp
|
||||||
BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
|
BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
|
||||||
BARKCPP_VERSION?=v1.0.0
|
BARKCPP_VERSION?=v1.0.0
|
||||||
|
|
||||||
# stablediffusion.cpp (ggml)
|
# stablediffusion.cpp (ggml)
|
||||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
STABLEDIFFUSION_GGML_REPO?=https://github.com/richiejp/stable-diffusion.cpp
|
||||||
STABLEDIFFUSION_GGML_VERSION?=dcf91f9e0f2cbf9da472ee2a556751ed4bab2d2a
|
STABLEDIFFUSION_GGML_VERSION?=53e3b17eb3d0b5760ced06a1f98320b68b34aaae
|
||||||
|
|
||||||
|
# ONEAPI variables for SYCL
|
||||||
|
export ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
||||||
|
|
||||||
ONNX_VERSION?=1.20.0
|
ONNX_VERSION?=1.20.0
|
||||||
ONNX_ARCH?=x64
|
ONNX_ARCH?=x64
|
||||||
@@ -40,8 +33,12 @@ ONNX_OS?=linux
|
|||||||
|
|
||||||
export BUILD_TYPE?=
|
export BUILD_TYPE?=
|
||||||
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
|
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
|
||||||
export CMAKE_ARGS?=
|
export CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
|
||||||
|
export WHISPER_CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
|
||||||
export BACKEND_LIBS?=
|
export BACKEND_LIBS?=
|
||||||
|
export WHISPER_DIR=$(abspath ./sources/whisper.cpp)
|
||||||
|
export WHISPER_INCLUDE_PATH=$(WHISPER_DIR)/include:$(WHISPER_DIR)/ggml/include
|
||||||
|
export WHISPER_LIBRARY_PATH=$(WHISPER_DIR)/build/src/:$(WHISPER_DIR)/build/ggml/src
|
||||||
|
|
||||||
CGO_LDFLAGS?=
|
CGO_LDFLAGS?=
|
||||||
CGO_LDFLAGS_WHISPER?=
|
CGO_LDFLAGS_WHISPER?=
|
||||||
@@ -91,6 +88,7 @@ endif
|
|||||||
# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
|
# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
|
||||||
ifeq ($(NATIVE),false)
|
ifeq ($(NATIVE),false)
|
||||||
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# Detect if we are running on arm64
|
# Detect if we are running on arm64
|
||||||
@@ -118,13 +116,31 @@ ifeq ($(OS),Darwin)
|
|||||||
# disable metal if on Darwin and any other value is explicitly passed.
|
# disable metal if on Darwin and any other value is explicitly passed.
|
||||||
else ifneq ($(BUILD_TYPE),metal)
|
else ifneq ($(BUILD_TYPE),metal)
|
||||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||||
export GGML_NO_ACCELERATE=1
|
export GGML_NO_ACCELERATE=1
|
||||||
export GGML_NO_METAL=1
|
export GGML_NO_METAL=1
|
||||||
|
GO_LDFLAGS_WHISPER+=-lggml-blas
|
||||||
|
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),metal)
|
ifeq ($(BUILD_TYPE),metal)
|
||||||
# -lcblas removed: it seems to always be listed as a duplicate flag.
|
|
||||||
CGO_LDFLAGS += -framework Accelerate
|
CGO_LDFLAGS += -framework Accelerate
|
||||||
|
CGO_LDFLAGS_WHISPER+=-lggml-metal -lggml-blas
|
||||||
|
CMAKE_ARGS+=-DGGML_METAL=ON
|
||||||
|
CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
|
||||||
|
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
||||||
|
CMAKE_ARGS+=-DGGML_OPENMP=OFF
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_METAL=ON
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_METAL_USE_BF16=ON
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
||||||
|
WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_EXAMPLES=OFF
|
||||||
|
WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_TESTS=OFF
|
||||||
|
WHISPER_CMAKE_ARGS+=-DWHISPER_BUILD_SERVER=OFF
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_OPENMP=OFF
|
||||||
|
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-metal/:$(WHISPER_DIR)/build/ggml/src/ggml-blas
|
||||||
|
else
|
||||||
|
CGO_LDFLAGS_WHISPER+=-lggml-blas
|
||||||
|
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-blas
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
CGO_LDFLAGS_WHISPER+=-lgomp
|
CGO_LDFLAGS_WHISPER+=-lgomp
|
||||||
@@ -136,21 +152,29 @@ ifeq ($(BUILD_TYPE),openblas)
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),cublas)
|
ifeq ($(BUILD_TYPE),cublas)
|
||||||
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
|
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH) -L$(CUDA_LIBPATH)/stubs/ -lcuda
|
||||||
export GGML_CUDA=1
|
export GGML_CUDA=1
|
||||||
CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft
|
CMAKE_ARGS+=-DGGML_CUDA=ON
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_CUDA=ON
|
||||||
|
CGO_LDFLAGS_WHISPER+=-lcufft -lggml-cuda
|
||||||
|
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-cuda/
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),vulkan)
|
ifeq ($(BUILD_TYPE),vulkan)
|
||||||
CMAKE_ARGS+=-DGGML_VULKAN=1
|
CMAKE_ARGS+=-DGGML_VULKAN=1
|
||||||
|
WHISPER_CMAKE_ARGS+=-DGGML_VULKAN=1
|
||||||
|
CGO_LDFLAGS_WHISPER+=-lggml-vulkan -lvulkan
|
||||||
|
export WHISPER_LIBRARY_PATH:=$(WHISPER_LIBRARY_PATH):$(WHISPER_DIR)/build/ggml/src/ggml-vulkan/
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||||
export GGML_SYCL=1
|
export GGML_SYCL=1
|
||||||
|
CMAKE_ARGS+=-DGGML_SYCL=ON
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),sycl_f16)
|
ifeq ($(BUILD_TYPE),sycl_f16)
|
||||||
export GGML_SYCL_F16=1
|
export GGML_SYCL_F16=1
|
||||||
|
CMAKE_ARGS+=-DGGML_SYCL_F16=ON
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),hipblas)
|
ifeq ($(BUILD_TYPE),hipblas)
|
||||||
@@ -159,10 +183,9 @@ ifeq ($(BUILD_TYPE),hipblas)
|
|||||||
LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib
|
LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib
|
||||||
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||||
# llama-ggml has no hipblas support, so override it here.
|
|
||||||
export STABLE_BUILD_TYPE=
|
export STABLE_BUILD_TYPE=
|
||||||
export GGML_HIP=1
|
export GGML_HIP=1
|
||||||
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
|
GPU_TARGETS ?= gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102
|
||||||
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
||||||
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
||||||
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
|
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
|
||||||
@@ -183,16 +206,6 @@ ifeq ($(STATIC),true)
|
|||||||
LD_FLAGS+=-linkmode external -extldflags -static
|
LD_FLAGS+=-linkmode external -extldflags -static
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
|
|
||||||
# OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
|
|
||||||
OPTIONAL_GRPC+=backend-assets/grpc/stablediffusion
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(findstring tinydream,$(GO_TAGS)),tinydream)
|
|
||||||
# OPTIONAL_TARGETS+=go-tiny-dream/libtinydream.a
|
|
||||||
OPTIONAL_GRPC+=backend-assets/grpc/tinydream
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
||||||
# OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
|
# OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
|
||||||
# OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
|
# OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
|
||||||
@@ -204,8 +217,8 @@ endif
|
|||||||
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
|
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
||||||
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
|
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
|
||||||
@@ -239,19 +252,6 @@ endif
|
|||||||
|
|
||||||
all: help
|
all: help
|
||||||
|
|
||||||
## go-llama.cpp
|
|
||||||
sources/go-llama.cpp:
|
|
||||||
mkdir -p sources/go-llama.cpp
|
|
||||||
cd sources/go-llama.cpp && \
|
|
||||||
git init && \
|
|
||||||
git remote add origin $(GOLLAMA_REPO) && \
|
|
||||||
git fetch origin && \
|
|
||||||
git checkout $(GOLLAMA_VERSION) && \
|
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
|
||||||
|
|
||||||
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
|
|
||||||
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
|
|
||||||
|
|
||||||
## bark.cpp
|
## bark.cpp
|
||||||
sources/bark.cpp:
|
sources/bark.cpp:
|
||||||
git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
|
git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
|
||||||
@@ -282,19 +282,6 @@ sources/go-piper:
|
|||||||
sources/go-piper/libpiper_binding.a: sources/go-piper
|
sources/go-piper/libpiper_binding.a: sources/go-piper
|
||||||
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
|
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
|
||||||
|
|
||||||
## stable diffusion (onnx)
|
|
||||||
sources/go-stable-diffusion:
|
|
||||||
mkdir -p sources/go-stable-diffusion
|
|
||||||
cd sources/go-stable-diffusion && \
|
|
||||||
git init && \
|
|
||||||
git remote add origin $(STABLEDIFFUSION_REPO) && \
|
|
||||||
git fetch origin && \
|
|
||||||
git checkout $(STABLEDIFFUSION_VERSION) && \
|
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
|
||||||
|
|
||||||
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
|
|
||||||
CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
|
|
||||||
|
|
||||||
## stablediffusion (ggml)
|
## stablediffusion (ggml)
|
||||||
sources/stablediffusion-ggml.cpp:
|
sources/stablediffusion-ggml.cpp:
|
||||||
git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
|
git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
|
||||||
@@ -307,11 +294,7 @@ backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp
|
|||||||
$(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a
|
$(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a
|
||||||
|
|
||||||
backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc
|
backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ LIBRARY_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ \
|
$(MAKE) -C backend/go/image/stablediffusion-ggml CGO_LDFLAGS="$(CGO_LDFLAGS)" stablediffusion-ggml
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion-ggml ./backend/go/image/stablediffusion-ggml/
|
|
||||||
ifneq ($(UPX),)
|
|
||||||
$(UPX) backend-assets/grpc/stablediffusion-ggml
|
|
||||||
endif
|
|
||||||
|
|
||||||
sources/onnxruntime:
|
sources/onnxruntime:
|
||||||
mkdir -p sources/onnxruntime
|
mkdir -p sources/onnxruntime
|
||||||
@@ -327,19 +310,6 @@ else
|
|||||||
mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
|
mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
## tiny-dream
|
|
||||||
sources/go-tiny-dream:
|
|
||||||
mkdir -p sources/go-tiny-dream
|
|
||||||
cd sources/go-tiny-dream && \
|
|
||||||
git init && \
|
|
||||||
git remote add origin $(TINYDREAM_REPO) && \
|
|
||||||
git fetch origin && \
|
|
||||||
git checkout $(TINYDREAM_VERSION) && \
|
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
|
||||||
|
|
||||||
sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
|
|
||||||
$(MAKE) -C sources/go-tiny-dream libtinydream.a
|
|
||||||
|
|
||||||
## whisper
|
## whisper
|
||||||
sources/whisper.cpp:
|
sources/whisper.cpp:
|
||||||
mkdir -p sources/whisper.cpp
|
mkdir -p sources/whisper.cpp
|
||||||
@@ -350,26 +320,21 @@ sources/whisper.cpp:
|
|||||||
git checkout $(WHISPER_CPP_VERSION) && \
|
git checkout $(WHISPER_CPP_VERSION) && \
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
git submodule update --init --recursive --depth 1 --single-branch
|
||||||
|
|
||||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
sources/whisper.cpp/build/src/libwhisper.a: sources/whisper.cpp
|
||||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
cd sources/whisper.cpp && cmake $(WHISPER_CMAKE_ARGS) . -B ./build
|
||||||
|
cd sources/whisper.cpp/build && cmake --build . --config Release
|
||||||
|
|
||||||
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
|
get-sources: sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
|
||||||
|
|
||||||
replace:
|
replace:
|
||||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
||||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
||||||
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
|
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
|
||||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
|
||||||
|
|
||||||
dropreplace:
|
dropreplace:
|
||||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
||||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
||||||
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
|
|
||||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
||||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
|
|
||||||
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
|
|
||||||
|
|
||||||
prepare-sources: get-sources replace
|
prepare-sources: get-sources replace
|
||||||
$(GOCMD) mod download
|
$(GOCMD) mod download
|
||||||
@@ -377,11 +342,8 @@ prepare-sources: get-sources replace
|
|||||||
## GENERIC
|
## GENERIC
|
||||||
rebuild: ## Rebuilds the project
|
rebuild: ## Rebuilds the project
|
||||||
$(GOCMD) clean -cache
|
$(GOCMD) clean -cache
|
||||||
$(MAKE) -C sources/go-llama.cpp clean
|
|
||||||
$(MAKE) -C sources/whisper.cpp clean
|
$(MAKE) -C sources/whisper.cpp clean
|
||||||
$(MAKE) -C sources/go-stable-diffusion clean
|
|
||||||
$(MAKE) -C sources/go-piper clean
|
$(MAKE) -C sources/go-piper clean
|
||||||
$(MAKE) -C sources/go-tiny-dream clean
|
|
||||||
$(MAKE) build
|
$(MAKE) build
|
||||||
|
|
||||||
prepare: prepare-sources $(OPTIONAL_TARGETS)
|
prepare: prepare-sources $(OPTIONAL_TARGETS)
|
||||||
@@ -410,8 +372,14 @@ clean-tests:
|
|||||||
clean-dc: clean
|
clean-dc: clean
|
||||||
cp -r /build/backend-assets /workspace/backend-assets
|
cp -r /build/backend-assets /workspace/backend-assets
|
||||||
|
|
||||||
|
## Install Go tools
|
||||||
|
install-go-tools:
|
||||||
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
|
go install github.com/GeertJohan/go.rice/rice@latest
|
||||||
|
|
||||||
## Build:
|
## Build:
|
||||||
build: prepare backend-assets grpcs ## Build the project
|
build: prepare backend-assets grpcs install-go-tools ## Build the project
|
||||||
$(info ${GREEN}I local-ai build info:${RESET})
|
$(info ${GREEN}I local-ai build info:${RESET})
|
||||||
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
|
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
|
||||||
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
|
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
|
||||||
@@ -421,7 +389,9 @@ ifneq ($(BACKEND_LIBS),)
|
|||||||
$(MAKE) backend-assets/lib
|
$(MAKE) backend-assets/lib
|
||||||
cp -f $(BACKEND_LIBS) backend-assets/lib/
|
cp -f $(BACKEND_LIBS) backend-assets/lib/
|
||||||
endif
|
endif
|
||||||
|
rm -rf $(BINARY_NAME) || true
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
|
||||||
|
rice append --exec $(BINARY_NAME)
|
||||||
|
|
||||||
build-minimal:
|
build-minimal:
|
||||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
|
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
|
||||||
@@ -483,7 +453,7 @@ run: prepare ## run local-ai
|
|||||||
test-models/testmodel.ggml:
|
test-models/testmodel.ggml:
|
||||||
mkdir test-models
|
mkdir test-models
|
||||||
mkdir test-dir
|
mkdir test-dir
|
||||||
wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel.ggml
|
wget -q https://huggingface.co/RichardErkhov/Qwen_-_Qwen2-1.5B-Instruct-gguf/resolve/main/Qwen2-1.5B-Instruct.Q2_K.gguf -O test-models/testmodel.ggml
|
||||||
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
||||||
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
|
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
|
||||||
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
||||||
@@ -493,13 +463,13 @@ prepare-test: grpcs
|
|||||||
cp -rf backend-assets core/http
|
cp -rf backend-assets core/http
|
||||||
cp tests/models_fixtures/* test-models
|
cp tests/models_fixtures/* test-models
|
||||||
|
|
||||||
|
## Test targets
|
||||||
test: prepare test-models/testmodel.ggml grpcs
|
test: prepare test-models/testmodel.ggml grpcs
|
||||||
@echo 'Running tests'
|
@echo 'Running tests'
|
||||||
export GO_TAGS="tts stablediffusion debug"
|
export GO_TAGS="tts debug"
|
||||||
$(MAKE) prepare-test
|
$(MAKE) prepare-test
|
||||||
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||||
$(MAKE) test-llama
|
|
||||||
$(MAKE) test-llama-gguf
|
$(MAKE) test-llama-gguf
|
||||||
$(MAKE) test-tts
|
$(MAKE) test-tts
|
||||||
$(MAKE) test-stablediffusion
|
$(MAKE) test-stablediffusion
|
||||||
@@ -528,10 +498,6 @@ teardown-e2e:
|
|||||||
rm -rf $(TEST_DIR) || true
|
rm -rf $(TEST_DIR) || true
|
||||||
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
||||||
|
|
||||||
test-llama: prepare-test
|
|
||||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
|
||||||
|
|
||||||
test-llama-gguf: prepare-test
|
test-llama-gguf: prepare-test
|
||||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
||||||
@@ -572,7 +538,7 @@ protogen: protogen-go protogen-python
|
|||||||
protogen-clean: protogen-go-clean protogen-python-clean
|
protogen-clean: protogen-go-clean protogen-python-clean
|
||||||
|
|
||||||
.PHONY: protogen-go
|
.PHONY: protogen-go
|
||||||
protogen-go:
|
protogen-go: install-go-tools
|
||||||
mkdir -p pkg/grpc/proto
|
mkdir -p pkg/grpc/proto
|
||||||
protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
|
protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
|
||||||
backend/backend.proto
|
backend/backend.proto
|
||||||
@@ -583,18 +549,10 @@ protogen-go-clean:
|
|||||||
$(RM) bin/*
|
$(RM) bin/*
|
||||||
|
|
||||||
.PHONY: protogen-python
|
.PHONY: protogen-python
|
||||||
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
|
protogen-python: bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
|
||||||
|
|
||||||
.PHONY: protogen-python-clean
|
.PHONY: protogen-python-clean
|
||||||
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
|
protogen-python-clean: bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean
|
||||||
|
|
||||||
.PHONY: autogptq-protogen
|
|
||||||
autogptq-protogen:
|
|
||||||
$(MAKE) -C backend/python/autogptq protogen
|
|
||||||
|
|
||||||
.PHONY: autogptq-protogen-clean
|
|
||||||
autogptq-protogen-clean:
|
|
||||||
$(MAKE) -C backend/python/autogptq protogen-clean
|
|
||||||
|
|
||||||
.PHONY: bark-protogen
|
.PHONY: bark-protogen
|
||||||
bark-protogen:
|
bark-protogen:
|
||||||
@@ -620,6 +578,14 @@ diffusers-protogen:
|
|||||||
diffusers-protogen-clean:
|
diffusers-protogen-clean:
|
||||||
$(MAKE) -C backend/python/diffusers protogen-clean
|
$(MAKE) -C backend/python/diffusers protogen-clean
|
||||||
|
|
||||||
|
.PHONY: faster-whisper-protogen
|
||||||
|
faster-whisper-protogen:
|
||||||
|
$(MAKE) -C backend/python/faster-whisper protogen
|
||||||
|
|
||||||
|
.PHONY: faster-whisper-protogen-clean
|
||||||
|
faster-whisper-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/faster-whisper protogen-clean
|
||||||
|
|
||||||
.PHONY: exllama2-protogen
|
.PHONY: exllama2-protogen
|
||||||
exllama2-protogen:
|
exllama2-protogen:
|
||||||
$(MAKE) -C backend/python/exllama2 protogen
|
$(MAKE) -C backend/python/exllama2 protogen
|
||||||
@@ -628,14 +594,6 @@ exllama2-protogen:
|
|||||||
exllama2-protogen-clean:
|
exllama2-protogen-clean:
|
||||||
$(MAKE) -C backend/python/exllama2 protogen-clean
|
$(MAKE) -C backend/python/exllama2 protogen-clean
|
||||||
|
|
||||||
.PHONY: mamba-protogen
|
|
||||||
mamba-protogen:
|
|
||||||
$(MAKE) -C backend/python/mamba protogen
|
|
||||||
|
|
||||||
.PHONY: mamba-protogen-clean
|
|
||||||
mamba-protogen-clean:
|
|
||||||
$(MAKE) -C backend/python/mamba protogen-clean
|
|
||||||
|
|
||||||
.PHONY: rerankers-protogen
|
.PHONY: rerankers-protogen
|
||||||
rerankers-protogen:
|
rerankers-protogen:
|
||||||
$(MAKE) -C backend/python/rerankers protogen
|
$(MAKE) -C backend/python/rerankers protogen
|
||||||
@@ -644,14 +602,6 @@ rerankers-protogen:
|
|||||||
rerankers-protogen-clean:
|
rerankers-protogen-clean:
|
||||||
$(MAKE) -C backend/python/rerankers protogen-clean
|
$(MAKE) -C backend/python/rerankers protogen-clean
|
||||||
|
|
||||||
.PHONY: sentencetransformers-protogen
|
|
||||||
sentencetransformers-protogen:
|
|
||||||
$(MAKE) -C backend/python/sentencetransformers protogen
|
|
||||||
|
|
||||||
.PHONY: sentencetransformers-protogen-clean
|
|
||||||
sentencetransformers-protogen-clean:
|
|
||||||
$(MAKE) -C backend/python/sentencetransformers protogen-clean
|
|
||||||
|
|
||||||
.PHONY: transformers-protogen
|
.PHONY: transformers-protogen
|
||||||
transformers-protogen:
|
transformers-protogen:
|
||||||
$(MAKE) -C backend/python/transformers protogen
|
$(MAKE) -C backend/python/transformers protogen
|
||||||
@@ -660,37 +610,13 @@ transformers-protogen:
|
|||||||
transformers-protogen-clean:
|
transformers-protogen-clean:
|
||||||
$(MAKE) -C backend/python/transformers protogen-clean
|
$(MAKE) -C backend/python/transformers protogen-clean
|
||||||
|
|
||||||
.PHONY: parler-tts-protogen
|
.PHONY: kokoro-protogen
|
||||||
parler-tts-protogen:
|
kokoro-protogen:
|
||||||
$(MAKE) -C backend/python/parler-tts protogen
|
$(MAKE) -C backend/python/kokoro protogen
|
||||||
|
|
||||||
.PHONY: parler-tts-protogen-clean
|
.PHONY: kokoro-protogen-clean
|
||||||
parler-tts-protogen-clean:
|
kokoro-protogen-clean:
|
||||||
$(MAKE) -C backend/python/parler-tts protogen-clean
|
$(MAKE) -C backend/python/kokoro protogen-clean
|
||||||
|
|
||||||
.PHONY: transformers-musicgen-protogen
|
|
||||||
transformers-musicgen-protogen:
|
|
||||||
$(MAKE) -C backend/python/transformers-musicgen protogen
|
|
||||||
|
|
||||||
.PHONY: transformers-musicgen-protogen-clean
|
|
||||||
transformers-musicgen-protogen-clean:
|
|
||||||
$(MAKE) -C backend/python/transformers-musicgen protogen-clean
|
|
||||||
|
|
||||||
.PHONY: vall-e-x-protogen
|
|
||||||
vall-e-x-protogen:
|
|
||||||
$(MAKE) -C backend/python/vall-e-x protogen
|
|
||||||
|
|
||||||
.PHONY: vall-e-x-protogen-clean
|
|
||||||
vall-e-x-protogen-clean:
|
|
||||||
$(MAKE) -C backend/python/vall-e-x protogen-clean
|
|
||||||
|
|
||||||
.PHONY: openvoice-protogen
|
|
||||||
openvoice-protogen:
|
|
||||||
$(MAKE) -C backend/python/openvoice protogen
|
|
||||||
|
|
||||||
.PHONY: openvoice-protogen-clean
|
|
||||||
openvoice-protogen-clean:
|
|
||||||
$(MAKE) -C backend/python/openvoice protogen-clean
|
|
||||||
|
|
||||||
.PHONY: vllm-protogen
|
.PHONY: vllm-protogen
|
||||||
vllm-protogen:
|
vllm-protogen:
|
||||||
@@ -703,28 +629,25 @@ vllm-protogen-clean:
|
|||||||
## GRPC
|
## GRPC
|
||||||
# Note: it is duplicated in the Dockerfile
|
# Note: it is duplicated in the Dockerfile
|
||||||
prepare-extra-conda-environments: protogen-python
|
prepare-extra-conda-environments: protogen-python
|
||||||
$(MAKE) -C backend/python/autogptq
|
|
||||||
$(MAKE) -C backend/python/bark
|
$(MAKE) -C backend/python/bark
|
||||||
$(MAKE) -C backend/python/coqui
|
$(MAKE) -C backend/python/coqui
|
||||||
$(MAKE) -C backend/python/diffusers
|
$(MAKE) -C backend/python/diffusers
|
||||||
|
$(MAKE) -C backend/python/faster-whisper
|
||||||
$(MAKE) -C backend/python/vllm
|
$(MAKE) -C backend/python/vllm
|
||||||
$(MAKE) -C backend/python/mamba
|
|
||||||
$(MAKE) -C backend/python/sentencetransformers
|
|
||||||
$(MAKE) -C backend/python/rerankers
|
$(MAKE) -C backend/python/rerankers
|
||||||
$(MAKE) -C backend/python/transformers
|
$(MAKE) -C backend/python/transformers
|
||||||
$(MAKE) -C backend/python/transformers-musicgen
|
$(MAKE) -C backend/python/kokoro
|
||||||
$(MAKE) -C backend/python/parler-tts
|
|
||||||
$(MAKE) -C backend/python/vall-e-x
|
|
||||||
$(MAKE) -C backend/python/openvoice
|
|
||||||
$(MAKE) -C backend/python/exllama2
|
$(MAKE) -C backend/python/exllama2
|
||||||
|
|
||||||
prepare-test-extra: protogen-python
|
prepare-test-extra: protogen-python
|
||||||
$(MAKE) -C backend/python/transformers
|
$(MAKE) -C backend/python/transformers
|
||||||
$(MAKE) -C backend/python/diffusers
|
$(MAKE) -C backend/python/diffusers
|
||||||
|
$(MAKE) -C backend/python/vllm
|
||||||
|
|
||||||
test-extra: prepare-test-extra
|
test-extra: prepare-test-extra
|
||||||
$(MAKE) -C backend/python/transformers test
|
$(MAKE) -C backend/python/transformers test
|
||||||
$(MAKE) -C backend/python/diffusers test
|
$(MAKE) -C backend/python/diffusers test
|
||||||
|
$(MAKE) -C backend/python/vllm test
|
||||||
|
|
||||||
backend-assets:
|
backend-assets:
|
||||||
mkdir -p backend-assets
|
mkdir -p backend-assets
|
||||||
@@ -785,6 +708,13 @@ backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.
|
|||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
|
||||||
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
|
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
|
||||||
|
|
||||||
|
backend-assets/grpc/llama-cpp-avx512: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
|
cp -rf backend/cpp/llama backend/cpp/llama-avx512
|
||||||
|
$(MAKE) -C backend/cpp/llama-avx512 purge
|
||||||
|
$(info ${GREEN}I llama-cpp build info:avx512${RESET})
|
||||||
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx512" build-llama-cpp-grpc-server
|
||||||
|
cp -rfv backend/cpp/llama-avx512/grpc-server backend-assets/grpc/llama-cpp-avx512
|
||||||
|
|
||||||
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
|
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
cp -rf backend/cpp/llama backend/cpp/llama-avx
|
cp -rf backend/cpp/llama backend/cpp/llama-avx
|
||||||
$(MAKE) -C backend/cpp/llama-avx purge
|
$(MAKE) -C backend/cpp/llama-avx purge
|
||||||
@@ -838,13 +768,6 @@ backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
|
|||||||
mkdir -p backend-assets/util/
|
mkdir -p backend-assets/util/
|
||||||
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
|
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
|
||||||
|
|
||||||
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
|
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
|
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
|
|
||||||
ifneq ($(UPX),)
|
|
||||||
$(UPX) backend-assets/grpc/llama-ggml
|
|
||||||
endif
|
|
||||||
|
|
||||||
backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc
|
backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/
|
||||||
@@ -859,13 +782,6 @@ ifneq ($(UPX),)
|
|||||||
$(UPX) backend-assets/grpc/piper
|
$(UPX) backend-assets/grpc/piper
|
||||||
endif
|
endif
|
||||||
|
|
||||||
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
|
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
|
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
|
|
||||||
ifneq ($(UPX),)
|
|
||||||
$(UPX) backend-assets/grpc/stablediffusion
|
|
||||||
endif
|
|
||||||
|
|
||||||
backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
|
backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
|
||||||
@@ -873,15 +789,8 @@ ifneq ($(UPX),)
|
|||||||
$(UPX) backend-assets/grpc/silero-vad
|
$(UPX) backend-assets/grpc/silero-vad
|
||||||
endif
|
endif
|
||||||
|
|
||||||
backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
|
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/build/src/libwhisper.a backend-assets/grpc
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
|
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="${WHISPER_INCLUDE_PATH}" LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" LD_LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
|
|
||||||
ifneq ($(UPX),)
|
|
||||||
$(UPX) backend-assets/grpc/tinydream
|
|
||||||
endif
|
|
||||||
|
|
||||||
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
|
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
|
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
|
||||||
ifneq ($(UPX),)
|
ifneq ($(UPX),)
|
||||||
$(UPX) backend-assets/grpc/whisper
|
$(UPX) backend-assets/grpc/whisper
|
||||||
@@ -933,7 +842,8 @@ docker-aio-all:
|
|||||||
|
|
||||||
docker-image-intel:
|
docker-image-intel:
|
||||||
docker build \
|
docker build \
|
||||||
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
|
--progress plain \
|
||||||
|
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.0-0-devel-ubuntu24.04 \
|
||||||
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
||||||
--build-arg GO_TAGS="none" \
|
--build-arg GO_TAGS="none" \
|
||||||
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
||||||
@@ -941,7 +851,7 @@ docker-image-intel:
|
|||||||
|
|
||||||
docker-image-intel-xpu:
|
docker-image-intel-xpu:
|
||||||
docker build \
|
docker build \
|
||||||
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
|
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04 \
|
||||||
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
||||||
--build-arg GO_TAGS="none" \
|
--build-arg GO_TAGS="none" \
|
||||||
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
||||||
@@ -953,7 +863,7 @@ swagger:
|
|||||||
|
|
||||||
.PHONY: gen-assets
|
.PHONY: gen-assets
|
||||||
gen-assets:
|
gen-assets:
|
||||||
$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets
|
$(GOCMD) run core/dependencies_manager/manager.go webui_static.yaml core/http/static/assets
|
||||||
|
|
||||||
## Documentation
|
## Documentation
|
||||||
docs/layouts/_default:
|
docs/layouts/_default:
|
||||||
|
|||||||
118
README.md
118
README.md
@@ -1,7 +1,6 @@
|
|||||||
<h1 align="center">
|
<h1 align="center">
|
||||||
<br>
|
<br>
|
||||||
<img height="300" src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd"> <br>
|
<img height="300" src="./core/http/static/logo.png"> <br>
|
||||||
LocalAI
|
|
||||||
<br>
|
<br>
|
||||||
</h1>
|
</h1>
|
||||||
|
|
||||||
@@ -31,7 +30,7 @@
|
|||||||
|
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<a href="https://twitter.com/LocalAI_API" target="blank">
|
<a href="https://twitter.com/LocalAI_API" target="blank">
|
||||||
<img src="https://img.shields.io/twitter/follow/LocalAI_API?label=Follow: LocalAI_API&style=social" alt="Follow LocalAI_API"/>
|
<img src="https://img.shields.io/badge/X-%23000000.svg?style=for-the-badge&logo=X&logoColor=white&label=LocalAI_API" alt="Follow LocalAI_API"/>
|
||||||
</a>
|
</a>
|
||||||
<a href="https://discord.gg/uJAeKSAGDy" target="blank">
|
<a href="https://discord.gg/uJAeKSAGDy" target="blank">
|
||||||
<img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
|
<img src="https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted" alt="Join LocalAI Discord Community"/>
|
||||||
@@ -39,37 +38,94 @@
|
|||||||
</p>
|
</p>
|
||||||
|
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<a href="https://trendshift.io/repositories/1484" target="_blank"><img src="https://trendshift.io/api/badge/repositories/1484" alt="go-skynet%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
<a href="https://trendshift.io/repositories/5539" target="_blank"><img src="https://trendshift.io/api/badge/repositories/5539" alt="mudler%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
||||||
>
|
>
|
||||||
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples)
|
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) Try on
|
||||||
|
[](https://t.me/localaiofficial_bot)
|
||||||
|
|
||||||
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
||||||
|
|
||||||
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
|
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that's compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
|
||||||
|
|
||||||

|
|
||||||
|
## 📚🆕 Local Stack Family
|
||||||
|
|
||||||
|
🆕 LocalAI is now part of a comprehensive suite of AI tools designed to work together:
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<td width="50%" valign="top">
|
||||||
|
<a href="https://github.com/mudler/LocalAGI">
|
||||||
|
<img src="https://raw.githubusercontent.com/mudler/LocalAGI/refs/heads/main/webui/react-ui/public/logo_2.png" width="300" alt="LocalAGI Logo">
|
||||||
|
</a>
|
||||||
|
</td>
|
||||||
|
<td width="50%" valign="top">
|
||||||
|
<h3><a href="https://github.com/mudler/LocalAGI">LocalAGI</a></h3>
|
||||||
|
<p>A powerful Local AI agent management platform that serves as a drop-in replacement for OpenAI's Responses API, enhanced with advanced agentic capabilities.</p>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td width="50%" valign="top">
|
||||||
|
<a href="https://github.com/mudler/LocalRecall">
|
||||||
|
<img src="https://raw.githubusercontent.com/mudler/LocalRecall/refs/heads/main/static/localrecall_horizontal.png" width="300" alt="LocalRecall Logo">
|
||||||
|
</a>
|
||||||
|
</td>
|
||||||
|
<td width="50%" valign="top">
|
||||||
|
<h3><a href="https://github.com/mudler/LocalRecall">LocalRecall</a></h3>
|
||||||
|
<p>A REST-ful API and knowledge base management system that provides persistent memory and storage capabilities for AI agents.</p>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
## Screenshots
|
||||||
|
|
||||||
|
|
||||||
|
| Talk Interface | Generate Audio |
|
||||||
|
| --- | --- |
|
||||||
|
|  |  |
|
||||||
|
|
||||||
|
| Models Overview | Generate Images |
|
||||||
|
| --- | --- |
|
||||||
|
|  |  |
|
||||||
|
|
||||||
|
| Chat Interface | Home |
|
||||||
|
| --- | --- |
|
||||||
|
|  |  |
|
||||||
|
|
||||||
|
| Login | Swarm |
|
||||||
|
| --- | --- |
|
||||||
|
| |  |
|
||||||
|
|
||||||
|
## 💻 Quickstart
|
||||||
|
|
||||||
Run the installer script:
|
Run the installer script:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
# Basic installation
|
||||||
curl https://localai.io/install.sh | sh
|
curl https://localai.io/install.sh | sh
|
||||||
```
|
```
|
||||||
|
|
||||||
|
For more installation options, see [Installer Options](https://localai.io/docs/advanced/installer/).
|
||||||
|
|
||||||
Or run with docker:
|
Or run with docker:
|
||||||
|
|
||||||
|
### CPU only image:
|
||||||
```bash
|
```bash
|
||||||
# CPU only image:
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-cpu
|
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-cpu
|
||||||
|
```
|
||||||
# Nvidia GPU:
|
### Nvidia GPU:
|
||||||
|
```bash
|
||||||
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
|
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
|
||||||
|
```
|
||||||
# CPU and GPU image (bigger size):
|
### CPU and GPU image (bigger size):
|
||||||
|
```bash
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
|
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
|
||||||
|
```
|
||||||
# AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/)
|
### AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/)
|
||||||
|
```bash
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -88,42 +144,26 @@ local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
|
|||||||
local-ai run oci://localai/phi-2:latest
|
local-ai run oci://localai/phi-2:latest
|
||||||
```
|
```
|
||||||
|
|
||||||
[💻 Getting started](https://localai.io/basics/getting_started/index.html)
|
For more information, see [💻 Getting started](https://localai.io/basics/getting_started/index.html)
|
||||||
|
|
||||||
## 📰 Latest project news
|
## 📰 Latest project news
|
||||||
|
|
||||||
|
- Apr 2025: [LocalAGI](https://github.com/mudler/LocalAGI) and [LocalRecall](https://github.com/mudler/LocalRecall) join the LocalAI family stack.
|
||||||
|
- Apr 2025: WebUI overhaul, AIO images updates
|
||||||
|
- Feb 2025: Backend cleanup, Breaking changes, new backends (kokoro, OutelTTS, faster-whisper), Nvidia L4T images
|
||||||
|
- Jan 2025: LocalAI model release: https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.3, SANA support in diffusers: https://github.com/mudler/LocalAI/pull/4603
|
||||||
- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 )
|
- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 )
|
||||||
- Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 )
|
- Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 )
|
||||||
- Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204
|
- Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204
|
||||||
- Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples)
|
- Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples)
|
||||||
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
|
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
|
||||||
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
|
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723. P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
|
||||||
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
|
|
||||||
- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628
|
|
||||||
- May 2024: 🔥🔥 Decentralized P2P llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
|
- May 2024: 🔥🔥 Decentralized P2P llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
|
||||||
- May 2024: 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
|
|
||||||
- May 2024: 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
|
|
||||||
- May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
|
- May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
|
||||||
- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
|
|
||||||
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
|
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
|
||||||
|
|
||||||
Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||||
|
|
||||||
## 🔥🔥 Hot topics (looking for help):
|
|
||||||
|
|
||||||
- Multimodal with vLLM and Video understanding: https://github.com/mudler/LocalAI/pull/3729
|
|
||||||
- Realtime API https://github.com/mudler/LocalAI/issues/3714
|
|
||||||
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
|
|
||||||
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
|
|
||||||
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
|
||||||
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
|
|
||||||
- Assistant API: https://github.com/mudler/LocalAI/issues/1273
|
|
||||||
- Moderation endpoint: https://github.com/mudler/LocalAI/issues/999
|
|
||||||
- Vulkan: https://github.com/mudler/LocalAI/issues/1647
|
|
||||||
- Anthropic API: https://github.com/mudler/LocalAI/issues/1808
|
|
||||||
|
|
||||||
If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22
|
|
||||||
|
|
||||||
## 🚀 [Features](https://localai.io/features/)
|
## 🚀 [Features](https://localai.io/features/)
|
||||||
|
|
||||||
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
|
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
|
||||||
@@ -137,12 +177,10 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
|
|||||||
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
|
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
|
||||||
- 📈 [Reranker API](https://localai.io/features/reranker/)
|
- 📈 [Reranker API](https://localai.io/features/reranker/)
|
||||||
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
|
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
|
||||||
|
- [Agentic capabilities](https://github.com/mudler/LocalAGI)
|
||||||
- 🔊 Voice activity detection (Silero-VAD support)
|
- 🔊 Voice activity detection (Silero-VAD support)
|
||||||
- 🌍 Integrated WebUI!
|
- 🌍 Integrated WebUI!
|
||||||
|
|
||||||
## 💻 Usage
|
|
||||||
|
|
||||||
Check out the [Getting started](https://localai.io/basics/getting_started/index.html) section in our documentation.
|
|
||||||
|
|
||||||
### 🔗 Community and integrations
|
### 🔗 Community and integrations
|
||||||
|
|
||||||
@@ -218,7 +256,7 @@ A huge thank you to our generous sponsors who support this project covering CI e
|
|||||||
|
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<a href="https://www.spectrocloud.com/" target="blank">
|
<a href="https://www.spectrocloud.com/" target="blank">
|
||||||
<img height="200" src="https://github.com/go-skynet/LocalAI/assets/2420543/68a6f3cb-8a65-4a4d-99b5-6417a8905512">
|
<img height="200" src="https://github.com/user-attachments/assets/72eab1dd-8b93-4fc0-9ade-84db49f24962">
|
||||||
</a>
|
</a>
|
||||||
<a href="https://www.premai.io/" target="blank">
|
<a href="https://www.premai.io/" target="blank">
|
||||||
<img height="200" src="https://github.com/mudler/LocalAI/assets/2420543/42e4ca83-661e-4f79-8e46-ae43689683d6"> <br>
|
<img height="200" src="https://github.com/mudler/LocalAI/assets/2420543/42e4ca83-661e-4f79-8e46-ae43689683d6"> <br>
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
name: text-embedding-ada-002
|
|
||||||
embeddings: true
|
embeddings: true
|
||||||
|
name: text-embedding-ada-002
|
||||||
parameters:
|
parameters:
|
||||||
model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf
|
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
|
||||||
|
|
||||||
usage: |
|
usage: |
|
||||||
You can test this model with curl like this:
|
You can test this model with curl like this:
|
||||||
|
|||||||
@@ -1,56 +1,17 @@
|
|||||||
name: stablediffusion
|
name: stablediffusion
|
||||||
backend: stablediffusion
|
backend: stablediffusion-ggml
|
||||||
|
cfg_scale: 4.5
|
||||||
|
|
||||||
|
options:
|
||||||
|
- sampler:euler
|
||||||
parameters:
|
parameters:
|
||||||
model: stablediffusion_assets
|
model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf
|
||||||
|
step: 25
|
||||||
license: "BSD-3"
|
|
||||||
urls:
|
|
||||||
- https://github.com/EdVince/Stable-Diffusion-NCNN
|
|
||||||
- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE
|
|
||||||
|
|
||||||
description: |
|
|
||||||
Stable Diffusion in NCNN with c++, supported txt2img and img2img
|
|
||||||
|
|
||||||
download_files:
|
download_files:
|
||||||
- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
|
- filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
|
||||||
sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
|
sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f"
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
|
uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
|
||||||
- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
|
|
||||||
sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
|
|
||||||
- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
|
|
||||||
sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
|
|
||||||
- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
|
|
||||||
sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
|
|
||||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin"
|
|
||||||
- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin"
|
|
||||||
sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
|
|
||||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin"
|
|
||||||
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
|
|
||||||
sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
|
|
||||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin"
|
|
||||||
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
|
|
||||||
sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
|
|
||||||
- filename: "stablediffusion_assets/log_sigmas.bin"
|
|
||||||
sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
|
|
||||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
|
|
||||||
- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
|
|
||||||
sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
|
|
||||||
- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
|
|
||||||
sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
|
|
||||||
- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
|
|
||||||
sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
|
|
||||||
- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
|
|
||||||
sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
|
|
||||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
|
|
||||||
- filename: "stablediffusion_assets/vocab.txt"
|
|
||||||
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
|
|
||||||
|
|
||||||
usage: |
|
usage: |
|
||||||
curl http://localhost:8080/v1/images/generations \
|
curl http://localhost:8080/v1/images/generations \
|
||||||
|
|||||||
@@ -1,101 +1,57 @@
|
|||||||
name: gpt-4
|
|
||||||
mmap: true
|
|
||||||
parameters:
|
|
||||||
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
|
|
||||||
context_size: 8192
|
context_size: 8192
|
||||||
|
f16: true
|
||||||
stopwords:
|
|
||||||
- "<|im_end|>"
|
|
||||||
- "<dummy32000>"
|
|
||||||
- "</tool_call>"
|
|
||||||
- "<|eot_id|>"
|
|
||||||
- "<|end_of_text|>"
|
|
||||||
|
|
||||||
function:
|
function:
|
||||||
# disable injecting the "answer" tool
|
|
||||||
disable_no_action: true
|
|
||||||
|
|
||||||
grammar:
|
grammar:
|
||||||
# This allows the grammar to also return messages
|
no_mixed_free_string: true
|
||||||
mixed_mode: true
|
schema_type: llama3.1 # or JSON is supported too (json)
|
||||||
# Suffix to add to the grammar
|
response_regex:
|
||||||
#prefix: '<tool_call>\n'
|
- <function=(?P<name>\w+)>(?P<arguments>.*)</function>
|
||||||
# Force parallel calls in the grammar
|
mmap: true
|
||||||
# parallel_calls: true
|
name: gpt-4
|
||||||
|
parameters:
|
||||||
return_name_in_function_response: true
|
model: Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
|
||||||
# Without grammar uncomment the lines below
|
stopwords:
|
||||||
# Warning: this is relying only on the capability of the
|
- <|im_end|>
|
||||||
# LLM model to generate the correct function call.
|
- <dummy32000>
|
||||||
json_regex_match:
|
- <|eot_id|>
|
||||||
- "(?s)<tool_call>(.*?)</tool_call>"
|
- <|end_of_text|>
|
||||||
- "(?s)<tool_call>(.*?)"
|
|
||||||
replace_llm_results:
|
|
||||||
# Drop the scratchpad content from responses
|
|
||||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
|
||||||
value: ""
|
|
||||||
replace_function_results:
|
|
||||||
# Replace everything that is not JSON array or object
|
|
||||||
#
|
|
||||||
- key: '(?s)^[^{\[]*'
|
|
||||||
value: ""
|
|
||||||
- key: '(?s)[^}\]]*$'
|
|
||||||
value: ""
|
|
||||||
- key: "'([^']*?)'"
|
|
||||||
value: "_DQUOTE_${1}_DQUOTE_"
|
|
||||||
- key: '\\"'
|
|
||||||
value: "__TEMP_QUOTE__"
|
|
||||||
- key: "\'"
|
|
||||||
value: "'"
|
|
||||||
- key: "_DQUOTE_"
|
|
||||||
value: '"'
|
|
||||||
- key: "__TEMP_QUOTE__"
|
|
||||||
value: '"'
|
|
||||||
# Drop the scratchpad content from responses
|
|
||||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
|
||||||
value: ""
|
|
||||||
|
|
||||||
template:
|
template:
|
||||||
chat: |
|
chat: |
|
||||||
{{.Input -}}
|
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
|
||||||
<|im_start|>assistant
|
You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
|
||||||
|
{{.Input }}
|
||||||
|
<|start_header_id|>assistant<|end_header_id|>
|
||||||
chat_message: |
|
chat_message: |
|
||||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
|
||||||
{{- if .FunctionCall }}
|
{{ if .FunctionCall -}}
|
||||||
<tool_call>
|
{{ else if eq .RoleName "tool" -}}
|
||||||
{{- else if eq .RoleName "tool" }}
|
The Function was executed and the response was:
|
||||||
<tool_response>
|
{{ end -}}
|
||||||
{{- end }}
|
{{ if .Content -}}
|
||||||
{{- if .Content}}
|
{{.Content -}}
|
||||||
{{.Content }}
|
{{ else if .FunctionCall -}}
|
||||||
{{- end }}
|
{{ range .FunctionCall }}
|
||||||
{{- if .FunctionCall}}
|
[{{.FunctionCall.Name}}({{.FunctionCall.Arguments}})]
|
||||||
{{toJson .FunctionCall}}
|
{{ end }}
|
||||||
{{- end }}
|
{{ end -}}
|
||||||
{{- if .FunctionCall }}
|
<|eot_id|>
|
||||||
</tool_call>
|
|
||||||
{{- else if eq .RoleName "tool" }}
|
|
||||||
</tool_response>
|
|
||||||
{{- end }}<|im_end|>
|
|
||||||
completion: |
|
completion: |
|
||||||
{{.Input}}
|
{{.Input}}
|
||||||
function: |-
|
function: |
|
||||||
<|im_start|>system
|
<|start_header_id|>system<|end_header_id|>
|
||||||
You are a function calling AI model.
|
You are an expert in composing functions. You are given a question and a set of possible functions.
|
||||||
Here are the available tools:
|
Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
|
||||||
<tools>
|
If none of the functions can be used, point it out. If the given question lacks the parameters required by the function, also point it out. You should only return the function call in tools call sections.
|
||||||
{{range .Functions}}
|
If you decide to invoke any of the function(s), you MUST put it in the format as follows:
|
||||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
[func_name1(params_name1=params_value1,params_name2=params_value2,...),func_name2(params_name1=params_value1,params_name2=params_value2,...)]
|
||||||
{{end}}
|
You SHOULD NOT include any other text in the response.
|
||||||
</tools>
|
Here is a list of functions in JSON format that you can invoke.
|
||||||
You should call the tools provided to you sequentially
|
{{toJson .Functions}}
|
||||||
Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
|
<|eot_id|><|start_header_id|>user<|end_header_id|>
|
||||||
<scratchpad>
|
{{.Input}}
|
||||||
{step-by-step reasoning and plan in bullet points}
|
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
|
||||||
</scratchpad>
|
|
||||||
For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
|
download_files:
|
||||||
<tool_call>
|
- filename: Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
|
||||||
{"arguments": <args-dict>, "name": <function-name>}
|
sha256: 2e220a14ba4328fee38cf36c2c068261560f999fadb5725ce5c6d977cb5126b5
|
||||||
</tool_call><|im_end|>
|
uri: huggingface://bartowski/Hermes-3-Llama-3.2-3B-GGUF/Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
|
||||||
{{.Input -}}
|
|
||||||
<|im_start|>assistant
|
|
||||||
8
aio/cpu/vad.yaml
Normal file
8
aio/cpu/vad.yaml
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
backend: silero-vad
|
||||||
|
name: silero-vad
|
||||||
|
parameters:
|
||||||
|
model: silero-vad.onnx
|
||||||
|
download_files:
|
||||||
|
- filename: silero-vad.onnx
|
||||||
|
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
|
||||||
|
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
|
||||||
@@ -1,31 +1,49 @@
|
|||||||
backend: llama-cpp
|
|
||||||
context_size: 4096
|
context_size: 4096
|
||||||
f16: true
|
f16: true
|
||||||
mmap: true
|
mmap: true
|
||||||
|
mmproj: minicpm-v-2_6-mmproj-f16.gguf
|
||||||
name: gpt-4o
|
name: gpt-4o
|
||||||
|
|
||||||
roles:
|
|
||||||
user: "USER:"
|
|
||||||
assistant: "ASSISTANT:"
|
|
||||||
system: "SYSTEM:"
|
|
||||||
|
|
||||||
mmproj: bakllava-mmproj.gguf
|
|
||||||
parameters:
|
parameters:
|
||||||
model: bakllava.gguf
|
model: minicpm-v-2_6-Q4_K_M.gguf
|
||||||
|
stopwords:
|
||||||
|
- <|im_end|>
|
||||||
|
- <dummy32000>
|
||||||
|
- </s>
|
||||||
|
- <|endoftext|>
|
||||||
template:
|
template:
|
||||||
chat: |
|
chat: |
|
||||||
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
{{.Input -}}
|
||||||
|
<|im_start|>assistant
|
||||||
|
chat_message: |
|
||||||
|
<|im_start|>{{ .RoleName }}
|
||||||
|
{{ if .FunctionCall -}}
|
||||||
|
Function call:
|
||||||
|
{{ else if eq .RoleName "tool" -}}
|
||||||
|
Function response:
|
||||||
|
{{ end -}}
|
||||||
|
{{ if .Content -}}
|
||||||
|
{{.Content }}
|
||||||
|
{{ end -}}
|
||||||
|
{{ if .FunctionCall -}}
|
||||||
|
{{toJson .FunctionCall}}
|
||||||
|
{{ end -}}<|im_end|>
|
||||||
|
completion: |
|
||||||
{{.Input}}
|
{{.Input}}
|
||||||
ASSISTANT:
|
function: |
|
||||||
|
<|im_start|>system
|
||||||
|
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||||
|
{{range .Functions}}
|
||||||
|
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||||
|
{{end}}
|
||||||
|
For each function call return a json object with function name and arguments
|
||||||
|
<|im_end|>
|
||||||
|
{{.Input -}}
|
||||||
|
<|im_start|>assistant
|
||||||
|
|
||||||
download_files:
|
download_files:
|
||||||
- filename: bakllava.gguf
|
- filename: minicpm-v-2_6-Q4_K_M.gguf
|
||||||
uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
|
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
|
||||||
- filename: bakllava-mmproj.gguf
|
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
|
||||||
uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
|
- filename: minicpm-v-2_6-mmproj-f16.gguf
|
||||||
|
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
|
||||||
usage: |
|
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd
|
||||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
|
||||||
"model": "gpt-4-vision-preview",
|
|
||||||
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
|
|
||||||
@@ -129,7 +129,7 @@ detect_gpu
|
|||||||
detect_gpu_size
|
detect_gpu_size
|
||||||
|
|
||||||
PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
|
PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
|
||||||
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
|
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vad.yaml,/aio/${PROFILE}/vision.yaml}"
|
||||||
|
|
||||||
check_vars
|
check_vars
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
|
embeddings: true
|
||||||
name: text-embedding-ada-002
|
name: text-embedding-ada-002
|
||||||
backend: sentencetransformers
|
|
||||||
parameters:
|
parameters:
|
||||||
model: all-MiniLM-L6-v2
|
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
|
||||||
|
|
||||||
usage: |
|
usage: |
|
||||||
You can test this model with curl like this:
|
You can test this model with curl like this:
|
||||||
|
|||||||
@@ -1,101 +1,53 @@
|
|||||||
name: gpt-4
|
context_size: 4096
|
||||||
mmap: true
|
f16: true
|
||||||
parameters:
|
|
||||||
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
|
|
||||||
context_size: 8192
|
|
||||||
|
|
||||||
stopwords:
|
|
||||||
- "<|im_end|>"
|
|
||||||
- "<dummy32000>"
|
|
||||||
- "</tool_call>"
|
|
||||||
- "<|eot_id|>"
|
|
||||||
- "<|end_of_text|>"
|
|
||||||
|
|
||||||
function:
|
function:
|
||||||
# disable injecting the "answer" tool
|
capture_llm_results:
|
||||||
disable_no_action: true
|
- (?s)<Thought>(.*?)</Thought>
|
||||||
|
|
||||||
grammar:
|
grammar:
|
||||||
# This allows the grammar to also return messages
|
properties_order: name,arguments
|
||||||
mixed_mode: true
|
|
||||||
# Suffix to add to the grammar
|
|
||||||
#prefix: '<tool_call>\n'
|
|
||||||
# Force parallel calls in the grammar
|
|
||||||
# parallel_calls: true
|
|
||||||
|
|
||||||
return_name_in_function_response: true
|
|
||||||
# Without grammar uncomment the lines below
|
|
||||||
# Warning: this is relying only on the capability of the
|
|
||||||
# LLM model to generate the correct function call.
|
|
||||||
json_regex_match:
|
json_regex_match:
|
||||||
- "(?s)<tool_call>(.*?)</tool_call>"
|
- (?s)<Output>(.*?)</Output>
|
||||||
- "(?s)<tool_call>(.*?)"
|
|
||||||
replace_llm_results:
|
replace_llm_results:
|
||||||
# Drop the scratchpad content from responses
|
- key: (?s)<Thought>(.*?)</Thought>
|
||||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
|
||||||
value: ""
|
value: ""
|
||||||
replace_function_results:
|
mmap: true
|
||||||
# Replace everything that is not JSON array or object
|
name: gpt-4
|
||||||
#
|
parameters:
|
||||||
- key: '(?s)^[^{\[]*'
|
model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
|
||||||
value: ""
|
stopwords:
|
||||||
- key: '(?s)[^}\]]*$'
|
- <|im_end|>
|
||||||
value: ""
|
- <dummy32000>
|
||||||
- key: "'([^']*?)'"
|
- </s>
|
||||||
value: "_DQUOTE_${1}_DQUOTE_"
|
|
||||||
- key: '\\"'
|
|
||||||
value: "__TEMP_QUOTE__"
|
|
||||||
- key: "\'"
|
|
||||||
value: "'"
|
|
||||||
- key: "_DQUOTE_"
|
|
||||||
value: '"'
|
|
||||||
- key: "__TEMP_QUOTE__"
|
|
||||||
value: '"'
|
|
||||||
# Drop the scratchpad content from responses
|
|
||||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
|
||||||
value: ""
|
|
||||||
|
|
||||||
template:
|
template:
|
||||||
chat: |
|
chat: |
|
||||||
{{.Input -}}
|
{{.Input -}}
|
||||||
<|im_start|>assistant
|
<|im_start|>assistant
|
||||||
chat_message: |
|
chat_message: |
|
||||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
<|im_start|>{{ .RoleName }}
|
||||||
{{- if .FunctionCall }}
|
{{ if .FunctionCall -}}
|
||||||
<tool_call>
|
Function call:
|
||||||
{{- else if eq .RoleName "tool" }}
|
{{ else if eq .RoleName "tool" -}}
|
||||||
<tool_response>
|
Function response:
|
||||||
{{- end }}
|
{{ end -}}
|
||||||
{{- if .Content}}
|
{{ if .Content -}}
|
||||||
{{.Content }}
|
{{.Content }}
|
||||||
{{- end }}
|
{{ end -}}
|
||||||
{{- if .FunctionCall}}
|
{{ if .FunctionCall -}}
|
||||||
{{toJson .FunctionCall}}
|
{{toJson .FunctionCall}}
|
||||||
{{- end }}
|
{{ end -}}<|im_end|>
|
||||||
{{- if .FunctionCall }}
|
|
||||||
</tool_call>
|
|
||||||
{{- else if eq .RoleName "tool" }}
|
|
||||||
</tool_response>
|
|
||||||
{{- end }}<|im_end|>
|
|
||||||
completion: |
|
completion: |
|
||||||
{{.Input}}
|
{{.Input}}
|
||||||
function: |-
|
function: |
|
||||||
<|im_start|>system
|
<|im_start|>system
|
||||||
You are a function calling AI model.
|
You are an AI assistant that executes function calls, and these are the tools at your disposal:
|
||||||
Here are the available tools:
|
|
||||||
<tools>
|
|
||||||
{{range .Functions}}
|
{{range .Functions}}
|
||||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||||
{{end}}
|
{{end}}
|
||||||
</tools>
|
<|im_end|>
|
||||||
You should call the tools provided to you sequentially
|
|
||||||
Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
|
|
||||||
<scratchpad>
|
|
||||||
{step-by-step reasoning and plan in bullet points}
|
|
||||||
</scratchpad>
|
|
||||||
For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
|
|
||||||
<tool_call>
|
|
||||||
{"arguments": <args-dict>, "name": <function-name>}
|
|
||||||
</tool_call><|im_end|>
|
|
||||||
{{.Input -}}
|
{{.Input -}}
|
||||||
<|im_start|>assistant
|
<|im_start|>assistant
|
||||||
|
|
||||||
|
download_files:
|
||||||
|
- filename: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
|
||||||
|
sha256: 4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4
|
||||||
|
uri: huggingface://mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
|
||||||
|
|||||||
8
aio/gpu-8g/vad.yaml
Normal file
8
aio/gpu-8g/vad.yaml
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
backend: silero-vad
|
||||||
|
name: silero-vad
|
||||||
|
parameters:
|
||||||
|
model: silero-vad.onnx
|
||||||
|
download_files:
|
||||||
|
- filename: silero-vad.onnx
|
||||||
|
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
|
||||||
|
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
|
||||||
@@ -1,35 +1,49 @@
|
|||||||
backend: llama-cpp
|
|
||||||
context_size: 4096
|
context_size: 4096
|
||||||
f16: true
|
f16: true
|
||||||
mmap: true
|
mmap: true
|
||||||
|
mmproj: minicpm-v-2_6-mmproj-f16.gguf
|
||||||
name: gpt-4o
|
name: gpt-4o
|
||||||
|
|
||||||
roles:
|
|
||||||
user: "USER:"
|
|
||||||
assistant: "ASSISTANT:"
|
|
||||||
system: "SYSTEM:"
|
|
||||||
|
|
||||||
mmproj: llava-v1.6-7b-mmproj-f16.gguf
|
|
||||||
parameters:
|
parameters:
|
||||||
model: llava-v1.6-mistral-7b.Q5_K_M.gguf
|
model: minicpm-v-2_6-Q4_K_M.gguf
|
||||||
temperature: 0.2
|
stopwords:
|
||||||
top_k: 40
|
- <|im_end|>
|
||||||
top_p: 0.95
|
- <dummy32000>
|
||||||
seed: -1
|
- </s>
|
||||||
|
- <|endoftext|>
|
||||||
template:
|
template:
|
||||||
chat: |
|
chat: |
|
||||||
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
{{.Input -}}
|
||||||
|
<|im_start|>assistant
|
||||||
|
chat_message: |
|
||||||
|
<|im_start|>{{ .RoleName }}
|
||||||
|
{{ if .FunctionCall -}}
|
||||||
|
Function call:
|
||||||
|
{{ else if eq .RoleName "tool" -}}
|
||||||
|
Function response:
|
||||||
|
{{ end -}}
|
||||||
|
{{ if .Content -}}
|
||||||
|
{{.Content }}
|
||||||
|
{{ end -}}
|
||||||
|
{{ if .FunctionCall -}}
|
||||||
|
{{toJson .FunctionCall}}
|
||||||
|
{{ end -}}<|im_end|>
|
||||||
|
completion: |
|
||||||
{{.Input}}
|
{{.Input}}
|
||||||
ASSISTANT:
|
function: |
|
||||||
|
<|im_start|>system
|
||||||
|
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||||
|
{{range .Functions}}
|
||||||
|
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||||
|
{{end}}
|
||||||
|
For each function call return a json object with function name and arguments
|
||||||
|
<|im_end|>
|
||||||
|
{{.Input -}}
|
||||||
|
<|im_start|>assistant
|
||||||
|
|
||||||
download_files:
|
download_files:
|
||||||
- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf
|
- filename: minicpm-v-2_6-Q4_K_M.gguf
|
||||||
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf
|
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
|
||||||
- filename: llava-v1.6-7b-mmproj-f16.gguf
|
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
|
||||||
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
|
- filename: minicpm-v-2_6-mmproj-f16.gguf
|
||||||
|
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
|
||||||
usage: |
|
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd
|
||||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
|
||||||
"model": "gpt-4-vision-preview",
|
|
||||||
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
|
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
|
embeddings: true
|
||||||
name: text-embedding-ada-002
|
name: text-embedding-ada-002
|
||||||
backend: sentencetransformers
|
|
||||||
parameters:
|
parameters:
|
||||||
model: all-MiniLM-L6-v2
|
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
|
||||||
|
|
||||||
usage: |
|
usage: |
|
||||||
You can test this model with curl like this:
|
You can test this model with curl like this:
|
||||||
|
|||||||
@@ -1,103 +1,53 @@
|
|||||||
name: gpt-4
|
context_size: 4096
|
||||||
mmap: false
|
f16: true
|
||||||
context_size: 8192
|
|
||||||
|
|
||||||
f16: false
|
|
||||||
parameters:
|
|
||||||
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
|
|
||||||
|
|
||||||
stopwords:
|
|
||||||
- "<|im_end|>"
|
|
||||||
- "<dummy32000>"
|
|
||||||
- "</tool_call>"
|
|
||||||
- "<|eot_id|>"
|
|
||||||
- "<|end_of_text|>"
|
|
||||||
|
|
||||||
function:
|
function:
|
||||||
# disable injecting the "answer" tool
|
capture_llm_results:
|
||||||
disable_no_action: true
|
- (?s)<Thought>(.*?)</Thought>
|
||||||
|
|
||||||
grammar:
|
grammar:
|
||||||
# This allows the grammar to also return messages
|
properties_order: name,arguments
|
||||||
mixed_mode: true
|
|
||||||
# Suffix to add to the grammar
|
|
||||||
#prefix: '<tool_call>\n'
|
|
||||||
# Force parallel calls in the grammar
|
|
||||||
# parallel_calls: true
|
|
||||||
|
|
||||||
return_name_in_function_response: true
|
|
||||||
# Without grammar uncomment the lines below
|
|
||||||
# Warning: this is relying only on the capability of the
|
|
||||||
# LLM model to generate the correct function call.
|
|
||||||
json_regex_match:
|
json_regex_match:
|
||||||
- "(?s)<tool_call>(.*?)</tool_call>"
|
- (?s)<Output>(.*?)</Output>
|
||||||
- "(?s)<tool_call>(.*?)"
|
|
||||||
replace_llm_results:
|
replace_llm_results:
|
||||||
# Drop the scratchpad content from responses
|
- key: (?s)<Thought>(.*?)</Thought>
|
||||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
|
||||||
value: ""
|
value: ""
|
||||||
replace_function_results:
|
mmap: true
|
||||||
# Replace everything that is not JSON array or object
|
name: gpt-4
|
||||||
#
|
parameters:
|
||||||
- key: '(?s)^[^{\[]*'
|
model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
|
||||||
value: ""
|
stopwords:
|
||||||
- key: '(?s)[^}\]]*$'
|
- <|im_end|>
|
||||||
value: ""
|
- <dummy32000>
|
||||||
- key: "'([^']*?)'"
|
- </s>
|
||||||
value: "_DQUOTE_${1}_DQUOTE_"
|
|
||||||
- key: '\\"'
|
|
||||||
value: "__TEMP_QUOTE__"
|
|
||||||
- key: "\'"
|
|
||||||
value: "'"
|
|
||||||
- key: "_DQUOTE_"
|
|
||||||
value: '"'
|
|
||||||
- key: "__TEMP_QUOTE__"
|
|
||||||
value: '"'
|
|
||||||
# Drop the scratchpad content from responses
|
|
||||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
|
||||||
value: ""
|
|
||||||
|
|
||||||
template:
|
template:
|
||||||
chat: |
|
chat: |
|
||||||
{{.Input -}}
|
{{.Input -}}
|
||||||
<|im_start|>assistant
|
<|im_start|>assistant
|
||||||
chat_message: |
|
chat_message: |
|
||||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
<|im_start|>{{ .RoleName }}
|
||||||
{{- if .FunctionCall }}
|
{{ if .FunctionCall -}}
|
||||||
<tool_call>
|
Function call:
|
||||||
{{- else if eq .RoleName "tool" }}
|
{{ else if eq .RoleName "tool" -}}
|
||||||
<tool_response>
|
Function response:
|
||||||
{{- end }}
|
{{ end -}}
|
||||||
{{- if .Content}}
|
{{ if .Content -}}
|
||||||
{{.Content }}
|
{{.Content }}
|
||||||
{{- end }}
|
{{ end -}}
|
||||||
{{- if .FunctionCall}}
|
{{ if .FunctionCall -}}
|
||||||
{{toJson .FunctionCall}}
|
{{toJson .FunctionCall}}
|
||||||
{{- end }}
|
{{ end -}}<|im_end|>
|
||||||
{{- if .FunctionCall }}
|
|
||||||
</tool_call>
|
|
||||||
{{- else if eq .RoleName "tool" }}
|
|
||||||
</tool_response>
|
|
||||||
{{- end }}<|im_end|>
|
|
||||||
completion: |
|
completion: |
|
||||||
{{.Input}}
|
{{.Input}}
|
||||||
function: |-
|
function: |
|
||||||
<|im_start|>system
|
<|im_start|>system
|
||||||
You are a function calling AI model.
|
You are an AI assistant that executes function calls, and these are the tools at your disposal:
|
||||||
Here are the available tools:
|
|
||||||
<tools>
|
|
||||||
{{range .Functions}}
|
{{range .Functions}}
|
||||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||||
{{end}}
|
{{end}}
|
||||||
</tools>
|
<|im_end|>
|
||||||
You should call the tools provided to you sequentially
|
|
||||||
Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
|
|
||||||
<scratchpad>
|
|
||||||
{step-by-step reasoning and plan in bullet points}
|
|
||||||
</scratchpad>
|
|
||||||
For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
|
|
||||||
<tool_call>
|
|
||||||
{"arguments": <args-dict>, "name": <function-name>}
|
|
||||||
</tool_call><|im_end|>
|
|
||||||
{{.Input -}}
|
{{.Input -}}
|
||||||
<|im_start|>assistant
|
<|im_start|>assistant
|
||||||
|
|
||||||
|
download_files:
|
||||||
|
- filename: localai-functioncall-phi-4-v0.3-q4_k_m.gguf
|
||||||
|
sha256: 23fee048ded2a6e2e1a7b6bbefa6cbf83068f194caa9552aecbaa00fec8a16d5
|
||||||
|
uri: huggingface://mudler/LocalAI-functioncall-phi-4-v0.3-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.3-q4_k_m.gguf
|
||||||
8
aio/intel/vad.yaml
Normal file
8
aio/intel/vad.yaml
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
backend: silero-vad
|
||||||
|
name: silero-vad
|
||||||
|
parameters:
|
||||||
|
model: silero-vad.onnx
|
||||||
|
download_files:
|
||||||
|
- filename: silero-vad.onnx
|
||||||
|
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
|
||||||
|
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
|
||||||
@@ -1,35 +1,50 @@
|
|||||||
backend: llama-cpp
|
|
||||||
context_size: 4096
|
context_size: 4096
|
||||||
mmap: false
|
f16: true
|
||||||
f16: false
|
mmap: true
|
||||||
|
mmproj: minicpm-v-2_6-mmproj-f16.gguf
|
||||||
name: gpt-4o
|
name: gpt-4o
|
||||||
|
|
||||||
roles:
|
|
||||||
user: "USER:"
|
|
||||||
assistant: "ASSISTANT:"
|
|
||||||
system: "SYSTEM:"
|
|
||||||
|
|
||||||
mmproj: llava-v1.6-7b-mmproj-f16.gguf
|
|
||||||
parameters:
|
parameters:
|
||||||
model: llava-v1.6-mistral-7b.Q5_K_M.gguf
|
model: minicpm-v-2_6-Q4_K_M.gguf
|
||||||
temperature: 0.2
|
stopwords:
|
||||||
top_k: 40
|
- <|im_end|>
|
||||||
top_p: 0.95
|
- <dummy32000>
|
||||||
seed: -1
|
- </s>
|
||||||
|
- <|endoftext|>
|
||||||
template:
|
template:
|
||||||
chat: |
|
chat: |
|
||||||
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
{{.Input -}}
|
||||||
|
<|im_start|>assistant
|
||||||
|
chat_message: |
|
||||||
|
<|im_start|>{{ .RoleName }}
|
||||||
|
{{ if .FunctionCall -}}
|
||||||
|
Function call:
|
||||||
|
{{ else if eq .RoleName "tool" -}}
|
||||||
|
Function response:
|
||||||
|
{{ end -}}
|
||||||
|
{{ if .Content -}}
|
||||||
|
{{.Content }}
|
||||||
|
{{ end -}}
|
||||||
|
{{ if .FunctionCall -}}
|
||||||
|
{{toJson .FunctionCall}}
|
||||||
|
{{ end -}}<|im_end|>
|
||||||
|
completion: |
|
||||||
{{.Input}}
|
{{.Input}}
|
||||||
ASSISTANT:
|
function: |
|
||||||
|
<|im_start|>system
|
||||||
|
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||||
|
{{range .Functions}}
|
||||||
|
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||||
|
{{end}}
|
||||||
|
For each function call return a json object with function name and arguments
|
||||||
|
<|im_end|>
|
||||||
|
{{.Input -}}
|
||||||
|
<|im_start|>assistant
|
||||||
|
|
||||||
|
|
||||||
download_files:
|
download_files:
|
||||||
- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf
|
- filename: minicpm-v-2_6-Q4_K_M.gguf
|
||||||
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf
|
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
|
||||||
- filename: llava-v1.6-7b-mmproj-f16.gguf
|
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
|
||||||
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
|
- filename: minicpm-v-2_6-mmproj-f16.gguf
|
||||||
|
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
|
||||||
usage: |
|
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd
|
||||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
|
||||||
"model": "gpt-4-vision-preview",
|
|
||||||
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
|
|
||||||
15
assets.go
15
assets.go
@@ -1,6 +1,15 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import "embed"
|
import (
|
||||||
|
rice "github.com/GeertJohan/go.rice"
|
||||||
|
)
|
||||||
|
|
||||||
//go:embed backend-assets/*
|
var backendAssets *rice.Box
|
||||||
var backendAssets embed.FS
|
|
||||||
|
func init() {
|
||||||
|
var err error
|
||||||
|
backendAssets, err = rice.FindBox("backend-assets")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ service Backend {
|
|||||||
rpc PredictStream(PredictOptions) returns (stream Reply) {}
|
rpc PredictStream(PredictOptions) returns (stream Reply) {}
|
||||||
rpc Embedding(PredictOptions) returns (EmbeddingResult) {}
|
rpc Embedding(PredictOptions) returns (EmbeddingResult) {}
|
||||||
rpc GenerateImage(GenerateImageRequest) returns (Result) {}
|
rpc GenerateImage(GenerateImageRequest) returns (Result) {}
|
||||||
|
rpc GenerateVideo(GenerateVideoRequest) returns (Result) {}
|
||||||
rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
|
rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
|
||||||
rpc TTS(TTSRequest) returns (Result) {}
|
rpc TTS(TTSRequest) returns (Result) {}
|
||||||
rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
|
rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
|
||||||
@@ -159,6 +160,12 @@ message Reply {
|
|||||||
bytes message = 1;
|
bytes message = 1;
|
||||||
int32 tokens = 2;
|
int32 tokens = 2;
|
||||||
int32 prompt_tokens = 3;
|
int32 prompt_tokens = 3;
|
||||||
|
double timing_prompt_processing = 4;
|
||||||
|
double timing_token_generation = 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
message GrammarTrigger {
|
||||||
|
string word = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
message ModelOptions {
|
message ModelOptions {
|
||||||
@@ -184,11 +191,7 @@ message ModelOptions {
|
|||||||
int32 NGQA = 20;
|
int32 NGQA = 20;
|
||||||
string ModelFile = 21;
|
string ModelFile = 21;
|
||||||
|
|
||||||
// AutoGPTQ
|
|
||||||
string Device = 22;
|
|
||||||
bool UseTriton = 23;
|
|
||||||
string ModelBaseName = 24;
|
|
||||||
bool UseFastTokenizer = 25;
|
|
||||||
|
|
||||||
// Diffusers
|
// Diffusers
|
||||||
string PipelineType = 26;
|
string PipelineType = 26;
|
||||||
@@ -222,6 +225,11 @@ message ModelOptions {
|
|||||||
int32 MaxModelLen = 54;
|
int32 MaxModelLen = 54;
|
||||||
int32 TensorParallelSize = 55;
|
int32 TensorParallelSize = 55;
|
||||||
string LoadFormat = 58;
|
string LoadFormat = 58;
|
||||||
|
bool DisableLogStatus = 66;
|
||||||
|
string DType = 67;
|
||||||
|
int32 LimitImagePerPrompt = 68;
|
||||||
|
int32 LimitVideoPerPrompt = 69;
|
||||||
|
int32 LimitAudioPerPrompt = 70;
|
||||||
|
|
||||||
string MMProj = 41;
|
string MMProj = 41;
|
||||||
|
|
||||||
@@ -245,6 +253,8 @@ message ModelOptions {
|
|||||||
|
|
||||||
string CacheTypeKey = 63;
|
string CacheTypeKey = 63;
|
||||||
string CacheTypeValue = 64;
|
string CacheTypeValue = 64;
|
||||||
|
|
||||||
|
repeated GrammarTrigger GrammarTriggers = 65;
|
||||||
}
|
}
|
||||||
|
|
||||||
message Result {
|
message Result {
|
||||||
@@ -292,6 +302,19 @@ message GenerateImageRequest {
|
|||||||
int32 CLIPSkip = 11;
|
int32 CLIPSkip = 11;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
message GenerateVideoRequest {
|
||||||
|
string prompt = 1;
|
||||||
|
string start_image = 2; // Path or base64 encoded image for the start frame
|
||||||
|
string end_image = 3; // Path or base64 encoded image for the end frame
|
||||||
|
int32 width = 4;
|
||||||
|
int32 height = 5;
|
||||||
|
int32 num_frames = 6; // Number of frames to generate
|
||||||
|
int32 fps = 7; // Frames per second
|
||||||
|
int32 seed = 8;
|
||||||
|
float cfg_scale = 9; // Classifier-free guidance scale
|
||||||
|
string dst = 10; // Output path for the generated video
|
||||||
|
}
|
||||||
|
|
||||||
message TTSRequest {
|
message TTSRequest {
|
||||||
string text = 1;
|
string text = 1;
|
||||||
string model = 2;
|
string model = 2;
|
||||||
|
|||||||
@@ -1,17 +1,17 @@
|
|||||||
|
|
||||||
## XXX: In some versions of CMake clip wasn't being built before llama.
|
## XXX: In some versions of CMake clip wasn't being built before llama.
|
||||||
## This is an hack for now, but it should be fixed in the future.
|
## This is an hack for now, but it should be fixed in the future.
|
||||||
set(TARGET myclip)
|
# set(TARGET myclip)
|
||||||
add_library(${TARGET} clip.cpp clip.h llava.cpp llava.h)
|
# add_library(${TARGET} clip.cpp clip.h clip-impl.h llava.cpp llava.h)
|
||||||
install(TARGETS ${TARGET} LIBRARY)
|
# install(TARGETS ${TARGET} LIBRARY)
|
||||||
target_include_directories(myclip PUBLIC .)
|
# target_include_directories(myclip PUBLIC .)
|
||||||
target_include_directories(myclip PUBLIC ../..)
|
# target_include_directories(myclip PUBLIC ../..)
|
||||||
target_include_directories(myclip PUBLIC ../../common)
|
# target_include_directories(myclip PUBLIC ../../common)
|
||||||
target_link_libraries(${TARGET} PRIVATE common ggml llama ${CMAKE_THREAD_LIBS_INIT})
|
# target_link_libraries(${TARGET} PRIVATE common ggml llama ${CMAKE_THREAD_LIBS_INIT})
|
||||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
# target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||||
if (NOT MSVC)
|
# if (NOT MSVC)
|
||||||
target_compile_options(${TARGET} PRIVATE -Wno-cast-qual) # stb_image.h
|
# target_compile_options(${TARGET} PRIVATE -Wno-cast-qual) # stb_image.h
|
||||||
endif()
|
# endif()
|
||||||
# END CLIP hack
|
# END CLIP hack
|
||||||
|
|
||||||
|
|
||||||
@@ -75,7 +75,11 @@ add_library(hw_grpc_proto
|
|||||||
${hw_proto_hdrs} )
|
${hw_proto_hdrs} )
|
||||||
|
|
||||||
add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp)
|
add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp)
|
||||||
target_link_libraries(${TARGET} PRIVATE common llama myclip ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
|
|
||||||
|
target_include_directories(${TARGET} PRIVATE ../llava)
|
||||||
|
target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})
|
||||||
|
|
||||||
|
target_link_libraries(${TARGET} PRIVATE common llama mtmd ${CMAKE_THREAD_LIBS_INIT} absl::flags hw_grpc_proto
|
||||||
absl::flags_parse
|
absl::flags_parse
|
||||||
gRPC::${_REFLECTION}
|
gRPC::${_REFLECTION}
|
||||||
gRPC::${_GRPC_GRPCPP}
|
gRPC::${_GRPC_GRPCPP}
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
|||||||
TARGET?=--target grpc-server
|
TARGET?=--target grpc-server
|
||||||
|
|
||||||
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
|
||||||
|
|
||||||
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
|
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
|
||||||
ifeq ($(BUILD_TYPE),cublas)
|
ifeq ($(BUILD_TYPE),cublas)
|
||||||
@@ -36,11 +36,18 @@ else ifeq ($(OS),Darwin)
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),sycl_f16)
|
ifeq ($(BUILD_TYPE),sycl_f16)
|
||||||
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
|
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
||||||
|
-DCMAKE_C_COMPILER=icx \
|
||||||
|
-DCMAKE_CXX_COMPILER=icpx \
|
||||||
|
-DCMAKE_CXX_FLAGS="-fsycl" \
|
||||||
|
-DGGML_SYCL_F16=ON
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_TYPE),sycl_f32)
|
ifeq ($(BUILD_TYPE),sycl_f32)
|
||||||
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
||||||
|
-DCMAKE_C_COMPILER=icx \
|
||||||
|
-DCMAKE_CXX_COMPILER=icpx \
|
||||||
|
-DCMAKE_CXX_FLAGS="-fsycl"
|
||||||
endif
|
endif
|
||||||
|
|
||||||
llama.cpp:
|
llama.cpp:
|
||||||
@@ -52,8 +59,8 @@ llama.cpp:
|
|||||||
git checkout -b build $(LLAMA_VERSION) && \
|
git checkout -b build $(LLAMA_VERSION) && \
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
git submodule update --init --recursive --depth 1 --single-branch
|
||||||
|
|
||||||
llama.cpp/examples/grpc-server: llama.cpp
|
llama.cpp/tools/grpc-server: llama.cpp
|
||||||
mkdir -p llama.cpp/examples/grpc-server
|
mkdir -p llama.cpp/tools/grpc-server
|
||||||
bash prepare.sh
|
bash prepare.sh
|
||||||
|
|
||||||
rebuild:
|
rebuild:
|
||||||
@@ -63,13 +70,13 @@ rebuild:
|
|||||||
|
|
||||||
purge:
|
purge:
|
||||||
rm -rf llama.cpp/build
|
rm -rf llama.cpp/build
|
||||||
rm -rf llama.cpp/examples/grpc-server
|
rm -rf llama.cpp/tools/grpc-server
|
||||||
rm -rf grpc-server
|
rm -rf grpc-server
|
||||||
|
|
||||||
clean: purge
|
clean: purge
|
||||||
rm -rf llama.cpp
|
rm -rf llama.cpp
|
||||||
|
|
||||||
grpc-server: llama.cpp llama.cpp/examples/grpc-server
|
grpc-server: llama.cpp llama.cpp/tools/grpc-server
|
||||||
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
|
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
|
||||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||||
+bash -c "source $(ONEAPI_VARS); \
|
+bash -c "source $(ONEAPI_VARS); \
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1,7 +1,7 @@
|
|||||||
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
|
diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp
|
||||||
index 3cd0d2fa..6c5e811a 100644
|
index 3cd0d2fa..6c5e811a 100644
|
||||||
--- a/examples/llava/clip.cpp
|
--- a/tools/mtmd/clip.cpp
|
||||||
+++ b/examples/llava/clip.cpp
|
+++ b/tools/mtmd/clip.cpp
|
||||||
@@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
@@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
||||||
struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
|
struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
|
||||||
int* patches_data = (int*)malloc(ggml_nbytes(patches));
|
int* patches_data = (int*)malloc(ggml_nbytes(patches));
|
||||||
|
|||||||
@@ -7,21 +7,22 @@ for patch in $(ls patches); do
|
|||||||
patch -d llama.cpp/ -p1 < patches/$patch
|
patch -d llama.cpp/ -p1 < patches/$patch
|
||||||
done
|
done
|
||||||
|
|
||||||
cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
|
cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
|
||||||
cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
|
cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
|
||||||
cp -rfv json.hpp llama.cpp/examples/grpc-server/
|
cp -rfv json.hpp llama.cpp/tools/grpc-server/
|
||||||
cp -rfv utils.hpp llama.cpp/examples/grpc-server/
|
cp -rfv utils.hpp llama.cpp/tools/grpc-server/
|
||||||
|
|
||||||
if grep -q "grpc-server" llama.cpp/examples/CMakeLists.txt; then
|
if grep -q "grpc-server" llama.cpp/tools/CMakeLists.txt; then
|
||||||
echo "grpc-server already added"
|
echo "grpc-server already added"
|
||||||
else
|
else
|
||||||
echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
|
echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
|
||||||
fi
|
fi
|
||||||
|
|
||||||
## XXX: In some versions of CMake clip wasn't being built before llama.
|
## XXX: In some versions of CMake clip wasn't being built before llama.
|
||||||
## This is an hack for now, but it should be fixed in the future.
|
## This is an hack for now, but it should be fixed in the future.
|
||||||
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
|
# cp -rfv llama.cpp/tools/mtmd/clip.h llama.cpp/tools/grpc-server/clip.h
|
||||||
cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
|
# cp -rfv llama.cpp/tools/mtmd/clip-impl.h llama.cpp/tools/grpc-server/clip-impl.h
|
||||||
echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
|
# cp -rfv llama.cpp/tools/mtmd/llava.cpp llama.cpp/tools/grpc-server/llava.cpp
|
||||||
cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
|
# echo '#include "llama.h"' > llama.cpp/tools/grpc-server/llava.h
|
||||||
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
|
# cat llama.cpp/tools/mtmd/llava.h >> llama.cpp/tools/grpc-server/llava.h
|
||||||
|
# cp -rfv llama.cpp/tools/mtmd/clip.cpp llama.cpp/tools/grpc-server/clip.cpp
|
||||||
431
backend/cpp/llama/utils.hpp
vendored
431
backend/cpp/llama/utils.hpp
vendored
@@ -1,4 +1,4 @@
|
|||||||
// https://github.com/ggerganov/llama.cpp/blob/master/examples/server/utils.hpp
|
// https://github.com/ggerganov/llama.cpp/blob/master/tools/server/utils.hpp
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
@@ -11,7 +11,7 @@
|
|||||||
|
|
||||||
#include "json.hpp"
|
#include "json.hpp"
|
||||||
|
|
||||||
#include "../llava/clip.h"
|
#include "../mtmd/clip.h"
|
||||||
|
|
||||||
using json = nlohmann::json;
|
using json = nlohmann::json;
|
||||||
|
|
||||||
@@ -480,4 +480,431 @@ static inline std::vector<uint8_t> base64_decode(const std::string & encoded_str
|
|||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// tokenizer and input processing utils
|
||||||
|
//
|
||||||
|
|
||||||
|
static bool json_is_array_of_numbers(const json & data) {
|
||||||
|
if (data.is_array()) {
|
||||||
|
for (const auto & e : data) {
|
||||||
|
if (!e.is_number_integer()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// is array having BOTH numbers & strings?
|
||||||
|
static bool json_is_array_of_mixed_numbers_strings(const json & data) {
|
||||||
|
bool seen_string = false;
|
||||||
|
bool seen_number = false;
|
||||||
|
if (data.is_array()) {
|
||||||
|
for (const auto & e : data) {
|
||||||
|
seen_string |= e.is_string();
|
||||||
|
seen_number |= e.is_number_integer();
|
||||||
|
if (seen_number && seen_string) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// get value by path(key1 / key2)
|
||||||
|
static json json_get_nested_values(const std::vector<std::string> & paths, const json & js) {
|
||||||
|
json result = json::object();
|
||||||
|
|
||||||
|
for (const std::string & path : paths) {
|
||||||
|
json current = js;
|
||||||
|
const auto keys = string_split<std::string>(path, /*separator*/ '/');
|
||||||
|
bool valid_path = true;
|
||||||
|
for (const std::string & k : keys) {
|
||||||
|
if (valid_path && current.is_object() && current.contains(k)) {
|
||||||
|
current = current[k];
|
||||||
|
} else {
|
||||||
|
valid_path = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (valid_path) {
|
||||||
|
result[path] = current;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* this handles 2 cases:
|
||||||
|
* - only string, example: "string"
|
||||||
|
* - mixed string and tokens, example: [12, 34, "string", 56, 78]
|
||||||
|
*/
|
||||||
|
static llama_tokens tokenize_mixed(const llama_vocab * vocab, const json & json_prompt, bool add_special, bool parse_special) {
|
||||||
|
// If `add_bos` is true, we only add BOS, when json_prompt is a string,
|
||||||
|
// or the first element of the json_prompt array is a string.
|
||||||
|
llama_tokens prompt_tokens;
|
||||||
|
|
||||||
|
if (json_prompt.is_array()) {
|
||||||
|
bool first = true;
|
||||||
|
for (const auto & p : json_prompt) {
|
||||||
|
if (p.is_string()) {
|
||||||
|
auto s = p.template get<std::string>();
|
||||||
|
|
||||||
|
llama_tokens p;
|
||||||
|
if (first) {
|
||||||
|
p = common_tokenize(vocab, s, add_special, parse_special);
|
||||||
|
first = false;
|
||||||
|
} else {
|
||||||
|
p = common_tokenize(vocab, s, false, parse_special);
|
||||||
|
}
|
||||||
|
|
||||||
|
prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end());
|
||||||
|
} else {
|
||||||
|
if (first) {
|
||||||
|
first = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
prompt_tokens.push_back(p.template get<llama_token>());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
auto s = json_prompt.template get<std::string>();
|
||||||
|
prompt_tokens = common_tokenize(vocab, s, add_special, parse_special);
|
||||||
|
}
|
||||||
|
|
||||||
|
return prompt_tokens;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* break the input "prompt" object into multiple prompt if needed, then tokenize them
|
||||||
|
* this supports these cases:
|
||||||
|
* - "prompt": "string"
|
||||||
|
* - "prompt": [12, 34, 56]
|
||||||
|
* - "prompt": [12, 34, "string", 56, 78]
|
||||||
|
* and multiple prompts (multi-tasks):
|
||||||
|
* - "prompt": ["string1", "string2"]
|
||||||
|
* - "prompt": ["string1", [12, 34, 56]]
|
||||||
|
* - "prompt": [[12, 34, 56], [78, 90, 12]]
|
||||||
|
* - "prompt": [[12, 34, "string", 56, 78], [12, 34, 56]]
|
||||||
|
*/
|
||||||
|
static std::vector<llama_tokens> tokenize_input_prompts(const llama_vocab * vocab, const json & json_prompt, bool add_special, bool parse_special) {
|
||||||
|
std::vector<llama_tokens> result;
|
||||||
|
if (json_prompt.is_string() || json_is_array_of_mixed_numbers_strings(json_prompt)) {
|
||||||
|
// string or mixed
|
||||||
|
result.push_back(tokenize_mixed(vocab, json_prompt, add_special, parse_special));
|
||||||
|
} else if (json_is_array_of_numbers(json_prompt)) {
|
||||||
|
// array of tokens
|
||||||
|
result.push_back(json_prompt.get<llama_tokens>());
|
||||||
|
} else if (json_prompt.is_array()) {
|
||||||
|
// array of prompts
|
||||||
|
result.reserve(json_prompt.size());
|
||||||
|
for (const auto & p : json_prompt) {
|
||||||
|
if (p.is_string() || json_is_array_of_mixed_numbers_strings(p)) {
|
||||||
|
result.push_back(tokenize_mixed(vocab, p, add_special, parse_special));
|
||||||
|
} else if (json_is_array_of_numbers(p)) {
|
||||||
|
// array of tokens
|
||||||
|
result.push_back(p.get<llama_tokens>());
|
||||||
|
} else {
|
||||||
|
throw std::runtime_error("element of \"prompt\" must be a string, an list of tokens, or a list of mixed strings & tokens");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw std::runtime_error("\"prompt\" must be a string, an list of tokens, a list of mixed strings & tokens, or a list of prompts");
|
||||||
|
}
|
||||||
|
if (result.empty()) {
|
||||||
|
throw std::runtime_error("\"prompt\" must not be empty");
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// utils for interacting with libmtmd
|
||||||
|
// (may need to refactor in near future)
|
||||||
|
//
|
||||||
|
|
||||||
|
/**
|
||||||
|
* server_tokens is a helper to manage the input tokens and image for the server.
|
||||||
|
* it is made this way to simplify the logic of KV cache management.
|
||||||
|
*/
|
||||||
|
struct server_tokens {
|
||||||
|
bool has_mtmd = false;
|
||||||
|
|
||||||
|
private: // disallow accessing these members directly, risking out-of-sync
|
||||||
|
|
||||||
|
// map a **start** position in tokens to the image chunk
|
||||||
|
std::unordered_map<llama_pos, mtmd::input_chunk_ptr> map_pos_to_image;
|
||||||
|
|
||||||
|
// list of tokens
|
||||||
|
// it can include LLAMA_TOKEN_NULL, which is used to indicate a token that is not a text token
|
||||||
|
// a mtmd_input_chunk can occupy multiple tokens, one llama_token per **position**
|
||||||
|
// important: for models using mrope, an image can contain multiple tokens but will use only one **position**
|
||||||
|
llama_tokens tokens;
|
||||||
|
|
||||||
|
// for ex. with input of 5 text tokens and 2 images:
|
||||||
|
// [0] [1] [2] [3] [4] [img0] [img0] [img0] [img1] [img1]
|
||||||
|
// pos 0 1 2 3 4 5 6 7 8 9
|
||||||
|
// map_pos_to_image will contain: {5, img0}, {8, img1}
|
||||||
|
|
||||||
|
public:
|
||||||
|
server_tokens() = default;
|
||||||
|
~server_tokens() = default;
|
||||||
|
|
||||||
|
// Prevent copying
|
||||||
|
server_tokens(const server_tokens&) = delete;
|
||||||
|
server_tokens& operator=(const server_tokens&) = delete;
|
||||||
|
|
||||||
|
// Allow moving (usually implicitly generated if members are movable)
|
||||||
|
server_tokens(server_tokens&&) = default;
|
||||||
|
server_tokens& operator=(server_tokens&&) = default;
|
||||||
|
|
||||||
|
// Allow accessing elements using [] operator
|
||||||
|
llama_token operator[](size_t index) { return tokens[index]; }
|
||||||
|
const llama_token& operator[](size_t index) const { return tokens[index]; }
|
||||||
|
|
||||||
|
server_tokens(mtmd::input_chunks & mtmd_chunks, bool has_mtmd) : has_mtmd(has_mtmd) {
|
||||||
|
for (size_t i = 0; i < mtmd_chunks.size(); ++i) {
|
||||||
|
push_back(mtmd_chunks[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
server_tokens(llama_tokens & tokens, bool has_mtmd) : has_mtmd(has_mtmd), tokens(tokens) {}
|
||||||
|
|
||||||
|
// for debugging
|
||||||
|
std::string str() const {
|
||||||
|
std::ostringstream oss;
|
||||||
|
oss << "tokens: ";
|
||||||
|
for (const auto & t : tokens) {
|
||||||
|
if (t == LLAMA_TOKEN_NULL) {
|
||||||
|
oss << "<embd> ";
|
||||||
|
} else {
|
||||||
|
oss << t << " ";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
oss << "\n";
|
||||||
|
oss << "image pos: ";
|
||||||
|
for (const auto & it : map_pos_to_image) {
|
||||||
|
oss << it.first << ", ";
|
||||||
|
}
|
||||||
|
return oss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
const mtmd::input_chunk_ptr & find_chunk(llama_pos pos) const {
|
||||||
|
auto it = map_pos_to_image.find(pos);
|
||||||
|
if (it != map_pos_to_image.end()) {
|
||||||
|
return it->second;
|
||||||
|
} else {
|
||||||
|
throw std::runtime_error("Chunk not found");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void push_back(llama_token tok) {
|
||||||
|
if (tok == LLAMA_TOKEN_NULL) {
|
||||||
|
throw std::runtime_error("Invalid token");
|
||||||
|
}
|
||||||
|
tokens.emplace_back(tok);
|
||||||
|
}
|
||||||
|
|
||||||
|
// will create a copy of the chunk if it contains non-text data
|
||||||
|
void push_back(const mtmd_input_chunk * chunk) {
|
||||||
|
auto type = mtmd_input_chunk_get_type(chunk);
|
||||||
|
if (type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
|
||||||
|
GGML_ASSERT(has_mtmd);
|
||||||
|
auto img_tokens = mtmd_input_chunk_get_tokens_image(chunk);
|
||||||
|
const int n_pos = mtmd_image_tokens_get_n_pos(img_tokens);
|
||||||
|
llama_pos start_pos = tokens.size();
|
||||||
|
for (int i = 0; i < n_pos; ++i) {
|
||||||
|
tokens.emplace_back(LLAMA_TOKEN_NULL);
|
||||||
|
}
|
||||||
|
mtmd::input_chunk_ptr new_chunk(mtmd_input_chunk_copy(chunk));
|
||||||
|
map_pos_to_image[start_pos] = std::move(new_chunk);
|
||||||
|
} else if (type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
|
||||||
|
size_t n_tokens;
|
||||||
|
auto text_tokens = mtmd_input_chunk_get_tokens_text(chunk, &n_tokens);
|
||||||
|
for (size_t i = 0; i < n_tokens; ++i) {
|
||||||
|
push_back(text_tokens[i]);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
GGML_ABORT("Invalid chunk type");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// for compatibility with context shift and prompt truncation
|
||||||
|
void insert(const llama_tokens & inp_tokens) {
|
||||||
|
GGML_ASSERT(!has_mtmd); // only allow this if mtmd is disabled
|
||||||
|
tokens.insert(tokens.end(), inp_tokens.begin(), inp_tokens.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
// for compatibility with speculative decoding, ctx shift, slot save/load
|
||||||
|
const llama_tokens & get_text_tokens() const {
|
||||||
|
GGML_ASSERT(!has_mtmd); // only allow this if mtmd is disabled
|
||||||
|
return tokens;
|
||||||
|
}
|
||||||
|
|
||||||
|
// for compatibility with speculative decoding
|
||||||
|
void set_token(llama_pos pos, llama_token id) {
|
||||||
|
GGML_ASSERT(!has_mtmd); // only allow this if mtmd is disabled
|
||||||
|
tokens[pos] = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t size() const {
|
||||||
|
return tokens.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool empty() const {
|
||||||
|
return tokens.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
void clear() {
|
||||||
|
tokens.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
void resize(size_t n) {
|
||||||
|
GGML_ASSERT(n <= tokens.size());
|
||||||
|
if (has_mtmd) {
|
||||||
|
// we throw an error if we try to remove a token in the middle of an image
|
||||||
|
// for ex. with input of 5 text tokens and 2 images:
|
||||||
|
// [0] [1] [2] [3] [4] [img0] [img0] [img0] [img1] [img1]
|
||||||
|
// n 1 2 3 4 5 6 7 8 9 10
|
||||||
|
// allowed to resize ^ ^
|
||||||
|
// disallowed to resize ^ ^ ^
|
||||||
|
if (n > 0) {
|
||||||
|
llama_token last_token = tokens[n - 1];
|
||||||
|
// make sure we never remove tokens in the middle of an image
|
||||||
|
if (last_token == LLAMA_TOKEN_NULL) {
|
||||||
|
find_chunk(n - 1); // will throw an error if the token is not begin-of-chunk
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// remove all image chunks that are not used anymore
|
||||||
|
for (auto it = map_pos_to_image.begin(); it != map_pos_to_image.end(); ) {
|
||||||
|
llama_pos pos = it->first;
|
||||||
|
if (pos >= (llama_pos)n) {
|
||||||
|
it = map_pos_to_image.erase(it);
|
||||||
|
} else {
|
||||||
|
++it;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tokens.resize(n);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string detokenize(const llama_context * ctx, bool special) const {
|
||||||
|
llama_tokens text_tokens;
|
||||||
|
text_tokens.reserve(tokens.size());
|
||||||
|
for (const auto & t : tokens) {
|
||||||
|
if (t != LLAMA_TOKEN_NULL) {
|
||||||
|
text_tokens.push_back(t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return common_detokenize(ctx, text_tokens, special);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t get_common_prefix(const server_tokens & b) const {
|
||||||
|
size_t max_idx = std::min(tokens.size(), b.tokens.size());
|
||||||
|
for (size_t i = 0; i < max_idx; ++i) {
|
||||||
|
auto & ai = tokens[i];
|
||||||
|
auto & bi = b.tokens[i];
|
||||||
|
|
||||||
|
if (ai == LLAMA_TOKEN_NULL && bi == LLAMA_TOKEN_NULL) {
|
||||||
|
GGML_ASSERT(has_mtmd);
|
||||||
|
const auto & a_chunk = find_chunk(i);
|
||||||
|
const auto & b_chunk = b.find_chunk(i);
|
||||||
|
GGML_ASSERT(a_chunk && b_chunk);
|
||||||
|
const auto * a_img = mtmd_input_chunk_get_tokens_image(a_chunk.get());
|
||||||
|
const auto * b_img = mtmd_input_chunk_get_tokens_image(b_chunk.get());
|
||||||
|
std::string ai_id = mtmd_image_tokens_get_id(a_img);
|
||||||
|
std::string bi_id = mtmd_image_tokens_get_id(b_img);
|
||||||
|
size_t a_pos = mtmd_image_tokens_get_n_pos(a_img);
|
||||||
|
size_t b_pos = mtmd_image_tokens_get_n_pos(b_img);
|
||||||
|
if (ai_id == bi_id && a_pos == b_pos) {
|
||||||
|
GGML_ASSERT(a_pos > 0 && "Invalid image token"); // should never happen
|
||||||
|
i += a_pos - 1; // will be +1 by the for loop
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
} else if (ai == bi) {
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return max_idx; // all tokens are equal
|
||||||
|
}
|
||||||
|
|
||||||
|
// make sure all text tokens are within the vocab range
|
||||||
|
bool validate(const struct llama_context * ctx) const {
|
||||||
|
const llama_model * model = llama_get_model(ctx);
|
||||||
|
const llama_vocab * vocab = llama_model_get_vocab(model);
|
||||||
|
const int32_t n_vocab = llama_vocab_n_tokens(vocab);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < tokens.size(); ++i) {
|
||||||
|
auto & t = tokens[i];
|
||||||
|
if (t == LLAMA_TOKEN_NULL) {
|
||||||
|
try {
|
||||||
|
const auto & chunk = find_chunk(i);
|
||||||
|
const auto * img_tokens = mtmd_input_chunk_get_tokens_image(chunk.get());
|
||||||
|
size_t n_pos = mtmd_image_tokens_get_n_pos(img_tokens);
|
||||||
|
i += n_pos - 1; // will be +1 by the for loop
|
||||||
|
} catch (const std::exception & e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else if (t < 0 || t >= n_vocab) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// encode and decode the image chunk
|
||||||
|
int32_t process_chunk(
|
||||||
|
llama_context * ctx,
|
||||||
|
mtmd_context * mctx,
|
||||||
|
llama_pos n_past,
|
||||||
|
int32_t seq_id,
|
||||||
|
llama_pos & n_pos_out) {
|
||||||
|
auto it = map_pos_to_image.find(n_past);
|
||||||
|
if (it == map_pos_to_image.end()) {
|
||||||
|
throw std::runtime_error("Chunk not found");
|
||||||
|
}
|
||||||
|
// SRV_INF("%s\n", "processing image...");
|
||||||
|
int32_t n_batch = llama_n_batch(ctx);
|
||||||
|
int64_t t0 = ggml_time_ms();
|
||||||
|
llama_pos new_n_past = n_past;
|
||||||
|
int32_t result = mtmd_helper_eval_chunk_single(mctx, ctx,
|
||||||
|
it->second.get(), // chunk
|
||||||
|
n_past,
|
||||||
|
seq_id,
|
||||||
|
n_batch,
|
||||||
|
true, // logits last
|
||||||
|
&new_n_past);
|
||||||
|
//SRV_INF("image processed in %" PRId64 " ms\n", ggml_time_ms() - t0);
|
||||||
|
if (result != 0) {
|
||||||
|
LOG_ERR("mtmd_helper_eval failed with status %d", result);
|
||||||
|
n_pos_out = n_past;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
n_pos_out = new_n_past;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Computes FNV-1a hash of the data
|
||||||
|
static std::string fnv_hash(const uint8_t * data, size_t len) {
|
||||||
|
const uint64_t fnv_prime = 0x100000001b3ULL;
|
||||||
|
uint64_t hash = 0xcbf29ce484222325ULL;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < len; ++i) {
|
||||||
|
hash ^= data[i];
|
||||||
|
hash *= fnv_prime;
|
||||||
|
}
|
||||||
|
return std::to_string(hash);
|
||||||
}
|
}
|
||||||
@@ -8,12 +8,19 @@ ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
|||||||
# keep standard at C11 and C++11
|
# keep standard at C11 and C++11
|
||||||
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC
|
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC
|
||||||
|
|
||||||
|
GOCMD?=go
|
||||||
|
CGO_LDFLAGS?=
|
||||||
|
# Avoid parent make file overwriting CGO_LDFLAGS which is needed for hipblas
|
||||||
|
CGO_LDFLAGS_SYCL=
|
||||||
|
GO_TAGS?=
|
||||||
|
LD_FLAGS?=
|
||||||
|
|
||||||
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||||
|
|
||||||
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
|
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
|
||||||
ifeq ($(BUILD_TYPE),cublas)
|
ifeq ($(BUILD_TYPE),cublas)
|
||||||
CMAKE_ARGS+=-DGGML_CUDA=ON
|
CMAKE_ARGS+=-DSD_CUDA=ON
|
||||||
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||||
# to CMAKE_ARGS automatically
|
# to CMAKE_ARGS automatically
|
||||||
else ifeq ($(BUILD_TYPE),openblas)
|
else ifeq ($(BUILD_TYPE),openblas)
|
||||||
@@ -23,29 +30,48 @@ else ifeq ($(BUILD_TYPE),clblas)
|
|||||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||||
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
||||||
else ifeq ($(BUILD_TYPE),hipblas)
|
else ifeq ($(BUILD_TYPE),hipblas)
|
||||||
CMAKE_ARGS+=-DGGML_HIP=ON
|
CMAKE_ARGS+=-DSD_HIPBLAS=ON
|
||||||
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
|
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
|
||||||
# But if it's OSX without metal, disable it here
|
# But if it's OSX without metal, disable it here
|
||||||
else ifeq ($(OS),Darwin)
|
else ifeq ($(OS),Darwin)
|
||||||
ifneq ($(BUILD_TYPE),metal)
|
ifneq ($(BUILD_TYPE),metal)
|
||||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
CMAKE_ARGS+=-DSD_METAL=OFF
|
||||||
else
|
else
|
||||||
CMAKE_ARGS+=-DGGML_METAL=ON
|
CMAKE_ARGS+=-DSD_METAL=ON
|
||||||
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
||||||
TARGET+=--target ggml-metal
|
TARGET+=--target ggml-metal
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# ifeq ($(BUILD_TYPE),sycl_f16)
|
ifeq ($(BUILD_TYPE),sycl_f16)
|
||||||
# CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON -DSD_SYCL=ON -DGGML_SYCL_F16=ON
|
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
||||||
# endif
|
-DCMAKE_C_COMPILER=icx \
|
||||||
|
-DCMAKE_CXX_COMPILER=icpx \
|
||||||
|
-DSD_SYCL=ON \
|
||||||
|
-DGGML_SYCL_F16=ON
|
||||||
|
CC=icx
|
||||||
|
CXX=icpx
|
||||||
|
CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
|
||||||
|
CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
|
||||||
|
CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
|
||||||
|
CGO_CXXFLAGS += $(shell pkg-config --cflags mkl-static-lp64-gomp )
|
||||||
|
endif
|
||||||
|
|
||||||
# ifeq ($(BUILD_TYPE),sycl_f32)
|
ifeq ($(BUILD_TYPE),sycl_f32)
|
||||||
# CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DSD_SYCL=ON
|
CMAKE_ARGS+=-DGGML_SYCL=ON \
|
||||||
# endif
|
-DCMAKE_C_COMPILER=icx \
|
||||||
|
-DCMAKE_CXX_COMPILER=icpx \
|
||||||
|
-DSD_SYCL=ON
|
||||||
|
CC=icx
|
||||||
|
CXX=icpx
|
||||||
|
CGO_LDFLAGS_SYCL += -fsycl -L${DNNLROOT}/lib -ldnnl ${MKLROOT}/lib/intel64/libmkl_sycl.a -fiopenmp -fopenmp-targets=spir64 -lOpenCL
|
||||||
|
CGO_LDFLAGS_SYCL += $(shell pkg-config --libs mkl-static-lp64-gomp)
|
||||||
|
CGO_CXXFLAGS += -fiopenmp -fopenmp-targets=spir64
|
||||||
|
CGO_CXXFLAGS += $(shell pkg-config --cflags mkl-static-lp64-gomp )
|
||||||
|
endif
|
||||||
|
|
||||||
# warnings
|
# warnings
|
||||||
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
|
# CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
|
||||||
|
|
||||||
# Find all .a archives in ARCHIVE_DIR
|
# Find all .a archives in ARCHIVE_DIR
|
||||||
# (ggml can have different backends cpu, cuda, etc., each backend generates a .a archive)
|
# (ggml can have different backends cpu, cuda, etc., each backend generates a .a archive)
|
||||||
@@ -86,11 +112,24 @@ endif
|
|||||||
$(MAKE) $(COMBINED_LIB)
|
$(MAKE) $(COMBINED_LIB)
|
||||||
|
|
||||||
gosd.o:
|
gosd.o:
|
||||||
|
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||||
|
+bash -c "source $(ONEAPI_VARS); \
|
||||||
|
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c"
|
||||||
|
else
|
||||||
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
|
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
|
||||||
|
endif
|
||||||
|
|
||||||
libsd.a: gosd.o
|
libsd.a: gosd.o
|
||||||
cp $(INCLUDE_PATH)/build/libstable-diffusion.a ./libsd.a
|
cp $(INCLUDE_PATH)/build/libstable-diffusion.a ./libsd.a
|
||||||
$(AR) rcs libsd.a gosd.o
|
$(AR) rcs libsd.a gosd.o
|
||||||
|
|
||||||
|
stablediffusion-ggml:
|
||||||
|
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_SYCL)" C_INCLUDE_PATH="$(INCLUDE_PATH)" LIBRARY_PATH="$(LIBRARY_PATH)" \
|
||||||
|
CC="$(CC)" CXX="$(CXX)" CGO_CXXFLAGS="$(CGO_CXXFLAGS)" \
|
||||||
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o ../../../../backend-assets/grpc/stablediffusion-ggml ./
|
||||||
|
ifneq ($(UPX),)
|
||||||
|
$(UPX) ../../../../backend-assets/grpc/stablediffusion-ggml
|
||||||
|
endif
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf gosd.o libsd.a build $(COMBINED_LIB)
|
rm -rf gosd.o libsd.a build $(COMBINED_LIB)
|
||||||
@@ -35,6 +35,8 @@ const char* sample_method_str[] = {
|
|||||||
"ipndm",
|
"ipndm",
|
||||||
"ipndm_v",
|
"ipndm_v",
|
||||||
"lcm",
|
"lcm",
|
||||||
|
"ddim_trailing",
|
||||||
|
"tcd",
|
||||||
};
|
};
|
||||||
|
|
||||||
// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
|
// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
|
||||||
@@ -173,6 +175,7 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
|
|||||||
-1, //clip_skip
|
-1, //clip_skip
|
||||||
cfg_scale, // sfg_scale
|
cfg_scale, // sfg_scale
|
||||||
3.5f,
|
3.5f,
|
||||||
|
0, // eta
|
||||||
width,
|
width,
|
||||||
height,
|
height,
|
||||||
sample_method,
|
sample_method,
|
||||||
|
|||||||
@@ -1,21 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
|
||||||
|
|
||||||
import (
|
|
||||||
"flag"
|
|
||||||
|
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
flag.Parse()
|
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, &Image{}); err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,33 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// This is a wrapper to statisfy the GRPC service interface
|
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
|
||||||
import (
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
"github.com/mudler/LocalAI/pkg/stablediffusion"
|
|
||||||
)
|
|
||||||
|
|
||||||
type Image struct {
|
|
||||||
base.SingleThread
|
|
||||||
stablediffusion *stablediffusion.StableDiffusion
|
|
||||||
}
|
|
||||||
|
|
||||||
func (image *Image) Load(opts *pb.ModelOptions) error {
|
|
||||||
var err error
|
|
||||||
// Note: the Model here is a path to a directory containing the model files
|
|
||||||
image.stablediffusion, err = stablediffusion.New(opts.ModelFile)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
|
|
||||||
return image.stablediffusion.GenerateImage(
|
|
||||||
int(opts.Height),
|
|
||||||
int(opts.Width),
|
|
||||||
int(opts.Mode),
|
|
||||||
int(opts.Step),
|
|
||||||
int(opts.Seed),
|
|
||||||
opts.PositivePrompt,
|
|
||||||
opts.NegativePrompt,
|
|
||||||
opts.Dst)
|
|
||||||
}
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
|
||||||
|
|
||||||
import (
|
|
||||||
"flag"
|
|
||||||
|
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
flag.Parse()
|
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, &Image{}); err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,32 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// This is a wrapper to statisfy the GRPC service interface
|
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
|
||||||
import (
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
"github.com/mudler/LocalAI/pkg/tinydream"
|
|
||||||
)
|
|
||||||
|
|
||||||
type Image struct {
|
|
||||||
base.SingleThread
|
|
||||||
tinydream *tinydream.TinyDream
|
|
||||||
}
|
|
||||||
|
|
||||||
func (image *Image) Load(opts *pb.ModelOptions) error {
|
|
||||||
var err error
|
|
||||||
// Note: the Model here is a path to a directory containing the model files
|
|
||||||
image.tinydream, err = tinydream.New(opts.ModelFile)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
|
|
||||||
return image.tinydream.GenerateImage(
|
|
||||||
int(opts.Height),
|
|
||||||
int(opts.Width),
|
|
||||||
int(opts.Step),
|
|
||||||
int(opts.Seed),
|
|
||||||
opts.PositivePrompt,
|
|
||||||
opts.NegativePrompt,
|
|
||||||
opts.Dst)
|
|
||||||
}
|
|
||||||
@@ -1,204 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// This is a wrapper to statisfy the GRPC service interface
|
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
|
|
||||||
"github.com/go-skynet/go-llama.cpp"
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
)
|
|
||||||
|
|
||||||
type LLM struct {
|
|
||||||
base.SingleThread
|
|
||||||
|
|
||||||
llama *llama.LLama
|
|
||||||
}
|
|
||||||
|
|
||||||
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
|
||||||
ropeFreqBase := float32(10000)
|
|
||||||
ropeFreqScale := float32(1)
|
|
||||||
|
|
||||||
if opts.RopeFreqBase != 0 {
|
|
||||||
ropeFreqBase = opts.RopeFreqBase
|
|
||||||
}
|
|
||||||
if opts.RopeFreqScale != 0 {
|
|
||||||
ropeFreqScale = opts.RopeFreqScale
|
|
||||||
}
|
|
||||||
|
|
||||||
llamaOpts := []llama.ModelOption{
|
|
||||||
llama.WithRopeFreqBase(ropeFreqBase),
|
|
||||||
llama.WithRopeFreqScale(ropeFreqScale),
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.NGQA != 0 {
|
|
||||||
llamaOpts = append(llamaOpts, llama.WithGQA(int(opts.NGQA)))
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.RMSNormEps != 0 {
|
|
||||||
llamaOpts = append(llamaOpts, llama.WithRMSNormEPS(opts.RMSNormEps))
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.ContextSize != 0 {
|
|
||||||
llamaOpts = append(llamaOpts, llama.SetContext(int(opts.ContextSize)))
|
|
||||||
}
|
|
||||||
if opts.F16Memory {
|
|
||||||
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
|
|
||||||
}
|
|
||||||
if opts.Embeddings {
|
|
||||||
llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
|
|
||||||
}
|
|
||||||
if opts.NGPULayers != 0 {
|
|
||||||
llamaOpts = append(llamaOpts, llama.SetGPULayers(int(opts.NGPULayers)))
|
|
||||||
}
|
|
||||||
|
|
||||||
llamaOpts = append(llamaOpts, llama.SetMMap(opts.MMap))
|
|
||||||
llamaOpts = append(llamaOpts, llama.SetMainGPU(opts.MainGPU))
|
|
||||||
llamaOpts = append(llamaOpts, llama.SetTensorSplit(opts.TensorSplit))
|
|
||||||
if opts.NBatch != 0 {
|
|
||||||
llamaOpts = append(llamaOpts, llama.SetNBatch(int(opts.NBatch)))
|
|
||||||
} else {
|
|
||||||
llamaOpts = append(llamaOpts, llama.SetNBatch(512))
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.NUMA {
|
|
||||||
llamaOpts = append(llamaOpts, llama.EnableNUMA)
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.LowVRAM {
|
|
||||||
llamaOpts = append(llamaOpts, llama.EnabelLowVRAM)
|
|
||||||
}
|
|
||||||
|
|
||||||
model, err := llama.New(opts.ModelFile, llamaOpts...)
|
|
||||||
llm.llama = model
|
|
||||||
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
|
|
||||||
ropeFreqBase := float32(10000)
|
|
||||||
ropeFreqScale := float32(1)
|
|
||||||
|
|
||||||
if opts.RopeFreqBase != 0 {
|
|
||||||
ropeFreqBase = opts.RopeFreqBase
|
|
||||||
}
|
|
||||||
if opts.RopeFreqScale != 0 {
|
|
||||||
ropeFreqScale = opts.RopeFreqScale
|
|
||||||
}
|
|
||||||
predictOptions := []llama.PredictOption{
|
|
||||||
llama.SetTemperature(opts.Temperature),
|
|
||||||
llama.SetTopP(opts.TopP),
|
|
||||||
llama.SetTopK(int(opts.TopK)),
|
|
||||||
llama.SetTokens(int(opts.Tokens)),
|
|
||||||
llama.SetThreads(int(opts.Threads)),
|
|
||||||
llama.WithGrammar(opts.Grammar),
|
|
||||||
llama.SetRopeFreqBase(ropeFreqBase),
|
|
||||||
llama.SetRopeFreqScale(ropeFreqScale),
|
|
||||||
llama.SetNegativePromptScale(opts.NegativePromptScale),
|
|
||||||
llama.SetNegativePrompt(opts.NegativePrompt),
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.PromptCacheAll {
|
|
||||||
predictOptions = append(predictOptions, llama.EnablePromptCacheAll)
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.PromptCacheRO {
|
|
||||||
predictOptions = append(predictOptions, llama.EnablePromptCacheRO)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Expected absolute path
|
|
||||||
if opts.PromptCachePath != "" {
|
|
||||||
predictOptions = append(predictOptions, llama.SetPathPromptCache(opts.PromptCachePath))
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.Mirostat != 0 {
|
|
||||||
predictOptions = append(predictOptions, llama.SetMirostat(int(opts.Mirostat)))
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.MirostatETA != 0 {
|
|
||||||
predictOptions = append(predictOptions, llama.SetMirostatETA(opts.MirostatETA))
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.MirostatTAU != 0 {
|
|
||||||
predictOptions = append(predictOptions, llama.SetMirostatTAU(opts.MirostatTAU))
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.Debug {
|
|
||||||
predictOptions = append(predictOptions, llama.Debug)
|
|
||||||
}
|
|
||||||
|
|
||||||
predictOptions = append(predictOptions, llama.SetStopWords(opts.StopPrompts...))
|
|
||||||
|
|
||||||
if opts.PresencePenalty != 0 {
|
|
||||||
predictOptions = append(predictOptions, llama.SetPenalty(opts.PresencePenalty))
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.NKeep != 0 {
|
|
||||||
predictOptions = append(predictOptions, llama.SetNKeep(int(opts.NKeep)))
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.Batch != 0 {
|
|
||||||
predictOptions = append(predictOptions, llama.SetBatch(int(opts.Batch)))
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.F16KV {
|
|
||||||
predictOptions = append(predictOptions, llama.EnableF16KV)
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.IgnoreEOS {
|
|
||||||
predictOptions = append(predictOptions, llama.IgnoreEOS)
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.Seed != 0 {
|
|
||||||
predictOptions = append(predictOptions, llama.SetSeed(int(opts.Seed)))
|
|
||||||
}
|
|
||||||
|
|
||||||
//predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed))
|
|
||||||
|
|
||||||
predictOptions = append(predictOptions, llama.SetFrequencyPenalty(opts.FrequencyPenalty))
|
|
||||||
predictOptions = append(predictOptions, llama.SetMlock(opts.MLock))
|
|
||||||
predictOptions = append(predictOptions, llama.SetMemoryMap(opts.MMap))
|
|
||||||
predictOptions = append(predictOptions, llama.SetPredictionMainGPU(opts.MainGPU))
|
|
||||||
predictOptions = append(predictOptions, llama.SetPredictionTensorSplit(opts.TensorSplit))
|
|
||||||
predictOptions = append(predictOptions, llama.SetTailFreeSamplingZ(opts.TailFreeSamplingZ))
|
|
||||||
predictOptions = append(predictOptions, llama.SetTypicalP(opts.TypicalP))
|
|
||||||
return predictOptions
|
|
||||||
}
|
|
||||||
|
|
||||||
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
|
||||||
return llm.llama.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
|
||||||
predictOptions := buildPredictOptions(opts)
|
|
||||||
|
|
||||||
predictOptions = append(predictOptions, llama.SetTokenCallback(func(token string) bool {
|
|
||||||
results <- token
|
|
||||||
return true
|
|
||||||
}))
|
|
||||||
|
|
||||||
go func() {
|
|
||||||
_, err := llm.llama.Predict(opts.Prompt, predictOptions...)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println("err: ", err)
|
|
||||||
}
|
|
||||||
close(results)
|
|
||||||
}()
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
|
|
||||||
predictOptions := buildPredictOptions(opts)
|
|
||||||
|
|
||||||
if len(opts.EmbeddingTokens) > 0 {
|
|
||||||
tokens := []int{}
|
|
||||||
for _, t := range opts.EmbeddingTokens {
|
|
||||||
tokens = append(tokens, int(t))
|
|
||||||
}
|
|
||||||
return llm.llama.TokenEmbeddings(tokens, predictOptions...)
|
|
||||||
}
|
|
||||||
|
|
||||||
return llm.llama.Embeddings(opts.Embeddings, predictOptions...)
|
|
||||||
}
|
|
||||||
@@ -1,19 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"flag"
|
|
||||||
|
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
flag.Parse()
|
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -311,12 +311,16 @@ func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error)
|
|||||||
}
|
}
|
||||||
|
|
||||||
func isNormalized(k []float32) bool {
|
func isNormalized(k []float32) bool {
|
||||||
var sum float32
|
var sum float64
|
||||||
|
|
||||||
for _, v := range k {
|
for _, v := range k {
|
||||||
sum += v
|
v64 := float64(v)
|
||||||
|
sum += v64*v64
|
||||||
}
|
}
|
||||||
|
|
||||||
return sum == 1.0
|
s := math.Sqrt(sum)
|
||||||
|
|
||||||
|
return s >= 0.99 && s <= 1.01
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: This we could replace with handwritten SIMD code
|
// TODO: This we could replace with handwritten SIMD code
|
||||||
@@ -328,7 +332,7 @@ func normalizedCosineSimilarity(k1, k2 []float32) float32 {
|
|||||||
dot += k1[i] * k2[i]
|
dot += k1[i] * k2[i]
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(dot >= -1 && dot <= 1, fmt.Sprintf("dot = %f", dot))
|
assert(dot >= -1.01 && dot <= 1.01, fmt.Sprintf("dot = %f", dot))
|
||||||
|
|
||||||
// 2.0 * (1.0 - dot) would be the Euclidean distance
|
// 2.0 * (1.0 - dot) would be the Euclidean distance
|
||||||
return dot
|
return dot
|
||||||
@@ -418,7 +422,7 @@ func cosineSimilarity(k1, k2 []float32, mag1 float64) float32 {
|
|||||||
|
|
||||||
sim := float32(dot / (mag1 * math.Sqrt(mag2)))
|
sim := float32(dot / (mag1 * math.Sqrt(mag2)))
|
||||||
|
|
||||||
assert(sim >= -1 && sim <= 1, fmt.Sprintf("sim = %f", sim))
|
assert(sim >= -1.01 && sim <= 1.01, fmt.Sprintf("sim = %f", sim))
|
||||||
|
|
||||||
return sim
|
return sim
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -74,7 +74,7 @@ func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (pb.Transcript
|
|||||||
context.SetTranslate(true)
|
context.SetTranslate(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := context.Process(data, nil, nil); err != nil {
|
if err := context.Process(data, nil, nil, nil); err != nil {
|
||||||
return pb.TranscriptResult{}, err
|
return pb.TranscriptResult{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +0,0 @@
|
|||||||
# Creating a separate environment for the autogptq project
|
|
||||||
|
|
||||||
```
|
|
||||||
make autogptq
|
|
||||||
```
|
|
||||||
@@ -1,153 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
from concurrent import futures
|
|
||||||
import argparse
|
|
||||||
import signal
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
import base64
|
|
||||||
|
|
||||||
import grpc
|
|
||||||
import backend_pb2
|
|
||||||
import backend_pb2_grpc
|
|
||||||
|
|
||||||
from auto_gptq import AutoGPTQForCausalLM
|
|
||||||
from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
||||||
from transformers import TextGenerationPipeline
|
|
||||||
|
|
||||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
|
||||||
|
|
||||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
|
||||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
|
||||||
|
|
||||||
# Implement the BackendServicer class with the service methods
|
|
||||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|
||||||
def Health(self, request, context):
|
|
||||||
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
|
||||||
def LoadModel(self, request, context):
|
|
||||||
try:
|
|
||||||
device = "cuda:0"
|
|
||||||
if request.Device != "":
|
|
||||||
device = request.Device
|
|
||||||
|
|
||||||
# support loading local model files
|
|
||||||
model_path = os.path.join(os.environ.get('MODELS_PATH', './'), request.Model)
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True, trust_remote_code=request.TrustRemoteCode)
|
|
||||||
|
|
||||||
# support model `Qwen/Qwen-VL-Chat-Int4`
|
|
||||||
if "qwen-vl" in request.Model.lower():
|
|
||||||
self.model_name = "Qwen-VL-Chat"
|
|
||||||
model = AutoModelForCausalLM.from_pretrained(model_path,
|
|
||||||
trust_remote_code=request.TrustRemoteCode,
|
|
||||||
device_map="auto").eval()
|
|
||||||
else:
|
|
||||||
model = AutoGPTQForCausalLM.from_quantized(model_path,
|
|
||||||
model_basename=request.ModelBaseName,
|
|
||||||
use_safetensors=True,
|
|
||||||
trust_remote_code=request.TrustRemoteCode,
|
|
||||||
device=device,
|
|
||||||
use_triton=request.UseTriton,
|
|
||||||
quantize_config=None)
|
|
||||||
|
|
||||||
self.model = model
|
|
||||||
self.tokenizer = tokenizer
|
|
||||||
except Exception as err:
|
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
|
||||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
|
||||||
|
|
||||||
def Predict(self, request, context):
|
|
||||||
penalty = 1.0
|
|
||||||
if request.Penalty != 0.0:
|
|
||||||
penalty = request.Penalty
|
|
||||||
tokens = 512
|
|
||||||
if request.Tokens != 0:
|
|
||||||
tokens = request.Tokens
|
|
||||||
top_p = 0.95
|
|
||||||
if request.TopP != 0.0:
|
|
||||||
top_p = request.TopP
|
|
||||||
|
|
||||||
|
|
||||||
prompt_images = self.recompile_vl_prompt(request)
|
|
||||||
compiled_prompt = prompt_images[0]
|
|
||||||
print(f"Prompt: {compiled_prompt}", file=sys.stderr)
|
|
||||||
|
|
||||||
# Implement Predict RPC
|
|
||||||
pipeline = TextGenerationPipeline(
|
|
||||||
model=self.model,
|
|
||||||
tokenizer=self.tokenizer,
|
|
||||||
max_new_tokens=tokens,
|
|
||||||
temperature=request.Temperature,
|
|
||||||
top_p=top_p,
|
|
||||||
repetition_penalty=penalty,
|
|
||||||
)
|
|
||||||
t = pipeline(compiled_prompt)[0]["generated_text"]
|
|
||||||
print(f"generated_text: {t}", file=sys.stderr)
|
|
||||||
|
|
||||||
if compiled_prompt in t:
|
|
||||||
t = t.replace(compiled_prompt, "")
|
|
||||||
# house keeping. Remove the image files from /tmp folder
|
|
||||||
for img_path in prompt_images[1]:
|
|
||||||
try:
|
|
||||||
os.remove(img_path)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error removing image file: {img_path}, {e}", file=sys.stderr)
|
|
||||||
|
|
||||||
return backend_pb2.Result(message=bytes(t, encoding='utf-8'))
|
|
||||||
|
|
||||||
def PredictStream(self, request, context):
|
|
||||||
# Implement PredictStream RPC
|
|
||||||
#for reply in some_data_generator():
|
|
||||||
# yield reply
|
|
||||||
# Not implemented yet
|
|
||||||
return self.Predict(request, context)
|
|
||||||
|
|
||||||
def recompile_vl_prompt(self, request):
|
|
||||||
prompt = request.Prompt
|
|
||||||
image_paths = []
|
|
||||||
|
|
||||||
if "qwen-vl" in self.model_name.lower():
|
|
||||||
# request.Images is an array which contains base64 encoded images. Iterate the request.Images array, decode and save each image to /tmp folder with a random filename.
|
|
||||||
# Then, save the image file paths to an array "image_paths".
|
|
||||||
# read "request.Prompt", replace "[img-%d]" with the image file paths in the order they appear in "image_paths". Save the new prompt to "prompt".
|
|
||||||
for i, img in enumerate(request.Images):
|
|
||||||
timestamp = str(int(time.time() * 1000)) # Generate timestamp
|
|
||||||
img_path = f"/tmp/vl-{timestamp}.jpg" # Use timestamp in filename
|
|
||||||
with open(img_path, "wb") as f:
|
|
||||||
f.write(base64.b64decode(img))
|
|
||||||
image_paths.append(img_path)
|
|
||||||
prompt = prompt.replace(f"[img-{i}]", "<img>" + img_path + "</img>,")
|
|
||||||
else:
|
|
||||||
prompt = request.Prompt
|
|
||||||
return (prompt, image_paths)
|
|
||||||
|
|
||||||
def serve(address):
|
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
|
||||||
server.add_insecure_port(address)
|
|
||||||
server.start()
|
|
||||||
print("Server started. Listening on: " + address, file=sys.stderr)
|
|
||||||
|
|
||||||
# Define the signal handler function
|
|
||||||
def signal_handler(sig, frame):
|
|
||||||
print("Received termination signal. Shutting down...")
|
|
||||||
server.stop(0)
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
||||||
# Set the signal handlers for SIGINT and SIGTERM
|
|
||||||
signal.signal(signal.SIGINT, signal_handler)
|
|
||||||
signal.signal(signal.SIGTERM, signal_handler)
|
|
||||||
|
|
||||||
try:
|
|
||||||
while True:
|
|
||||||
time.sleep(_ONE_DAY_IN_SECONDS)
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
server.stop(0)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
parser = argparse.ArgumentParser(description="Run the gRPC server.")
|
|
||||||
parser.add_argument(
|
|
||||||
"--addr", default="localhost:50051", help="The address to bind the server to."
|
|
||||||
)
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
serve(args.addr)
|
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
|
||||||
torch==2.4.1+cu118
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
torch==2.4.1
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
accelerate
|
|
||||||
auto-gptq==0.7.1
|
|
||||||
grpcio==1.69.0
|
|
||||||
protobuf
|
|
||||||
certifi
|
|
||||||
transformers
|
|
||||||
@@ -61,7 +61,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
return backend_pb2.Result(success=True)
|
return backend_pb2.Result(success=True)
|
||||||
|
|
||||||
def serve(address):
|
def serve(address):
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||||
|
options=[
|
||||||
|
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
])
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||||
server.add_insecure_port(address)
|
server.add_insecure_port(address)
|
||||||
server.start()
|
server.start()
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
bark==0.1.5
|
bark==0.1.5
|
||||||
grpcio==1.69.0
|
grpcio==1.72.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -1,3 +1,3 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.72.0
|
||||||
protobuf
|
protobuf
|
||||||
grpcio-tools
|
grpcio-tools
|
||||||
@@ -86,7 +86,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
return backend_pb2.Result(success=True)
|
return backend_pb2.Result(success=True)
|
||||||
|
|
||||||
def serve(address):
|
def serve(address):
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||||
|
options=[
|
||||||
|
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
])
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||||
server.add_insecure_port(address)
|
server.add_insecure_port(address)
|
||||||
server.start()
|
server.start()
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
transformers
|
transformers==4.48.3
|
||||||
accelerate
|
accelerate
|
||||||
torch==2.4.1
|
torch==2.4.1
|
||||||
coqui-tts
|
coqui-tts
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||||
torch==2.4.1+cu118
|
torch==2.4.1+cu118
|
||||||
torchaudio==2.4.1+cu118
|
torchaudio==2.4.1+cu118
|
||||||
transformers
|
transformers==4.48.3
|
||||||
accelerate
|
accelerate
|
||||||
coqui-tts
|
coqui-tts
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
torch==2.4.1
|
torch==2.4.1
|
||||||
torchaudio==2.4.1
|
torchaudio==2.4.1
|
||||||
transformers
|
transformers==4.48.3
|
||||||
accelerate
|
accelerate
|
||||||
coqui-tts
|
coqui-tts
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||||
torch==2.4.1+rocm6.0
|
torch==2.4.1+rocm6.0
|
||||||
torchaudio==2.4.1+rocm6.0
|
torchaudio==2.4.1+rocm6.0
|
||||||
transformers
|
transformers==4.48.3
|
||||||
accelerate
|
accelerate
|
||||||
coqui-tts
|
coqui-tts
|
||||||
@@ -5,6 +5,6 @@ torchaudio==2.3.1+cxx11.abi
|
|||||||
oneccl_bind_pt==2.3.100+xpu
|
oneccl_bind_pt==2.3.100+xpu
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools
|
setuptools
|
||||||
transformers
|
transformers==4.48.3
|
||||||
accelerate
|
accelerate
|
||||||
coqui-tts
|
coqui-tts
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.72.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
packaging==24.1
|
packaging==24.1
|
||||||
@@ -19,7 +19,7 @@ import grpc
|
|||||||
|
|
||||||
from diffusers import SanaPipeline, StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
|
from diffusers import SanaPipeline, StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
|
||||||
EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
|
EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
|
||||||
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
|
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline, Lumina2Text2ImgPipeline
|
||||||
from diffusers.pipelines.stable_diffusion import safety_checker
|
from diffusers.pipelines.stable_diffusion import safety_checker
|
||||||
from diffusers.utils import load_image, export_to_video
|
from diffusers.utils import load_image, export_to_video
|
||||||
from compel import Compel, ReturnedEmbeddingsType
|
from compel import Compel, ReturnedEmbeddingsType
|
||||||
@@ -159,6 +159,22 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
torchType = torch.float16
|
torchType = torch.float16
|
||||||
variant = "fp16"
|
variant = "fp16"
|
||||||
|
|
||||||
|
options = request.Options
|
||||||
|
|
||||||
|
# empty dict
|
||||||
|
self.options = {}
|
||||||
|
|
||||||
|
# The options are a list of strings in this form optname:optvalue
|
||||||
|
# We are storing all the options in a dict so we can use it later when
|
||||||
|
# generating the images
|
||||||
|
for opt in options:
|
||||||
|
if ":" not in opt:
|
||||||
|
continue
|
||||||
|
key, value = opt.split(":")
|
||||||
|
self.options[key] = value
|
||||||
|
|
||||||
|
print(f"Options: {self.options}", file=sys.stderr)
|
||||||
|
|
||||||
local = False
|
local = False
|
||||||
modelFile = request.Model
|
modelFile = request.Model
|
||||||
|
|
||||||
@@ -275,6 +291,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
|
|
||||||
if request.LowVRAM:
|
if request.LowVRAM:
|
||||||
self.pipe.enable_model_cpu_offload()
|
self.pipe.enable_model_cpu_offload()
|
||||||
|
elif request.PipelineType == "Lumina2Text2ImgPipeline":
|
||||||
|
self.pipe = Lumina2Text2ImgPipeline.from_pretrained(
|
||||||
|
request.Model,
|
||||||
|
torch_dtype=torch.bfloat16)
|
||||||
|
if request.LowVRAM:
|
||||||
|
self.pipe.enable_model_cpu_offload()
|
||||||
elif request.PipelineType == "SanaPipeline":
|
elif request.PipelineType == "SanaPipeline":
|
||||||
self.pipe = SanaPipeline.from_pretrained(
|
self.pipe = SanaPipeline.from_pretrained(
|
||||||
request.Model,
|
request.Model,
|
||||||
@@ -441,6 +463,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
# create a dictionary of parameters by using the keys from EnableParameters and the values from defaults
|
# create a dictionary of parameters by using the keys from EnableParameters and the values from defaults
|
||||||
kwargs = {key: options.get(key) for key in keys if key in options}
|
kwargs = {key: options.get(key) for key in keys if key in options}
|
||||||
|
|
||||||
|
# populate kwargs from self.options.
|
||||||
|
kwargs.update(self.options)
|
||||||
|
|
||||||
# Set seed
|
# Set seed
|
||||||
if request.seed > 0:
|
if request.seed > 0:
|
||||||
kwargs["generator"] = torch.Generator(device=self.device).manual_seed(
|
kwargs["generator"] = torch.Generator(device=self.device).manual_seed(
|
||||||
@@ -501,7 +526,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
|
|
||||||
|
|
||||||
def serve(address):
|
def serve(address):
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||||
|
options=[
|
||||||
|
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
])
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||||
server.add_insecure_port(address)
|
server.add_insecure_port(address)
|
||||||
server.start()
|
server.start()
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
setuptools
|
setuptools
|
||||||
grpcio==1.69.0
|
grpcio==1.72.0
|
||||||
pillow
|
pillow
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
|
|||||||
@@ -105,7 +105,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
|
|
||||||
|
|
||||||
def serve(address):
|
def serve(address):
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||||
|
options=[
|
||||||
|
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
])
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||||
server.add_insecure_port(address)
|
server.add_insecure_port(address)
|
||||||
server.start()
|
server.start()
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.72.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
wheel
|
wheel
|
||||||
|
|||||||
@@ -1,6 +1,9 @@
|
|||||||
.PHONY: autogptq
|
.DEFAULT_GOAL := install
|
||||||
autogptq: protogen
|
|
||||||
|
.PHONY: install
|
||||||
|
install:
|
||||||
bash install.sh
|
bash install.sh
|
||||||
|
$(MAKE) protogen
|
||||||
|
|
||||||
.PHONY: protogen
|
.PHONY: protogen
|
||||||
protogen: backend_pb2_grpc.py backend_pb2.py
|
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||||
@@ -10,7 +13,7 @@ protogen-clean:
|
|||||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
backend_pb2_grpc.py backend_pb2.py:
|
backend_pb2_grpc.py backend_pb2.py:
|
||||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
bash protogen.sh
|
||||||
|
|
||||||
.PHONY: clean
|
.PHONY: clean
|
||||||
clean: protogen-clean
|
clean: protogen-clean
|
||||||
@@ -1,88 +1,73 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
Extra gRPC server for HuggingFace SentenceTransformer models.
|
This is an extra gRPC server of LocalAI for Bark TTS
|
||||||
"""
|
"""
|
||||||
from concurrent import futures
|
from concurrent import futures
|
||||||
|
import time
|
||||||
import argparse
|
import argparse
|
||||||
import signal
|
import signal
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import time
|
|
||||||
import backend_pb2
|
import backend_pb2
|
||||||
import backend_pb2_grpc
|
import backend_pb2_grpc
|
||||||
|
|
||||||
|
from faster_whisper import WhisperModel
|
||||||
|
|
||||||
import grpc
|
import grpc
|
||||||
|
|
||||||
from sentence_transformers import SentenceTransformer
|
|
||||||
|
|
||||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||||
|
|
||||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
||||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||||
|
COQUI_LANGUAGE = os.environ.get('COQUI_LANGUAGE', None)
|
||||||
|
|
||||||
# Implement the BackendServicer class with the service methods
|
# Implement the BackendServicer class with the service methods
|
||||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||||
"""
|
"""
|
||||||
A gRPC servicer for the backend service.
|
BackendServicer is the class that implements the gRPC service
|
||||||
|
|
||||||
This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
|
|
||||||
"""
|
"""
|
||||||
def Health(self, request, context):
|
def Health(self, request, context):
|
||||||
"""
|
|
||||||
A gRPC method that returns the health status of the backend service.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
request: A HealthRequest object that contains the request parameters.
|
|
||||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A Reply object that contains the health status of the backend service.
|
|
||||||
"""
|
|
||||||
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
||||||
|
|
||||||
def LoadModel(self, request, context):
|
def LoadModel(self, request, context):
|
||||||
"""
|
device = "cpu"
|
||||||
A gRPC method that loads a model into memory.
|
# Get device
|
||||||
|
# device = "cuda" if request.CUDA else "cpu"
|
||||||
|
if request.CUDA:
|
||||||
|
device = "cuda"
|
||||||
|
|
||||||
Args:
|
|
||||||
request: A LoadModelRequest object that contains the request parameters.
|
|
||||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A Result object that contains the result of the LoadModel operation.
|
|
||||||
"""
|
|
||||||
model_name = request.Model
|
|
||||||
try:
|
try:
|
||||||
self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
|
print("Preparing models, please wait", file=sys.stderr)
|
||||||
|
self.model = WhisperModel(request.Model, device=device, compute_type="float16")
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||||
|
|
||||||
# Implement your logic here for the LoadModel service
|
# Implement your logic here for the LoadModel service
|
||||||
# Replace this with your desired response
|
# Replace this with your desired response
|
||||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
||||||
|
|
||||||
def Embedding(self, request, context):
|
def AudioTranscription(self, request, context):
|
||||||
"""
|
resultSegments = []
|
||||||
A gRPC method that calculates embeddings for a given sentence.
|
text = ""
|
||||||
|
try:
|
||||||
Args:
|
segments, info = self.model.transcribe(request.dst, beam_size=5, condition_on_previous_text=False)
|
||||||
request: An EmbeddingRequest object that contains the request parameters.
|
id = 0
|
||||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
for segment in segments:
|
||||||
|
print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
|
||||||
Returns:
|
resultSegments.append(backend_pb2.TranscriptSegment(id=id, start=segment.start, end=segment.end, text=segment.text))
|
||||||
An EmbeddingResult object that contains the calculated embeddings.
|
text += segment.text
|
||||||
"""
|
id += 1
|
||||||
# Implement your logic here for the Embedding service
|
except Exception as err:
|
||||||
# Replace this with your desired response
|
print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr)
|
||||||
print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr)
|
|
||||||
sentence_embeddings = self.model.encode(request.Embeddings)
|
|
||||||
return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings)
|
|
||||||
|
|
||||||
|
return backend_pb2.TranscriptResult(segments=resultSegments, text=text)
|
||||||
|
|
||||||
def serve(address):
|
def serve(address):
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||||
|
options=[
|
||||||
|
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
])
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||||
server.add_insecure_port(address)
|
server.add_insecure_port(address)
|
||||||
server.start()
|
server.start()
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user