mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-03 11:13:31 -05:00
Compare commits
329 Commits
speculativ
...
v2.27.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6d7ac09e96 | ||
|
|
c2a39e3639 | ||
|
|
ae625a4d00 | ||
|
|
7f3a029596 | ||
|
|
b34cf00819 | ||
|
|
d4a10b4300 | ||
|
|
9c74d74f7b | ||
|
|
679ee7bea4 | ||
|
|
77d7dc62c4 | ||
|
|
699519d1fe | ||
|
|
8faf39d34e | ||
|
|
5d261a6fcd | ||
|
|
22d5727089 | ||
|
|
c965197d6f | ||
|
|
994a6c4939 | ||
|
|
f926d2a72b | ||
|
|
ddeb9ed93e | ||
|
|
c7e99c7b59 | ||
|
|
6fabc92e56 | ||
|
|
4645b3c919 | ||
|
|
134fe2705c | ||
|
|
3cca32ba7e | ||
|
|
c069e61b26 | ||
|
|
7fa159e164 | ||
|
|
5f92025617 | ||
|
|
333e1bc732 | ||
|
|
e90b97c144 | ||
|
|
747eeb1d46 | ||
|
|
5d2c53abc0 | ||
|
|
0b1e721242 | ||
|
|
8c76a9ce99 | ||
|
|
338321af5b | ||
|
|
2774a92484 | ||
|
|
1a6bfb41a1 | ||
|
|
314981eaf8 | ||
|
|
d7266c633d | ||
|
|
eb4d5f2b95 | ||
|
|
c63b449ad6 | ||
|
|
dd4a778c2c | ||
|
|
a0896d21d6 | ||
|
|
0e697f951a | ||
|
|
fa4bb9082d | ||
|
|
8ff7b15441 | ||
|
|
dd45f85a20 | ||
|
|
decdd9e522 | ||
|
|
31a21d4a2c | ||
|
|
2c129843a7 | ||
|
|
ce71a0bcfb | ||
|
|
0a32c38317 | ||
|
|
36f596f260 | ||
|
|
953552545b | ||
|
|
835e55b1de | ||
|
|
dcd2921eaa | ||
|
|
5e6459fd18 | ||
|
|
50ddb3eb59 | ||
|
|
5eebfee4b5 | ||
|
|
567919ea90 | ||
|
|
27a3997530 | ||
|
|
192ba2c657 | ||
|
|
92abac9ca8 | ||
|
|
04ebbbd73a | ||
|
|
55305e0d95 | ||
|
|
67623639e4 | ||
|
|
cc76def342 | ||
|
|
4967fa5928 | ||
|
|
2b98e4ec56 | ||
|
|
fa1d058ee2 | ||
|
|
a49a588bfa | ||
|
|
ca7dda61c6 | ||
|
|
ffedddd76d | ||
|
|
766c76ae8e | ||
|
|
3096ff33e9 | ||
|
|
90a7451da4 | ||
|
|
529a4b9ee8 | ||
|
|
0567e104eb | ||
|
|
ecbeacd022 | ||
|
|
2772960e41 | ||
|
|
1b694191e2 | ||
|
|
69578a5f8f | ||
|
|
7d96cfe72b | ||
|
|
423514a5a5 | ||
|
|
12568c7d6d | ||
|
|
8d16a0a536 | ||
|
|
87ca801f00 | ||
|
|
e4ecbb6c30 | ||
|
|
b1a67de2b9 | ||
|
|
71a23910fe | ||
|
|
0ede31f9cf | ||
|
|
9f5dcf2d1e | ||
|
|
e878556e98 | ||
|
|
b096928172 | ||
|
|
db7442ae67 | ||
|
|
b6cd430e08 | ||
|
|
478e50cda2 | ||
|
|
1db2b9943c | ||
|
|
ac41aa8b67 | ||
|
|
156a98e2e7 | ||
|
|
d88ec1209e | ||
|
|
fde8dbfc80 | ||
|
|
879dc73eba | ||
|
|
1dfc52de16 | ||
|
|
1331129485 | ||
|
|
1cd98062e5 | ||
|
|
9791d9b77a | ||
|
|
8956452a45 | ||
|
|
f3659fa49c | ||
|
|
585f2be793 | ||
|
|
d13f160222 | ||
|
|
db5495b9d7 | ||
|
|
3def1ae232 | ||
|
|
c6ebead8e5 | ||
|
|
cff4a950e0 | ||
|
|
e4fa894153 | ||
|
|
69caccfa82 | ||
|
|
ab50c13160 | ||
|
|
56d4e82b14 | ||
|
|
09b5bd48bc | ||
|
|
957dcfb6a9 | ||
|
|
67f7bffd18 | ||
|
|
de81b42b49 | ||
|
|
06eb7e9fa7 | ||
|
|
45bc1ac566 | ||
|
|
02aafeff75 | ||
|
|
6b46c52789 | ||
|
|
d732e261a4 | ||
|
|
807c574e91 | ||
|
|
bb171a39b3 | ||
|
|
941a4fc50e | ||
|
|
afe65bd7bf | ||
|
|
6f9762049c | ||
|
|
122970d70d | ||
|
|
8664b1c7a2 | ||
|
|
c92166f38a | ||
|
|
d616058b12 | ||
|
|
a7b4001b75 | ||
|
|
ff85f01459 | ||
|
|
695f81a08b | ||
|
|
326be287da | ||
|
|
0404d98190 | ||
|
|
0a8ec1eb22 | ||
|
|
d860932dcd | ||
|
|
1cb137bd2d | ||
|
|
3c279e5568 | ||
|
|
fb55e3df57 | ||
|
|
de46fb6e2e | ||
|
|
d7a0e3c5ea | ||
|
|
0533ea817d | ||
|
|
755e4fb5f4 | ||
|
|
e4fdde158f | ||
|
|
6d0712fa6d | ||
|
|
bbbb28e3ca | ||
|
|
3bf2e9d065 | ||
|
|
1461fd8777 | ||
|
|
054860539a | ||
|
|
c87870b18e | ||
|
|
5ad2be9c45 | ||
|
|
61a24746a1 | ||
|
|
d557eb9361 | ||
|
|
a9a1a361a9 | ||
|
|
12d070af80 | ||
|
|
8d40557bc8 | ||
|
|
5a5f3a899a | ||
|
|
a2d1f133c8 | ||
|
|
0ae6420c31 | ||
|
|
3a3e05cf18 | ||
|
|
6a20388e25 | ||
|
|
06c836a937 | ||
|
|
049a13fe78 | ||
|
|
30bf6c962f | ||
|
|
a72b3a23c3 | ||
|
|
e9971b168a | ||
|
|
5b59b5e0c1 | ||
|
|
8cfd712428 | ||
|
|
21f7faa80d | ||
|
|
a6a0121118 | ||
|
|
ba66aa33c5 | ||
|
|
8fc024a770 | ||
|
|
52aa9d08aa | ||
|
|
4c9379c39e | ||
|
|
0ff2c39364 | ||
|
|
1af7e5dc49 | ||
|
|
af3bb64e42 | ||
|
|
77281f836e | ||
|
|
550275811d | ||
|
|
c27ce6c54d | ||
|
|
ac4991b069 | ||
|
|
25bee71bb8 | ||
|
|
b993780a3b | ||
|
|
ea0c9f1168 | ||
|
|
08311f275a | ||
|
|
4de0f2f737 | ||
|
|
42ae807c41 | ||
|
|
94593ba4c3 | ||
|
|
6a6e1a0ea9 | ||
|
|
5b19af99ff | ||
|
|
28fb8e607a | ||
|
|
bb85b6ef00 | ||
|
|
b9b5a635ca | ||
|
|
131ea5b627 | ||
|
|
fac70e9642 | ||
|
|
7e76ea40fb | ||
|
|
de09ae42ef | ||
|
|
6424f0666d | ||
|
|
f3ae94ca70 | ||
|
|
09c9f67a02 | ||
|
|
c264ca542d | ||
|
|
bbf30d416d | ||
|
|
27617a1b06 | ||
|
|
e84081769e | ||
|
|
20119fc580 | ||
|
|
09941c0bfb | ||
|
|
cabe0f4993 | ||
|
|
1977c7f190 | ||
|
|
061e7c4eae | ||
|
|
5313e660f6 | ||
|
|
9e32fda304 | ||
|
|
83202cae54 | ||
|
|
d96addfa9d | ||
|
|
a715fe588d | ||
|
|
2ac4a86bb4 | ||
|
|
8670d480a6 | ||
|
|
af0b4ff237 | ||
|
|
e694764065 | ||
|
|
f3c27e0381 | ||
|
|
bf44319d0d | ||
|
|
5b133a640b | ||
|
|
0030a3fe75 | ||
|
|
0a748b009e | ||
|
|
257e951def | ||
|
|
fbd82a2dd0 | ||
|
|
5db321dad2 | ||
|
|
f5638a6354 | ||
|
|
5f64cc6328 | ||
|
|
28b10e8804 | ||
|
|
3277f5095d | ||
|
|
fe3ced2919 | ||
|
|
45e37a07bb | ||
|
|
e57b750ca3 | ||
|
|
49df492268 | ||
|
|
516cd660f1 | ||
|
|
8fd3ace9a1 | ||
|
|
099469cb05 | ||
|
|
6be8c0c618 | ||
|
|
3cddf24747 | ||
|
|
c330360785 | ||
|
|
8cd51570e5 | ||
|
|
0e7aa5cd15 | ||
|
|
e06a5f49de | ||
|
|
fb2f847507 | ||
|
|
e01acc88c9 | ||
|
|
7a5912908a | ||
|
|
4b1b942a7f | ||
|
|
230fe0098f | ||
|
|
cc163429dc | ||
|
|
f670e0a91c | ||
|
|
731674eee7 | ||
|
|
cc1f6f913f | ||
|
|
7f90ff7aec | ||
|
|
8d45670e41 | ||
|
|
e4b8ddb6a1 | ||
|
|
a801561f81 | ||
|
|
16ced07102 | ||
|
|
d35595372d | ||
|
|
81be192279 | ||
|
|
28a1310890 | ||
|
|
2a702e9ca4 | ||
|
|
3ecaea1b6e | ||
|
|
7daf5ac3e3 | ||
|
|
7bc80c17f8 | ||
|
|
1996ceb293 | ||
|
|
0bc3dc43da | ||
|
|
3324c4e6cb | ||
|
|
7329db4e78 | ||
|
|
464686aee6 | ||
|
|
bfa3d4ccff | ||
|
|
6a91288c8c | ||
|
|
96cb407ee0 | ||
|
|
5a19094d3a | ||
|
|
e3b943ffcb | ||
|
|
df30d6a482 | ||
|
|
c3c27b7e3d | ||
|
|
431716d4d6 | ||
|
|
d290fd159f | ||
|
|
051faaf771 | ||
|
|
41a2dfb0d9 | ||
|
|
ed0094c3d0 | ||
|
|
52fadeded1 | ||
|
|
a37fa8d9c4 | ||
|
|
03974a4dd4 | ||
|
|
1d6afbd65d | ||
|
|
d79f02ea09 | ||
|
|
ba2f426e3e | ||
|
|
732042e5c6 | ||
|
|
f1763aabf2 | ||
|
|
e0d90b173b | ||
|
|
ff07612bfa | ||
|
|
7badaf78a0 | ||
|
|
af41436f1b | ||
|
|
cd5489ce47 | ||
|
|
60ec2cf751 | ||
|
|
244f4b564f | ||
|
|
f1d6d65417 | ||
|
|
72e52c4f6a | ||
|
|
1656e1a88e | ||
|
|
7f62b418a4 | ||
|
|
1f4e66d638 | ||
|
|
a37b2c765c | ||
|
|
b4b67e00bd | ||
|
|
91e1ff5a95 | ||
|
|
d9204ea3b5 | ||
|
|
3d0fbcb4f7 | ||
|
|
03f3df9a82 | ||
|
|
fff35d5528 | ||
|
|
539e94db73 | ||
|
|
0f4f62cf3c | ||
|
|
e7cffd7afa | ||
|
|
26d790a2b6 | ||
|
|
5cf838c08d | ||
|
|
4db8f5cbce | ||
|
|
3b6b37a81b | ||
|
|
8f5aa2d9de | ||
|
|
a6bc8aa7c7 | ||
|
|
4ab107bc1a | ||
|
|
4c3710a531 | ||
|
|
901b06284a | ||
|
|
8eef5a2c5e | ||
|
|
e9cace137b | ||
|
|
9409c99738 | ||
|
|
4d44ebc2f2 |
@@ -1,23 +0,0 @@
|
||||
meta {
|
||||
name: musicgen
|
||||
type: http
|
||||
seq: 1
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/sound-generation
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model_id": "facebook/musicgen-small",
|
||||
"text": "Exciting 80s Newscast Interstitial",
|
||||
"duration_seconds": 8
|
||||
}
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
meta {
|
||||
name: backend monitor
|
||||
type: http
|
||||
seq: 4
|
||||
}
|
||||
|
||||
get {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/monitor
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}"
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
meta {
|
||||
name: backend-shutdown
|
||||
type: http
|
||||
seq: 3
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/backend/shutdown
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}"
|
||||
}
|
||||
}
|
||||
@@ -1,5 +0,0 @@
|
||||
{
|
||||
"version": "1",
|
||||
"name": "LocalAI Test Requests",
|
||||
"type": "collection"
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
vars {
|
||||
HOST: localhost
|
||||
PORT: 8080
|
||||
DEFAULT_MODEL: gpt-3.5-turbo
|
||||
PROTOCOL: http://
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
meta {
|
||||
name: get models list
|
||||
type: http
|
||||
seq: 2
|
||||
}
|
||||
|
||||
get {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models
|
||||
body: none
|
||||
auth: none
|
||||
}
|
||||
@@ -1,25 +0,0 @@
|
||||
meta {
|
||||
name: Generate image
|
||||
type: http
|
||||
seq: 1
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/images/generations
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"prompt": "<positive prompt>|<negative prompt>",
|
||||
"model": "model-name",
|
||||
"step": 51,
|
||||
"size": "1024x1024",
|
||||
"image": ""
|
||||
}
|
||||
}
|
||||
@@ -1,24 +0,0 @@
|
||||
meta {
|
||||
name: -completions
|
||||
type: http
|
||||
seq: 4
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/completions
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"prompt": "function downloadFile(string url, string outputPath) {",
|
||||
"max_tokens": 256,
|
||||
"temperature": 0.5
|
||||
}
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
meta {
|
||||
name: -edits
|
||||
type: http
|
||||
seq: 5
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/edits
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"input": "What day of the wek is it?",
|
||||
"instruction": "Fix the spelling mistakes"
|
||||
}
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
meta {
|
||||
name: -embeddings
|
||||
type: http
|
||||
seq: 6
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/embeddings
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
|
||||
}
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
meta {
|
||||
name: chat completion -simple- 1 message-
|
||||
type: http
|
||||
seq: 4
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "How could one use friction to cook an egg?"
|
||||
}
|
||||
],
|
||||
"max_tokens": 256,
|
||||
"temperature": 0.2,
|
||||
"grammar": ""
|
||||
}
|
||||
}
|
||||
@@ -1,29 +0,0 @@
|
||||
meta {
|
||||
name: chat-completions -long-
|
||||
type: http
|
||||
seq: 5
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"messages": [{"role": "system", "content": "You are a helpful, intelligent pirate assistant. Always answer as helpfully as possible, while being a pirate and using pirate language. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."},
|
||||
{"role": "user", "content": "How could one use electricity to cook an egg?"},
|
||||
{"role": "assistant",
|
||||
"content": "Shiver me timbers! Using electricity to cook an egg? Well, matey, I reckon that be a right curious idea! *adjusts spectacles* Now, I ain't sure if this be possible, but I'll do me best to help ye out.\n\nFirst things first, ye gotta understand that electricity be a powerful force, and it can't just cook an egg on its own. Ye see, electricity be like a mighty wind that can make things happen, but it needs somethin' to work with. So, if ye want to use electricity to cook an egg, ye gotta find a way to harness that power and make it do the cookin'.\n\nNow, I know what ye might be thinkin': \"How do I harness the power of electricity to cook an egg?\" Well, matey, there be a few ways to do it. One way be to use a special device called an \"electric frying pan.\" This be a pan that has a built-in heating element that gets hot when ye plug it into a wall socket. When the element gets hot, ye can crack an egg into the pan and watch as it cook"
|
||||
},
|
||||
{"role": "user", "content": "I don't have one of those, just a raw wire and plenty of power! How do we get it done?"}],
|
||||
"max_tokens": 1024,
|
||||
"temperature": 0.5
|
||||
}
|
||||
}
|
||||
@@ -1,25 +0,0 @@
|
||||
meta {
|
||||
name: chat-completions -stream-
|
||||
type: http
|
||||
seq: 6
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/chat/completions
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"messages": [{"role": "user", "content": "Explain how I can set sail on the ocean using only power generated by seagulls?"}],
|
||||
"max_tokens": 256,
|
||||
"temperature": 0.9,
|
||||
"stream": true
|
||||
}
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
meta {
|
||||
name: add model gallery
|
||||
type: http
|
||||
seq: 10
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"url": "file:///home/dave/projects/model-gallery/huggingface/TheBloke__CodeLlama-7B-Instruct-GGML.yaml",
|
||||
"name": "test"
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
meta {
|
||||
name: delete model gallery
|
||||
type: http
|
||||
seq: 11
|
||||
}
|
||||
|
||||
delete {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"name": "test"
|
||||
}
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
meta {
|
||||
name: list MODELS in galleries
|
||||
type: http
|
||||
seq: 7
|
||||
}
|
||||
|
||||
get {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/available
|
||||
body: none
|
||||
auth: none
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
meta {
|
||||
name: list model GALLERIES
|
||||
type: http
|
||||
seq: 8
|
||||
}
|
||||
|
||||
get {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||
body: none
|
||||
auth: none
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
meta {
|
||||
name: model delete
|
||||
type: http
|
||||
seq: 7
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
||||
body: none
|
||||
auth: none
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
meta {
|
||||
name: model gallery apply -gist-
|
||||
type: http
|
||||
seq: 12
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"id": "TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q2_K.bin"
|
||||
}
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
meta {
|
||||
name: model gallery apply
|
||||
type: http
|
||||
seq: 9
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/apply
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"id": "dave@TheBloke__CodeLlama-7B-Instruct-GGML__codellama-7b-instruct.ggmlv3.Q3_K_S.bin",
|
||||
"name": "codellama7b"
|
||||
}
|
||||
}
|
||||
Binary file not shown.
@@ -1,16 +0,0 @@
|
||||
meta {
|
||||
name: transcribe
|
||||
type: http
|
||||
seq: 1
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/audio/transcriptions
|
||||
body: multipartForm
|
||||
auth: none
|
||||
}
|
||||
|
||||
body:multipart-form {
|
||||
file: @file(transcription/gb1.ogg)
|
||||
model: whisper-1
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
meta {
|
||||
name: -tts
|
||||
type: http
|
||||
seq: 2
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/tts
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"model": "{{DEFAULT_MODEL}}",
|
||||
"input": "A STRANGE GAME.\nTHE ONLY WINNING MOVE IS NOT TO PLAY.\n\nHOW ABOUT A NICE GAME OF CHESS?"
|
||||
}
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
meta {
|
||||
name: musicgen
|
||||
type: http
|
||||
seq: 2
|
||||
}
|
||||
|
||||
post {
|
||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/tts
|
||||
body: json
|
||||
auth: none
|
||||
}
|
||||
|
||||
headers {
|
||||
Content-Type: application/json
|
||||
}
|
||||
|
||||
body:json {
|
||||
{
|
||||
"backend": "transformers",
|
||||
"model": "facebook/musicgen-small",
|
||||
"input": "80s Synths playing Jazz"
|
||||
}
|
||||
}
|
||||
2
.github/labeler.yml
vendored
2
.github/labeler.yml
vendored
@@ -1,4 +1,4 @@
|
||||
enhancements:
|
||||
enhancement:
|
||||
- head-branch: ['^feature', 'feature']
|
||||
|
||||
dependencies:
|
||||
|
||||
2
.github/workflows/bump_deps.yaml
vendored
2
.github/workflows/bump_deps.yaml
vendored
@@ -9,7 +9,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- repository: "ggerganov/llama.cpp"
|
||||
- repository: "ggml-org/llama.cpp"
|
||||
variable: "CPPLLAMA_VERSION"
|
||||
branch: "master"
|
||||
- repository: "ggerganov/whisper.cpp"
|
||||
|
||||
2
.github/workflows/dependabot_auto.yml
vendored
2
.github/workflows/dependabot_auto.yml
vendored
@@ -14,7 +14,7 @@ jobs:
|
||||
steps:
|
||||
- name: Dependabot metadata
|
||||
id: metadata
|
||||
uses: dependabot/fetch-metadata@v2.2.0
|
||||
uses: dependabot/fetch-metadata@v2.3.0
|
||||
with:
|
||||
github-token: "${{ secrets.GITHUB_TOKEN }}"
|
||||
skip-commit-verification: true
|
||||
|
||||
4
.github/workflows/deploy-explorer.yaml
vendored
4
.github/workflows/deploy-explorer.yaml
vendored
@@ -33,7 +33,7 @@ jobs:
|
||||
run: |
|
||||
CGO_ENABLED=0 make build-api
|
||||
- name: rm
|
||||
uses: appleboy/ssh-action@v1.2.0
|
||||
uses: appleboy/ssh-action@v1.2.2
|
||||
with:
|
||||
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
||||
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
||||
@@ -53,7 +53,7 @@ jobs:
|
||||
rm: true
|
||||
target: ./local-ai
|
||||
- name: restarting
|
||||
uses: appleboy/ssh-action@v1.2.0
|
||||
uses: appleboy/ssh-action@v1.2.2
|
||||
with:
|
||||
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
||||
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
||||
|
||||
9
.github/workflows/generate_grpc_cache.yaml
vendored
9
.github/workflows/generate_grpc_cache.yaml
vendored
@@ -2,9 +2,10 @@ name: 'generate and publish GRPC docker caches'
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
|
||||
schedule:
|
||||
# daily at midnight
|
||||
- cron: '0 0 * * *'
|
||||
|
||||
concurrency:
|
||||
group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||
@@ -16,7 +17,7 @@ jobs:
|
||||
matrix:
|
||||
include:
|
||||
- grpc-base-image: ubuntu:22.04
|
||||
runs-on: 'ubuntu-latest'
|
||||
runs-on: 'arc-runner-set'
|
||||
platforms: 'linux/amd64,linux/arm64'
|
||||
runs-on: ${{matrix.runs-on}}
|
||||
steps:
|
||||
|
||||
5
.github/workflows/image_build.yml
vendored
5
.github/workflows/image_build.yml
vendored
@@ -310,6 +310,11 @@ jobs:
|
||||
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
|
||||
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
|
||||
|
||||
- name: Cleanup
|
||||
run: |
|
||||
docker builder prune -f
|
||||
docker system prune --force --volumes --all
|
||||
|
||||
- name: Latest tag
|
||||
# run this on branches, when it is a tag and there is a latest-image defined
|
||||
if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
|
||||
|
||||
4
.github/workflows/notify-models.yaml
vendored
4
.github/workflows/notify-models.yaml
vendored
@@ -18,7 +18,7 @@ jobs:
|
||||
with:
|
||||
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
||||
# Check the PR diff using the current branch and the base branch of the PR
|
||||
- uses: GrantBirki/git-diff-action@v2.7.0
|
||||
- uses: GrantBirki/git-diff-action@v2.8.0
|
||||
id: git-diff-action
|
||||
with:
|
||||
json_diff_file_output: diff.json
|
||||
@@ -99,7 +99,7 @@ jobs:
|
||||
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
|
||||
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
|
||||
# Check the PR diff using the current branch and the base branch of the PR
|
||||
- uses: GrantBirki/git-diff-action@v2.7.0
|
||||
- uses: GrantBirki/git-diff-action@v2.8.0
|
||||
id: git-diff-action
|
||||
with:
|
||||
json_diff_file_output: diff.json
|
||||
|
||||
@@ -24,6 +24,7 @@ RUN apt-get update && \
|
||||
ca-certificates \
|
||||
curl libssl-dev \
|
||||
git \
|
||||
git-lfs \
|
||||
unzip upx-ucl && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023-2024 Ettore Di Giacinto (mudler@localai.io)
|
||||
Copyright (c) 2023-2025 Ettore Di Giacinto (mudler@localai.io)
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
44
Makefile
44
Makefile
@@ -6,9 +6,7 @@ BINARY_NAME=local-ai
|
||||
DETECT_LIBS?=true
|
||||
|
||||
# llama.cpp versions
|
||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||
CPPLLAMA_VERSION?=6152129d05870cb38162c422c6ba80434e021e9f
|
||||
CPPLLAMA_VERSION?=4663bd353c61c1136cd8a97b9908755e4ab30cec
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||
@@ -24,7 +22,7 @@ BARKCPP_VERSION?=v1.0.0
|
||||
|
||||
# stablediffusion.cpp (ggml)
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=5eb15ef4d022bef4a391de4f5f6556e81fbb5024
|
||||
STABLEDIFFUSION_GGML_VERSION?=19d876ee300a055629926ff836489901f734f2b7
|
||||
|
||||
ONNX_VERSION?=1.20.0
|
||||
ONNX_ARCH?=x64
|
||||
@@ -151,7 +149,6 @@ ifeq ($(BUILD_TYPE),hipblas)
|
||||
LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib
|
||||
export CXX=$(ROCM_HOME)/llvm/bin/clang++
|
||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||
# llama-ggml has no hipblas support, so override it here.
|
||||
export STABLE_BUILD_TYPE=
|
||||
export GGML_HIP=1
|
||||
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
|
||||
@@ -188,7 +185,6 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
||||
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
|
||||
@@ -222,19 +218,6 @@ endif
|
||||
|
||||
all: help
|
||||
|
||||
## go-llama.cpp
|
||||
sources/go-llama.cpp:
|
||||
mkdir -p sources/go-llama.cpp
|
||||
cd sources/go-llama.cpp && \
|
||||
git init && \
|
||||
git remote add origin $(GOLLAMA_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout $(GOLLAMA_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
|
||||
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
|
||||
|
||||
## bark.cpp
|
||||
sources/bark.cpp:
|
||||
git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
|
||||
@@ -310,19 +293,17 @@ sources/whisper.cpp:
|
||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
||||
|
||||
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
|
||||
get-sources: sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
|
||||
|
||||
replace:
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
||||
|
||||
dropreplace:
|
||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
||||
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
|
||||
|
||||
prepare-sources: get-sources replace
|
||||
$(GOCMD) mod download
|
||||
@@ -330,7 +311,6 @@ prepare-sources: get-sources replace
|
||||
## GENERIC
|
||||
rebuild: ## Rebuilds the project
|
||||
$(GOCMD) clean -cache
|
||||
$(MAKE) -C sources/go-llama.cpp clean
|
||||
$(MAKE) -C sources/whisper.cpp clean
|
||||
$(MAKE) -C sources/go-piper clean
|
||||
$(MAKE) build
|
||||
@@ -434,7 +414,7 @@ run: prepare ## run local-ai
|
||||
test-models/testmodel.ggml:
|
||||
mkdir test-models
|
||||
mkdir test-dir
|
||||
wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel.ggml
|
||||
wget -q https://huggingface.co/RichardErkhov/Qwen_-_Qwen2-1.5B-Instruct-gguf/resolve/main/Qwen2-1.5B-Instruct.Q2_K.gguf -O test-models/testmodel.ggml
|
||||
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
||||
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
|
||||
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
||||
@@ -449,8 +429,7 @@ test: prepare test-models/testmodel.ggml grpcs
|
||||
export GO_TAGS="tts debug"
|
||||
$(MAKE) prepare-test
|
||||
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||
$(MAKE) test-llama
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||
$(MAKE) test-llama-gguf
|
||||
$(MAKE) test-tts
|
||||
$(MAKE) test-stablediffusion
|
||||
@@ -479,10 +458,6 @@ teardown-e2e:
|
||||
rm -rf $(TEST_DIR) || true
|
||||
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
||||
|
||||
test-llama: prepare-test
|
||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
||||
|
||||
test-llama-gguf: prepare-test
|
||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
||||
@@ -760,13 +735,6 @@ backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
|
||||
mkdir -p backend-assets/util/
|
||||
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
|
||||
|
||||
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/llama-ggml
|
||||
endif
|
||||
|
||||
backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/
|
||||
@@ -861,7 +829,7 @@ swagger:
|
||||
|
||||
.PHONY: gen-assets
|
||||
gen-assets:
|
||||
$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets
|
||||
$(GOCMD) run core/dependencies_manager/manager.go webui_static.yaml core/http/static/assets
|
||||
|
||||
## Documentation
|
||||
docs/layouts/_default:
|
||||
|
||||
@@ -212,7 +212,7 @@ A huge thank you to our generous sponsors who support this project covering CI e
|
||||
|
||||
<p align="center">
|
||||
<a href="https://www.spectrocloud.com/" target="blank">
|
||||
<img height="200" src="https://github.com/go-skynet/LocalAI/assets/2420543/68a6f3cb-8a65-4a4d-99b5-6417a8905512">
|
||||
<img height="200" src="https://github.com/user-attachments/assets/72eab1dd-8b93-4fc0-9ade-84db49f24962">
|
||||
</a>
|
||||
<a href="https://www.premai.io/" target="blank">
|
||||
<img height="200" src="https://github.com/mudler/LocalAI/assets/2420543/42e4ca83-661e-4f79-8e46-ae43689683d6"> <br>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
name: text-embedding-ada-002
|
||||
embeddings: true
|
||||
name: text-embedding-ada-002
|
||||
parameters:
|
||||
model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf
|
||||
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
|
||||
|
||||
usage: |
|
||||
You can test this model with curl like this:
|
||||
|
||||
@@ -1,101 +1,57 @@
|
||||
name: gpt-4
|
||||
mmap: true
|
||||
parameters:
|
||||
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
|
||||
context_size: 8192
|
||||
|
||||
stopwords:
|
||||
- "<|im_end|>"
|
||||
- "<dummy32000>"
|
||||
- "</tool_call>"
|
||||
- "<|eot_id|>"
|
||||
- "<|end_of_text|>"
|
||||
|
||||
f16: true
|
||||
function:
|
||||
# disable injecting the "answer" tool
|
||||
disable_no_action: true
|
||||
|
||||
grammar:
|
||||
# This allows the grammar to also return messages
|
||||
mixed_mode: true
|
||||
# Suffix to add to the grammar
|
||||
#prefix: '<tool_call>\n'
|
||||
# Force parallel calls in the grammar
|
||||
# parallel_calls: true
|
||||
|
||||
return_name_in_function_response: true
|
||||
# Without grammar uncomment the lines below
|
||||
# Warning: this is relying only on the capability of the
|
||||
# LLM model to generate the correct function call.
|
||||
json_regex_match:
|
||||
- "(?s)<tool_call>(.*?)</tool_call>"
|
||||
- "(?s)<tool_call>(.*?)"
|
||||
replace_llm_results:
|
||||
# Drop the scratchpad content from responses
|
||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
||||
value: ""
|
||||
replace_function_results:
|
||||
# Replace everything that is not JSON array or object
|
||||
#
|
||||
- key: '(?s)^[^{\[]*'
|
||||
value: ""
|
||||
- key: '(?s)[^}\]]*$'
|
||||
value: ""
|
||||
- key: "'([^']*?)'"
|
||||
value: "_DQUOTE_${1}_DQUOTE_"
|
||||
- key: '\\"'
|
||||
value: "__TEMP_QUOTE__"
|
||||
- key: "\'"
|
||||
value: "'"
|
||||
- key: "_DQUOTE_"
|
||||
value: '"'
|
||||
- key: "__TEMP_QUOTE__"
|
||||
value: '"'
|
||||
# Drop the scratchpad content from responses
|
||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
||||
value: ""
|
||||
|
||||
no_mixed_free_string: true
|
||||
schema_type: llama3.1 # or JSON is supported too (json)
|
||||
response_regex:
|
||||
- <function=(?P<name>\w+)>(?P<arguments>.*)</function>
|
||||
mmap: true
|
||||
name: gpt-4
|
||||
parameters:
|
||||
model: Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
|
||||
stopwords:
|
||||
- <|im_end|>
|
||||
- <dummy32000>
|
||||
- <|eot_id|>
|
||||
- <|end_of_text|>
|
||||
template:
|
||||
chat: |
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
|
||||
You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
|
||||
{{.Input }}
|
||||
<|start_header_id|>assistant<|end_header_id|>
|
||||
chat_message: |
|
||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
||||
{{- if .FunctionCall }}
|
||||
<tool_call>
|
||||
{{- else if eq .RoleName "tool" }}
|
||||
<tool_response>
|
||||
{{- end }}
|
||||
{{- if .Content}}
|
||||
{{.Content }}
|
||||
{{- end }}
|
||||
{{- if .FunctionCall}}
|
||||
{{toJson .FunctionCall}}
|
||||
{{- end }}
|
||||
{{- if .FunctionCall }}
|
||||
</tool_call>
|
||||
{{- else if eq .RoleName "tool" }}
|
||||
</tool_response>
|
||||
{{- end }}<|im_end|>
|
||||
<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
|
||||
{{ if .FunctionCall -}}
|
||||
{{ else if eq .RoleName "tool" -}}
|
||||
The Function was executed and the response was:
|
||||
{{ end -}}
|
||||
{{ if .Content -}}
|
||||
{{.Content -}}
|
||||
{{ else if .FunctionCall -}}
|
||||
{{ range .FunctionCall }}
|
||||
[{{.FunctionCall.Name}}({{.FunctionCall.Arguments}})]
|
||||
{{ end }}
|
||||
{{ end -}}
|
||||
<|eot_id|>
|
||||
completion: |
|
||||
{{.Input}}
|
||||
function: |-
|
||||
<|im_start|>system
|
||||
You are a function calling AI model.
|
||||
Here are the available tools:
|
||||
<tools>
|
||||
{{range .Functions}}
|
||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||
{{end}}
|
||||
</tools>
|
||||
You should call the tools provided to you sequentially
|
||||
Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
|
||||
<scratchpad>
|
||||
{step-by-step reasoning and plan in bullet points}
|
||||
</scratchpad>
|
||||
For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
|
||||
<tool_call>
|
||||
{"arguments": <args-dict>, "name": <function-name>}
|
||||
</tool_call><|im_end|>
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
function: |
|
||||
<|start_header_id|>system<|end_header_id|>
|
||||
You are an expert in composing functions. You are given a question and a set of possible functions.
|
||||
Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
|
||||
If none of the functions can be used, point it out. If the given question lacks the parameters required by the function, also point it out. You should only return the function call in tools call sections.
|
||||
If you decide to invoke any of the function(s), you MUST put it in the format as follows:
|
||||
[func_name1(params_name1=params_value1,params_name2=params_value2,...),func_name2(params_name1=params_value1,params_name2=params_value2,...)]
|
||||
You SHOULD NOT include any other text in the response.
|
||||
Here is a list of functions in JSON format that you can invoke.
|
||||
{{toJson .Functions}}
|
||||
<|eot_id|><|start_header_id|>user<|end_header_id|>
|
||||
{{.Input}}
|
||||
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
|
||||
|
||||
download_files:
|
||||
- filename: Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
|
||||
sha256: 2e220a14ba4328fee38cf36c2c068261560f999fadb5725ce5c6d977cb5126b5
|
||||
uri: huggingface://bartowski/Hermes-3-Llama-3.2-3B-GGUF/Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
|
||||
8
aio/cpu/vad.yaml
Normal file
8
aio/cpu/vad.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
backend: silero-vad
|
||||
name: silero-vad
|
||||
parameters:
|
||||
model: silero-vad.onnx
|
||||
download_files:
|
||||
- filename: silero-vad.onnx
|
||||
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
|
||||
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
|
||||
@@ -1,31 +1,49 @@
|
||||
backend: llama-cpp
|
||||
context_size: 4096
|
||||
f16: true
|
||||
mmap: true
|
||||
mmproj: minicpm-v-2_6-mmproj-f16.gguf
|
||||
name: gpt-4o
|
||||
|
||||
roles:
|
||||
user: "USER:"
|
||||
assistant: "ASSISTANT:"
|
||||
system: "SYSTEM:"
|
||||
|
||||
mmproj: bakllava-mmproj.gguf
|
||||
parameters:
|
||||
model: bakllava.gguf
|
||||
|
||||
model: minicpm-v-2_6-Q4_K_M.gguf
|
||||
stopwords:
|
||||
- <|im_end|>
|
||||
- <dummy32000>
|
||||
- </s>
|
||||
- <|endoftext|>
|
||||
template:
|
||||
chat: |
|
||||
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
chat_message: |
|
||||
<|im_start|>{{ .RoleName }}
|
||||
{{ if .FunctionCall -}}
|
||||
Function call:
|
||||
{{ else if eq .RoleName "tool" -}}
|
||||
Function response:
|
||||
{{ end -}}
|
||||
{{ if .Content -}}
|
||||
{{.Content }}
|
||||
{{ end -}}
|
||||
{{ if .FunctionCall -}}
|
||||
{{toJson .FunctionCall}}
|
||||
{{ end -}}<|im_end|>
|
||||
completion: |
|
||||
{{.Input}}
|
||||
ASSISTANT:
|
||||
function: |
|
||||
<|im_start|>system
|
||||
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||
{{range .Functions}}
|
||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||
{{end}}
|
||||
For each function call return a json object with function name and arguments
|
||||
<|im_end|>
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
|
||||
download_files:
|
||||
- filename: bakllava.gguf
|
||||
uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
|
||||
- filename: bakllava-mmproj.gguf
|
||||
uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
|
||||
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "gpt-4-vision-preview",
|
||||
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
|
||||
- filename: minicpm-v-2_6-Q4_K_M.gguf
|
||||
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
|
||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
|
||||
- filename: minicpm-v-2_6-mmproj-f16.gguf
|
||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
|
||||
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd
|
||||
@@ -129,7 +129,7 @@ detect_gpu
|
||||
detect_gpu_size
|
||||
|
||||
PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
|
||||
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
|
||||
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vad.yaml,/aio/${PROFILE}/vision.yaml}"
|
||||
|
||||
check_vars
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
embeddings: true
|
||||
name: text-embedding-ada-002
|
||||
backend: sentencetransformers
|
||||
parameters:
|
||||
model: all-MiniLM-L6-v2
|
||||
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
|
||||
|
||||
usage: |
|
||||
You can test this model with curl like this:
|
||||
|
||||
@@ -1,101 +1,53 @@
|
||||
name: gpt-4
|
||||
mmap: true
|
||||
parameters:
|
||||
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
|
||||
context_size: 8192
|
||||
|
||||
stopwords:
|
||||
- "<|im_end|>"
|
||||
- "<dummy32000>"
|
||||
- "</tool_call>"
|
||||
- "<|eot_id|>"
|
||||
- "<|end_of_text|>"
|
||||
|
||||
context_size: 4096
|
||||
f16: true
|
||||
function:
|
||||
# disable injecting the "answer" tool
|
||||
disable_no_action: true
|
||||
|
||||
capture_llm_results:
|
||||
- (?s)<Thought>(.*?)</Thought>
|
||||
grammar:
|
||||
# This allows the grammar to also return messages
|
||||
mixed_mode: true
|
||||
# Suffix to add to the grammar
|
||||
#prefix: '<tool_call>\n'
|
||||
# Force parallel calls in the grammar
|
||||
# parallel_calls: true
|
||||
|
||||
return_name_in_function_response: true
|
||||
# Without grammar uncomment the lines below
|
||||
# Warning: this is relying only on the capability of the
|
||||
# LLM model to generate the correct function call.
|
||||
json_regex_match:
|
||||
- "(?s)<tool_call>(.*?)</tool_call>"
|
||||
- "(?s)<tool_call>(.*?)"
|
||||
properties_order: name,arguments
|
||||
json_regex_match:
|
||||
- (?s)<Output>(.*?)</Output>
|
||||
replace_llm_results:
|
||||
# Drop the scratchpad content from responses
|
||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
||||
- key: (?s)<Thought>(.*?)</Thought>
|
||||
value: ""
|
||||
replace_function_results:
|
||||
# Replace everything that is not JSON array or object
|
||||
#
|
||||
- key: '(?s)^[^{\[]*'
|
||||
value: ""
|
||||
- key: '(?s)[^}\]]*$'
|
||||
value: ""
|
||||
- key: "'([^']*?)'"
|
||||
value: "_DQUOTE_${1}_DQUOTE_"
|
||||
- key: '\\"'
|
||||
value: "__TEMP_QUOTE__"
|
||||
- key: "\'"
|
||||
value: "'"
|
||||
- key: "_DQUOTE_"
|
||||
value: '"'
|
||||
- key: "__TEMP_QUOTE__"
|
||||
value: '"'
|
||||
# Drop the scratchpad content from responses
|
||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
||||
value: ""
|
||||
|
||||
mmap: true
|
||||
name: gpt-4
|
||||
parameters:
|
||||
model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
|
||||
stopwords:
|
||||
- <|im_end|>
|
||||
- <dummy32000>
|
||||
- </s>
|
||||
template:
|
||||
chat: |
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
chat_message: |
|
||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
||||
{{- if .FunctionCall }}
|
||||
<tool_call>
|
||||
{{- else if eq .RoleName "tool" }}
|
||||
<tool_response>
|
||||
{{- end }}
|
||||
{{- if .Content}}
|
||||
<|im_start|>{{ .RoleName }}
|
||||
{{ if .FunctionCall -}}
|
||||
Function call:
|
||||
{{ else if eq .RoleName "tool" -}}
|
||||
Function response:
|
||||
{{ end -}}
|
||||
{{ if .Content -}}
|
||||
{{.Content }}
|
||||
{{- end }}
|
||||
{{- if .FunctionCall}}
|
||||
{{ end -}}
|
||||
{{ if .FunctionCall -}}
|
||||
{{toJson .FunctionCall}}
|
||||
{{- end }}
|
||||
{{- if .FunctionCall }}
|
||||
</tool_call>
|
||||
{{- else if eq .RoleName "tool" }}
|
||||
</tool_response>
|
||||
{{- end }}<|im_end|>
|
||||
{{ end -}}<|im_end|>
|
||||
completion: |
|
||||
{{.Input}}
|
||||
function: |-
|
||||
function: |
|
||||
<|im_start|>system
|
||||
You are a function calling AI model.
|
||||
Here are the available tools:
|
||||
<tools>
|
||||
You are an AI assistant that executes function calls, and these are the tools at your disposal:
|
||||
{{range .Functions}}
|
||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||
{{end}}
|
||||
</tools>
|
||||
You should call the tools provided to you sequentially
|
||||
Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
|
||||
<scratchpad>
|
||||
{step-by-step reasoning and plan in bullet points}
|
||||
</scratchpad>
|
||||
For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
|
||||
<tool_call>
|
||||
{"arguments": <args-dict>, "name": <function-name>}
|
||||
</tool_call><|im_end|>
|
||||
<|im_end|>
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
<|im_start|>assistant
|
||||
|
||||
download_files:
|
||||
- filename: localai-functioncall-phi-4-v0.3-q4_k_m.gguf
|
||||
sha256: 23fee048ded2a6e2e1a7b6bbefa6cbf83068f194caa9552aecbaa00fec8a16d5
|
||||
uri: huggingface://mudler/LocalAI-functioncall-phi-4-v0.3-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.3-q4_k_m.gguf
|
||||
8
aio/gpu-8g/vad.yaml
Normal file
8
aio/gpu-8g/vad.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
backend: silero-vad
|
||||
name: silero-vad
|
||||
parameters:
|
||||
model: silero-vad.onnx
|
||||
download_files:
|
||||
- filename: silero-vad.onnx
|
||||
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
|
||||
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
|
||||
@@ -1,35 +1,49 @@
|
||||
backend: llama-cpp
|
||||
context_size: 4096
|
||||
f16: true
|
||||
mmap: true
|
||||
mmproj: minicpm-v-2_6-mmproj-f16.gguf
|
||||
name: gpt-4o
|
||||
|
||||
roles:
|
||||
user: "USER:"
|
||||
assistant: "ASSISTANT:"
|
||||
system: "SYSTEM:"
|
||||
|
||||
mmproj: llava-v1.6-7b-mmproj-f16.gguf
|
||||
parameters:
|
||||
model: llava-v1.6-mistral-7b.Q5_K_M.gguf
|
||||
temperature: 0.2
|
||||
top_k: 40
|
||||
top_p: 0.95
|
||||
seed: -1
|
||||
|
||||
model: minicpm-v-2_6-Q4_K_M.gguf
|
||||
stopwords:
|
||||
- <|im_end|>
|
||||
- <dummy32000>
|
||||
- </s>
|
||||
- <|endoftext|>
|
||||
template:
|
||||
chat: |
|
||||
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
chat_message: |
|
||||
<|im_start|>{{ .RoleName }}
|
||||
{{ if .FunctionCall -}}
|
||||
Function call:
|
||||
{{ else if eq .RoleName "tool" -}}
|
||||
Function response:
|
||||
{{ end -}}
|
||||
{{ if .Content -}}
|
||||
{{.Content }}
|
||||
{{ end -}}
|
||||
{{ if .FunctionCall -}}
|
||||
{{toJson .FunctionCall}}
|
||||
{{ end -}}<|im_end|>
|
||||
completion: |
|
||||
{{.Input}}
|
||||
ASSISTANT:
|
||||
function: |
|
||||
<|im_start|>system
|
||||
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||
{{range .Functions}}
|
||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||
{{end}}
|
||||
For each function call return a json object with function name and arguments
|
||||
<|im_end|>
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
|
||||
download_files:
|
||||
- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf
|
||||
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf
|
||||
- filename: llava-v1.6-7b-mmproj-f16.gguf
|
||||
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
|
||||
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "gpt-4-vision-preview",
|
||||
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
|
||||
- filename: minicpm-v-2_6-Q4_K_M.gguf
|
||||
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
|
||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
|
||||
- filename: minicpm-v-2_6-mmproj-f16.gguf
|
||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
|
||||
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd
|
||||
@@ -1,7 +1,7 @@
|
||||
embeddings: true
|
||||
name: text-embedding-ada-002
|
||||
backend: sentencetransformers
|
||||
parameters:
|
||||
model: all-MiniLM-L6-v2
|
||||
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
|
||||
|
||||
usage: |
|
||||
You can test this model with curl like this:
|
||||
|
||||
@@ -1,103 +1,53 @@
|
||||
name: gpt-4
|
||||
mmap: false
|
||||
context_size: 8192
|
||||
|
||||
f16: false
|
||||
parameters:
|
||||
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
|
||||
|
||||
stopwords:
|
||||
- "<|im_end|>"
|
||||
- "<dummy32000>"
|
||||
- "</tool_call>"
|
||||
- "<|eot_id|>"
|
||||
- "<|end_of_text|>"
|
||||
|
||||
context_size: 4096
|
||||
f16: true
|
||||
function:
|
||||
# disable injecting the "answer" tool
|
||||
disable_no_action: true
|
||||
|
||||
capture_llm_results:
|
||||
- (?s)<Thought>(.*?)</Thought>
|
||||
grammar:
|
||||
# This allows the grammar to also return messages
|
||||
mixed_mode: true
|
||||
# Suffix to add to the grammar
|
||||
#prefix: '<tool_call>\n'
|
||||
# Force parallel calls in the grammar
|
||||
# parallel_calls: true
|
||||
|
||||
return_name_in_function_response: true
|
||||
# Without grammar uncomment the lines below
|
||||
# Warning: this is relying only on the capability of the
|
||||
# LLM model to generate the correct function call.
|
||||
json_regex_match:
|
||||
- "(?s)<tool_call>(.*?)</tool_call>"
|
||||
- "(?s)<tool_call>(.*?)"
|
||||
properties_order: name,arguments
|
||||
json_regex_match:
|
||||
- (?s)<Output>(.*?)</Output>
|
||||
replace_llm_results:
|
||||
# Drop the scratchpad content from responses
|
||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
||||
- key: (?s)<Thought>(.*?)</Thought>
|
||||
value: ""
|
||||
replace_function_results:
|
||||
# Replace everything that is not JSON array or object
|
||||
#
|
||||
- key: '(?s)^[^{\[]*'
|
||||
value: ""
|
||||
- key: '(?s)[^}\]]*$'
|
||||
value: ""
|
||||
- key: "'([^']*?)'"
|
||||
value: "_DQUOTE_${1}_DQUOTE_"
|
||||
- key: '\\"'
|
||||
value: "__TEMP_QUOTE__"
|
||||
- key: "\'"
|
||||
value: "'"
|
||||
- key: "_DQUOTE_"
|
||||
value: '"'
|
||||
- key: "__TEMP_QUOTE__"
|
||||
value: '"'
|
||||
# Drop the scratchpad content from responses
|
||||
- key: "(?s)<scratchpad>.*</scratchpad>"
|
||||
value: ""
|
||||
|
||||
mmap: true
|
||||
name: gpt-4
|
||||
parameters:
|
||||
model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
|
||||
stopwords:
|
||||
- <|im_end|>
|
||||
- <dummy32000>
|
||||
- </s>
|
||||
template:
|
||||
chat: |
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
chat_message: |
|
||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
||||
{{- if .FunctionCall }}
|
||||
<tool_call>
|
||||
{{- else if eq .RoleName "tool" }}
|
||||
<tool_response>
|
||||
{{- end }}
|
||||
{{- if .Content}}
|
||||
<|im_start|>{{ .RoleName }}
|
||||
{{ if .FunctionCall -}}
|
||||
Function call:
|
||||
{{ else if eq .RoleName "tool" -}}
|
||||
Function response:
|
||||
{{ end -}}
|
||||
{{ if .Content -}}
|
||||
{{.Content }}
|
||||
{{- end }}
|
||||
{{- if .FunctionCall}}
|
||||
{{ end -}}
|
||||
{{ if .FunctionCall -}}
|
||||
{{toJson .FunctionCall}}
|
||||
{{- end }}
|
||||
{{- if .FunctionCall }}
|
||||
</tool_call>
|
||||
{{- else if eq .RoleName "tool" }}
|
||||
</tool_response>
|
||||
{{- end }}<|im_end|>
|
||||
{{ end -}}<|im_end|>
|
||||
completion: |
|
||||
{{.Input}}
|
||||
function: |-
|
||||
function: |
|
||||
<|im_start|>system
|
||||
You are a function calling AI model.
|
||||
Here are the available tools:
|
||||
<tools>
|
||||
You are an AI assistant that executes function calls, and these are the tools at your disposal:
|
||||
{{range .Functions}}
|
||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||
{{end}}
|
||||
</tools>
|
||||
You should call the tools provided to you sequentially
|
||||
Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
|
||||
<scratchpad>
|
||||
{step-by-step reasoning and plan in bullet points}
|
||||
</scratchpad>
|
||||
For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
|
||||
<tool_call>
|
||||
{"arguments": <args-dict>, "name": <function-name>}
|
||||
</tool_call><|im_end|>
|
||||
<|im_end|>
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
|
||||
download_files:
|
||||
- filename: localai-functioncall-phi-4-v0.3-q4_k_m.gguf
|
||||
sha256: 23fee048ded2a6e2e1a7b6bbefa6cbf83068f194caa9552aecbaa00fec8a16d5
|
||||
uri: huggingface://mudler/LocalAI-functioncall-phi-4-v0.3-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.3-q4_k_m.gguf
|
||||
8
aio/intel/vad.yaml
Normal file
8
aio/intel/vad.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
backend: silero-vad
|
||||
name: silero-vad
|
||||
parameters:
|
||||
model: silero-vad.onnx
|
||||
download_files:
|
||||
- filename: silero-vad.onnx
|
||||
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
|
||||
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
|
||||
@@ -1,35 +1,50 @@
|
||||
backend: llama-cpp
|
||||
context_size: 4096
|
||||
mmap: false
|
||||
f16: false
|
||||
f16: true
|
||||
mmap: true
|
||||
mmproj: minicpm-v-2_6-mmproj-f16.gguf
|
||||
name: gpt-4o
|
||||
|
||||
roles:
|
||||
user: "USER:"
|
||||
assistant: "ASSISTANT:"
|
||||
system: "SYSTEM:"
|
||||
|
||||
mmproj: llava-v1.6-7b-mmproj-f16.gguf
|
||||
parameters:
|
||||
model: llava-v1.6-mistral-7b.Q5_K_M.gguf
|
||||
temperature: 0.2
|
||||
top_k: 40
|
||||
top_p: 0.95
|
||||
seed: -1
|
||||
|
||||
model: minicpm-v-2_6-Q4_K_M.gguf
|
||||
stopwords:
|
||||
- <|im_end|>
|
||||
- <dummy32000>
|
||||
- </s>
|
||||
- <|endoftext|>
|
||||
template:
|
||||
chat: |
|
||||
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
chat_message: |
|
||||
<|im_start|>{{ .RoleName }}
|
||||
{{ if .FunctionCall -}}
|
||||
Function call:
|
||||
{{ else if eq .RoleName "tool" -}}
|
||||
Function response:
|
||||
{{ end -}}
|
||||
{{ if .Content -}}
|
||||
{{.Content }}
|
||||
{{ end -}}
|
||||
{{ if .FunctionCall -}}
|
||||
{{toJson .FunctionCall}}
|
||||
{{ end -}}<|im_end|>
|
||||
completion: |
|
||||
{{.Input}}
|
||||
ASSISTANT:
|
||||
function: |
|
||||
<|im_start|>system
|
||||
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||
{{range .Functions}}
|
||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||
{{end}}
|
||||
For each function call return a json object with function name and arguments
|
||||
<|im_end|>
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant
|
||||
|
||||
|
||||
download_files:
|
||||
- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf
|
||||
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf
|
||||
- filename: llava-v1.6-7b-mmproj-f16.gguf
|
||||
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
|
||||
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "gpt-4-vision-preview",
|
||||
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
|
||||
- filename: minicpm-v-2_6-Q4_K_M.gguf
|
||||
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
|
||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
|
||||
- filename: minicpm-v-2_6-mmproj-f16.gguf
|
||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
|
||||
sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd
|
||||
@@ -163,6 +163,10 @@ message Reply {
|
||||
double timing_token_generation = 5;
|
||||
}
|
||||
|
||||
message GrammarTrigger {
|
||||
string word = 1;
|
||||
}
|
||||
|
||||
message ModelOptions {
|
||||
string Model = 1;
|
||||
int32 ContextSize = 2;
|
||||
@@ -224,6 +228,11 @@ message ModelOptions {
|
||||
int32 MaxModelLen = 54;
|
||||
int32 TensorParallelSize = 55;
|
||||
string LoadFormat = 58;
|
||||
bool DisableLogStatus = 66;
|
||||
string DType = 67;
|
||||
int32 LimitImagePerPrompt = 68;
|
||||
int32 LimitVideoPerPrompt = 69;
|
||||
int32 LimitAudioPerPrompt = 70;
|
||||
|
||||
string MMProj = 41;
|
||||
|
||||
@@ -247,6 +256,8 @@ message ModelOptions {
|
||||
|
||||
string CacheTypeKey = 63;
|
||||
string CacheTypeValue = 64;
|
||||
|
||||
repeated GrammarTrigger GrammarTriggers = 65;
|
||||
}
|
||||
|
||||
message Result {
|
||||
|
||||
@@ -467,6 +467,10 @@ struct llama_server_context
|
||||
bool all_slots_are_idle = false;
|
||||
bool add_bos_token = true;
|
||||
bool has_eos_token = true;
|
||||
bool has_gpu = false;
|
||||
|
||||
bool grammar_lazy = false;
|
||||
std::vector<common_grammar_trigger> grammar_triggers;
|
||||
|
||||
int32_t n_ctx; // total context for all clients / slots
|
||||
|
||||
@@ -508,7 +512,10 @@ struct llama_server_context
|
||||
if (!params.mmproj.empty()) {
|
||||
multimodal = true;
|
||||
LOG_INFO("Multi Modal Mode Enabled", {});
|
||||
clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1);
|
||||
clp_ctx = clip_init(params.mmproj.c_str(), clip_context_params {
|
||||
/* use_gpu */ has_gpu,
|
||||
/*verbosity=*/ 1,
|
||||
});
|
||||
if(clp_ctx == nullptr) {
|
||||
LOG_ERR("unable to load clip model: %s", params.mmproj.c_str());
|
||||
return false;
|
||||
@@ -706,6 +713,8 @@ struct llama_server_context
|
||||
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
||||
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
||||
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
||||
slot->sparams.grammar_triggers = grammar_triggers;
|
||||
slot->sparams.grammar_lazy = grammar_lazy;
|
||||
|
||||
if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) {
|
||||
// Might be better to reject the request with a 400 ?
|
||||
@@ -1150,6 +1159,14 @@ struct llama_server_context
|
||||
slot.has_next_token = false;
|
||||
}
|
||||
|
||||
if (slot.n_past >= slot.n_ctx) {
|
||||
slot.truncated = true;
|
||||
slot.stopped_limit = true;
|
||||
slot.has_next_token = false;
|
||||
|
||||
LOG_VERBOSE("stopped due to running out of context capacity", {});
|
||||
}
|
||||
|
||||
if (result.tok == llama_vocab_eos(vocab) || llama_vocab_is_eog(vocab, result.tok))
|
||||
{
|
||||
slot.stopped_eos = true;
|
||||
@@ -1337,7 +1354,7 @@ struct llama_server_context
|
||||
queue_results.send(res);
|
||||
}
|
||||
|
||||
void send_embedding(llama_client_slot &slot)
|
||||
void send_embedding(llama_client_slot &slot, const llama_batch & batch)
|
||||
{
|
||||
task_result res;
|
||||
res.id = slot.task_id;
|
||||
@@ -1359,10 +1376,38 @@ struct llama_server_context
|
||||
else
|
||||
{
|
||||
const float *data = llama_get_embeddings(ctx);
|
||||
std::vector<float> embedding(data, data + n_embd);
|
||||
std::vector<float> embd_res(n_embd, 0.0f);
|
||||
std::vector<std::vector<float>> embedding;
|
||||
for (int i = 0; i < batch.n_tokens; ++i) {
|
||||
if (!batch.logits[i] || batch.seq_id[i][0] != slot.id) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const float * embd = llama_get_embeddings_seq(ctx, batch.seq_id[i][0]);
|
||||
if (embd == NULL) {
|
||||
embd = llama_get_embeddings_ith(ctx, i);
|
||||
}
|
||||
|
||||
if (embd == NULL) {
|
||||
LOG("failed to get embeddings");
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// normalize only when there is pooling
|
||||
// TODO: configurable
|
||||
if (llama_pooling_type(ctx) != LLAMA_POOLING_TYPE_NONE) {
|
||||
common_embd_normalize(embd, embd_res.data(), n_embd, 2);
|
||||
embedding.push_back(embd_res);
|
||||
} else {
|
||||
embedding.push_back({ embd, embd + n_embd });
|
||||
}
|
||||
}
|
||||
|
||||
// OAI compat
|
||||
res.result_json = json
|
||||
{
|
||||
{"embedding", embedding },
|
||||
{"embedding", embedding[0] },
|
||||
};
|
||||
}
|
||||
queue_results.send(res);
|
||||
@@ -1622,17 +1667,17 @@ struct llama_server_context
|
||||
{
|
||||
if (slot.is_processing() && system_tokens.size() + slot.cache_tokens.size() >= (size_t) slot.n_ctx)
|
||||
{
|
||||
// this check is redundant (for good)
|
||||
// we should never get here, because generation should already stopped in process_token()
|
||||
|
||||
// START LOCALAI changes
|
||||
// Temporary disable context-shifting as it can lead to infinite loops (issue: https://github.com/ggerganov/llama.cpp/issues/3969)
|
||||
// See: https://github.com/mudler/LocalAI/issues/1333
|
||||
// Context is exhausted, release the slot
|
||||
slot.release();
|
||||
send_final_response(slot);
|
||||
slot.cache_tokens.clear();
|
||||
slot.n_past = 0;
|
||||
slot.truncated = false;
|
||||
slot.has_next_token = true;
|
||||
LOG("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
|
||||
slot.has_next_token = false;
|
||||
LOG_ERROR("context is exhausted, release the slot", {});
|
||||
|
||||
continue;
|
||||
// END LOCALAI changes
|
||||
@@ -1983,7 +2028,7 @@ struct llama_server_context
|
||||
// prompt evaluated for embedding
|
||||
if (slot.embedding)
|
||||
{
|
||||
send_embedding(slot);
|
||||
send_embedding(slot, batch_view);
|
||||
slot.release();
|
||||
slot.i_batch = -1;
|
||||
continue;
|
||||
@@ -2077,7 +2122,11 @@ static void append_to_generated_text_from_generated_token_probs(llama_server_con
|
||||
}
|
||||
|
||||
std::function<void(int)> shutdown_handler;
|
||||
inline void signal_handler(int signal) { shutdown_handler(signal); }
|
||||
|
||||
inline void signal_handler(int signal) {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
/////////////////////////////////
|
||||
////////////////////////////////
|
||||
@@ -2273,7 +2322,7 @@ static std::string get_all_kv_cache_types() {
|
||||
}
|
||||
|
||||
static void params_parse(const backend::ModelOptions* request,
|
||||
common_params & params) {
|
||||
common_params & params, llama_server_context &llama) {
|
||||
|
||||
// this is comparable to: https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L1809
|
||||
|
||||
@@ -2311,6 +2360,20 @@ static void params_parse(const backend::ModelOptions* request,
|
||||
add_rpc_devices(std::string(llama_grpc_servers));
|
||||
}
|
||||
|
||||
// decode options. Options are in form optname:optvale, or if booleans only optname.
|
||||
for (int i = 0; i < request->options_size(); i++) {
|
||||
std::string opt = request->options(i);
|
||||
char *optname = strtok(&opt[0], ":");
|
||||
char *optval = strtok(NULL, ":");
|
||||
if (optval == NULL) {
|
||||
optval = "true";
|
||||
}
|
||||
|
||||
if (!strcmp(optname, "gpu")) {
|
||||
llama.has_gpu = true;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Add yarn
|
||||
|
||||
if (!request->tensorsplit().empty()) {
|
||||
@@ -2374,6 +2437,21 @@ static void params_parse(const backend::ModelOptions* request,
|
||||
if ( request->ropefreqscale() != 0.0f ) {
|
||||
params.rope_freq_scale = request->ropefreqscale();
|
||||
}
|
||||
|
||||
if (request->grammartriggers_size() > 0) {
|
||||
LOG_INFO("configuring grammar triggers", {});
|
||||
llama.grammar_lazy = true;
|
||||
for (int i = 0; i < request->grammartriggers_size(); i++) {
|
||||
common_grammar_trigger trigger;
|
||||
trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_WORD;
|
||||
trigger.value = request->grammartriggers(i).word();
|
||||
// trigger.at_start = request->grammartriggers(i).at_start();
|
||||
llama.grammar_triggers.push_back(trigger);
|
||||
LOG_INFO("grammar trigger", {
|
||||
{ "word", trigger.value },
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2389,7 +2467,7 @@ public:
|
||||
grpc::Status LoadModel(ServerContext* context, const backend::ModelOptions* request, backend::Result* result) {
|
||||
// Implement LoadModel RPC
|
||||
common_params params;
|
||||
params_parse(request, params);
|
||||
params_parse(request, params, llama);
|
||||
|
||||
llama_backend_init();
|
||||
llama_numa_init(params.numa);
|
||||
@@ -2522,6 +2600,18 @@ public:
|
||||
return grpc::Status::OK;
|
||||
}
|
||||
|
||||
grpc::Status TokenizeString(ServerContext* context, const backend::PredictOptions* request, backend::TokenizationResponse* response){
|
||||
json data = parse_options(false, request, llama);
|
||||
|
||||
std::vector<llama_token> tokens = llama.tokenize(data["prompt"],false);
|
||||
|
||||
for (int i=0 ; i< tokens.size(); i++){
|
||||
response->add_tokens(tokens[i]);
|
||||
}
|
||||
|
||||
return grpc::Status::OK;
|
||||
}
|
||||
|
||||
grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
|
||||
llama_client_slot* active_slot = llama.get_active_slot();
|
||||
|
||||
@@ -2563,6 +2653,20 @@ void RunServer(const std::string& server_address) {
|
||||
int main(int argc, char** argv) {
|
||||
std::string server_address("localhost:50051");
|
||||
|
||||
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
|
||||
struct sigaction sigint_action;
|
||||
sigint_action.sa_handler = signal_handler;
|
||||
sigemptyset (&sigint_action.sa_mask);
|
||||
sigint_action.sa_flags = 0;
|
||||
sigaction(SIGINT, &sigint_action, NULL);
|
||||
sigaction(SIGTERM, &sigint_action, NULL);
|
||||
#elif defined (_WIN32)
|
||||
auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
|
||||
return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
|
||||
};
|
||||
SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
|
||||
#endif
|
||||
|
||||
// Define long and short options
|
||||
struct option long_options[] = {
|
||||
{"addr", required_argument, nullptr, 'a'},
|
||||
|
||||
@@ -35,6 +35,8 @@ const char* sample_method_str[] = {
|
||||
"ipndm",
|
||||
"ipndm_v",
|
||||
"lcm",
|
||||
"ddim_trailing",
|
||||
"tcd",
|
||||
};
|
||||
|
||||
// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
|
||||
@@ -173,6 +175,7 @@ int gen_image(char *text, char *negativeText, int width, int height, int steps,
|
||||
-1, //clip_skip
|
||||
cfg_scale, // sfg_scale
|
||||
3.5f,
|
||||
0, // eta
|
||||
width,
|
||||
height,
|
||||
sample_method,
|
||||
|
||||
@@ -1,204 +0,0 @@
|
||||
package main
|
||||
|
||||
// This is a wrapper to statisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/go-skynet/go-llama.cpp"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
)
|
||||
|
||||
type LLM struct {
|
||||
base.SingleThread
|
||||
|
||||
llama *llama.LLama
|
||||
}
|
||||
|
||||
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||
ropeFreqBase := float32(10000)
|
||||
ropeFreqScale := float32(1)
|
||||
|
||||
if opts.RopeFreqBase != 0 {
|
||||
ropeFreqBase = opts.RopeFreqBase
|
||||
}
|
||||
if opts.RopeFreqScale != 0 {
|
||||
ropeFreqScale = opts.RopeFreqScale
|
||||
}
|
||||
|
||||
llamaOpts := []llama.ModelOption{
|
||||
llama.WithRopeFreqBase(ropeFreqBase),
|
||||
llama.WithRopeFreqScale(ropeFreqScale),
|
||||
}
|
||||
|
||||
if opts.NGQA != 0 {
|
||||
llamaOpts = append(llamaOpts, llama.WithGQA(int(opts.NGQA)))
|
||||
}
|
||||
|
||||
if opts.RMSNormEps != 0 {
|
||||
llamaOpts = append(llamaOpts, llama.WithRMSNormEPS(opts.RMSNormEps))
|
||||
}
|
||||
|
||||
if opts.ContextSize != 0 {
|
||||
llamaOpts = append(llamaOpts, llama.SetContext(int(opts.ContextSize)))
|
||||
}
|
||||
if opts.F16Memory {
|
||||
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
|
||||
}
|
||||
if opts.Embeddings {
|
||||
llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
|
||||
}
|
||||
if opts.NGPULayers != 0 {
|
||||
llamaOpts = append(llamaOpts, llama.SetGPULayers(int(opts.NGPULayers)))
|
||||
}
|
||||
|
||||
llamaOpts = append(llamaOpts, llama.SetMMap(opts.MMap))
|
||||
llamaOpts = append(llamaOpts, llama.SetMainGPU(opts.MainGPU))
|
||||
llamaOpts = append(llamaOpts, llama.SetTensorSplit(opts.TensorSplit))
|
||||
if opts.NBatch != 0 {
|
||||
llamaOpts = append(llamaOpts, llama.SetNBatch(int(opts.NBatch)))
|
||||
} else {
|
||||
llamaOpts = append(llamaOpts, llama.SetNBatch(512))
|
||||
}
|
||||
|
||||
if opts.NUMA {
|
||||
llamaOpts = append(llamaOpts, llama.EnableNUMA)
|
||||
}
|
||||
|
||||
if opts.LowVRAM {
|
||||
llamaOpts = append(llamaOpts, llama.EnabelLowVRAM)
|
||||
}
|
||||
|
||||
model, err := llama.New(opts.ModelFile, llamaOpts...)
|
||||
llm.llama = model
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
|
||||
ropeFreqBase := float32(10000)
|
||||
ropeFreqScale := float32(1)
|
||||
|
||||
if opts.RopeFreqBase != 0 {
|
||||
ropeFreqBase = opts.RopeFreqBase
|
||||
}
|
||||
if opts.RopeFreqScale != 0 {
|
||||
ropeFreqScale = opts.RopeFreqScale
|
||||
}
|
||||
predictOptions := []llama.PredictOption{
|
||||
llama.SetTemperature(opts.Temperature),
|
||||
llama.SetTopP(opts.TopP),
|
||||
llama.SetTopK(int(opts.TopK)),
|
||||
llama.SetTokens(int(opts.Tokens)),
|
||||
llama.SetThreads(int(opts.Threads)),
|
||||
llama.WithGrammar(opts.Grammar),
|
||||
llama.SetRopeFreqBase(ropeFreqBase),
|
||||
llama.SetRopeFreqScale(ropeFreqScale),
|
||||
llama.SetNegativePromptScale(opts.NegativePromptScale),
|
||||
llama.SetNegativePrompt(opts.NegativePrompt),
|
||||
}
|
||||
|
||||
if opts.PromptCacheAll {
|
||||
predictOptions = append(predictOptions, llama.EnablePromptCacheAll)
|
||||
}
|
||||
|
||||
if opts.PromptCacheRO {
|
||||
predictOptions = append(predictOptions, llama.EnablePromptCacheRO)
|
||||
}
|
||||
|
||||
// Expected absolute path
|
||||
if opts.PromptCachePath != "" {
|
||||
predictOptions = append(predictOptions, llama.SetPathPromptCache(opts.PromptCachePath))
|
||||
}
|
||||
|
||||
if opts.Mirostat != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetMirostat(int(opts.Mirostat)))
|
||||
}
|
||||
|
||||
if opts.MirostatETA != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetMirostatETA(opts.MirostatETA))
|
||||
}
|
||||
|
||||
if opts.MirostatTAU != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetMirostatTAU(opts.MirostatTAU))
|
||||
}
|
||||
|
||||
if opts.Debug {
|
||||
predictOptions = append(predictOptions, llama.Debug)
|
||||
}
|
||||
|
||||
predictOptions = append(predictOptions, llama.SetStopWords(opts.StopPrompts...))
|
||||
|
||||
if opts.PresencePenalty != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetPenalty(opts.PresencePenalty))
|
||||
}
|
||||
|
||||
if opts.NKeep != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetNKeep(int(opts.NKeep)))
|
||||
}
|
||||
|
||||
if opts.Batch != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetBatch(int(opts.Batch)))
|
||||
}
|
||||
|
||||
if opts.F16KV {
|
||||
predictOptions = append(predictOptions, llama.EnableF16KV)
|
||||
}
|
||||
|
||||
if opts.IgnoreEOS {
|
||||
predictOptions = append(predictOptions, llama.IgnoreEOS)
|
||||
}
|
||||
|
||||
if opts.Seed != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetSeed(int(opts.Seed)))
|
||||
}
|
||||
|
||||
//predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed))
|
||||
|
||||
predictOptions = append(predictOptions, llama.SetFrequencyPenalty(opts.FrequencyPenalty))
|
||||
predictOptions = append(predictOptions, llama.SetMlock(opts.MLock))
|
||||
predictOptions = append(predictOptions, llama.SetMemoryMap(opts.MMap))
|
||||
predictOptions = append(predictOptions, llama.SetPredictionMainGPU(opts.MainGPU))
|
||||
predictOptions = append(predictOptions, llama.SetPredictionTensorSplit(opts.TensorSplit))
|
||||
predictOptions = append(predictOptions, llama.SetTailFreeSamplingZ(opts.TailFreeSamplingZ))
|
||||
predictOptions = append(predictOptions, llama.SetTypicalP(opts.TypicalP))
|
||||
return predictOptions
|
||||
}
|
||||
|
||||
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
return llm.llama.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
||||
}
|
||||
|
||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
predictOptions := buildPredictOptions(opts)
|
||||
|
||||
predictOptions = append(predictOptions, llama.SetTokenCallback(func(token string) bool {
|
||||
results <- token
|
||||
return true
|
||||
}))
|
||||
|
||||
go func() {
|
||||
_, err := llm.llama.Predict(opts.Prompt, predictOptions...)
|
||||
if err != nil {
|
||||
fmt.Println("err: ", err)
|
||||
}
|
||||
close(results)
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
|
||||
predictOptions := buildPredictOptions(opts)
|
||||
|
||||
if len(opts.EmbeddingTokens) > 0 {
|
||||
tokens := []int{}
|
||||
for _, t := range opts.EmbeddingTokens {
|
||||
tokens = append(tokens, int(t))
|
||||
}
|
||||
return llm.llama.TokenEmbeddings(tokens, predictOptions...)
|
||||
}
|
||||
|
||||
return llm.llama.Embeddings(opts.Embeddings, predictOptions...)
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
accelerate
|
||||
auto-gptq==0.7.1
|
||||
grpcio==1.69.0
|
||||
grpcio==1.71.0
|
||||
protobuf
|
||||
certifi
|
||||
transformers
|
||||
@@ -1,4 +1,4 @@
|
||||
bark==0.1.5
|
||||
grpcio==1.69.0
|
||||
grpcio==1.71.0
|
||||
protobuf
|
||||
certifi
|
||||
@@ -1,3 +1,3 @@
|
||||
grpcio==1.69.0
|
||||
grpcio==1.71.0
|
||||
protobuf
|
||||
grpcio-tools
|
||||
@@ -1,4 +1,4 @@
|
||||
transformers
|
||||
transformers==4.48.3
|
||||
accelerate
|
||||
torch==2.4.1
|
||||
coqui-tts
|
||||
@@ -1,6 +1,6 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||
torch==2.4.1+cu118
|
||||
torchaudio==2.4.1+cu118
|
||||
transformers
|
||||
transformers==4.48.3
|
||||
accelerate
|
||||
coqui-tts
|
||||
@@ -1,5 +1,5 @@
|
||||
torch==2.4.1
|
||||
torchaudio==2.4.1
|
||||
transformers
|
||||
transformers==4.48.3
|
||||
accelerate
|
||||
coqui-tts
|
||||
@@ -1,6 +1,6 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||
torch==2.4.1+rocm6.0
|
||||
torchaudio==2.4.1+rocm6.0
|
||||
transformers
|
||||
transformers==4.48.3
|
||||
accelerate
|
||||
coqui-tts
|
||||
@@ -5,6 +5,6 @@ torchaudio==2.3.1+cxx11.abi
|
||||
oneccl_bind_pt==2.3.100+xpu
|
||||
optimum[openvino]
|
||||
setuptools
|
||||
transformers
|
||||
transformers==4.48.3
|
||||
accelerate
|
||||
coqui-tts
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.69.0
|
||||
grpcio==1.71.0
|
||||
protobuf
|
||||
certifi
|
||||
packaging==24.1
|
||||
@@ -159,6 +159,18 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
torchType = torch.float16
|
||||
variant = "fp16"
|
||||
|
||||
options = request.Options
|
||||
|
||||
# empty dict
|
||||
self.options = {}
|
||||
|
||||
# The options are a list of strings in this form optname:optvalue
|
||||
# We are storing all the options in a dict so we can use it later when
|
||||
# generating the images
|
||||
for opt in options:
|
||||
key, value = opt.split(":")
|
||||
self.options[key] = value
|
||||
|
||||
local = False
|
||||
modelFile = request.Model
|
||||
|
||||
@@ -441,6 +453,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
# create a dictionary of parameters by using the keys from EnableParameters and the values from defaults
|
||||
kwargs = {key: options.get(key) for key in keys if key in options}
|
||||
|
||||
# populate kwargs from self.options.
|
||||
kwargs.update(self.options)
|
||||
|
||||
# Set seed
|
||||
if request.seed > 0:
|
||||
kwargs["generator"] = torch.Generator(device=self.device).manual_seed(
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
setuptools
|
||||
grpcio==1.69.0
|
||||
grpcio==1.71.0
|
||||
pillow
|
||||
protobuf
|
||||
certifi
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.69.0
|
||||
grpcio==1.71.0
|
||||
protobuf
|
||||
certifi
|
||||
wheel
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
grpcio==1.69.0
|
||||
grpcio==1.71.0
|
||||
protobuf
|
||||
grpcio-tools
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.69.0
|
||||
grpcio==1.71.0
|
||||
protobuf
|
||||
phonemizer
|
||||
scipy
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
grpcio==1.69.0
|
||||
grpcio==1.71.0
|
||||
protobuf
|
||||
certifi
|
||||
@@ -5,4 +5,4 @@ accelerate
|
||||
transformers
|
||||
bitsandbytes
|
||||
outetts
|
||||
sentence-transformers==3.3.1
|
||||
sentence-transformers==3.4.1
|
||||
@@ -6,4 +6,4 @@ accelerate
|
||||
transformers
|
||||
bitsandbytes
|
||||
outetts
|
||||
sentence-transformers==3.3.1
|
||||
sentence-transformers==3.4.1
|
||||
|
||||
@@ -5,4 +5,4 @@ numba==0.60.0
|
||||
transformers
|
||||
bitsandbytes
|
||||
outetts
|
||||
sentence-transformers==3.3.1
|
||||
sentence-transformers==3.4.1
|
||||
|
||||
@@ -7,4 +7,4 @@ numba==0.60.0
|
||||
bitsandbytes
|
||||
outetts
|
||||
bitsandbytes
|
||||
sentence-transformers==3.3.1
|
||||
sentence-transformers==3.4.1
|
||||
|
||||
@@ -8,4 +8,4 @@ numba==0.60.0
|
||||
intel-extension-for-transformers
|
||||
bitsandbytes
|
||||
outetts
|
||||
sentence-transformers==3.3.1
|
||||
sentence-transformers==3.4.1
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.69.0
|
||||
grpcio==1.71.0
|
||||
protobuf
|
||||
certifi
|
||||
setuptools
|
||||
|
||||
@@ -109,6 +109,17 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
engine_args.swap_space = request.SwapSpace
|
||||
if request.MaxModelLen != 0:
|
||||
engine_args.max_model_len = request.MaxModelLen
|
||||
if request.DisableLogStatus:
|
||||
engine_args.disable_log_status = request.DisableLogStatus
|
||||
if request.DType != "":
|
||||
engine_args.dtype = request.DType
|
||||
if request.LimitImagePerPrompt != 0 or request.LimitVideoPerPrompt != 0 or request.LimitAudioPerPrompt != 0:
|
||||
# limit-mm-per-prompt defaults to 1 per modality, based on vLLM docs
|
||||
engine_args.limit_mm_per_prompt = {
|
||||
"image": max(request.LimitImagePerPrompt, 1),
|
||||
"video": max(request.LimitVideoPerPrompt, 1),
|
||||
"audio": max(request.LimitAudioPerPrompt, 1)
|
||||
}
|
||||
|
||||
try:
|
||||
self.llm = AsyncLLMEngine.from_engine_args(engine_args)
|
||||
@@ -269,7 +280,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
def load_image(self, image_path: str):
|
||||
"""
|
||||
Load an image from the given file path or base64 encoded data.
|
||||
|
||||
|
||||
Args:
|
||||
image_path (str): The path to the image file or base64 encoded data.
|
||||
|
||||
@@ -288,7 +299,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
def load_video(self, video_path: str):
|
||||
"""
|
||||
Load a video from the given file path.
|
||||
|
||||
|
||||
Args:
|
||||
video_path (str): The path to the image file.
|
||||
|
||||
@@ -335,4 +346,4 @@ if __name__ == "__main__":
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
asyncio.run(serve(args.addr))
|
||||
asyncio.run(serve(args.addr))
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.69.0
|
||||
grpcio==1.71.0
|
||||
protobuf
|
||||
certifi
|
||||
setuptools
|
||||
@@ -62,7 +62,7 @@ func New(opts ...config.AppOption) (*Application, error) {
|
||||
}
|
||||
}
|
||||
|
||||
if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
|
||||
if err := pkgStartup.InstallModels(options.Galleries, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
|
||||
log.Error().Err(err).Msg("error installing models")
|
||||
}
|
||||
|
||||
@@ -145,13 +145,7 @@ func New(opts ...config.AppOption) (*Application, error) {
|
||||
|
||||
if options.LoadToMemory != nil {
|
||||
for _, m := range options.LoadToMemory {
|
||||
cfg, err := application.BackendLoader().LoadBackendConfigFileByName(m, options.ModelPath,
|
||||
config.LoadOptionDebug(options.Debug),
|
||||
config.LoadOptionThreads(options.Threads),
|
||||
config.LoadOptionContextSize(options.ContextSize),
|
||||
config.LoadOptionF16(options.F16),
|
||||
config.ModelPath(options.ModelPath),
|
||||
)
|
||||
cfg, err := application.BackendLoader().LoadBackendConfigFileByNameDefaultOptions(m, options)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -33,7 +33,7 @@ type TokenUsage struct {
|
||||
TimingTokenGeneration float64
|
||||
}
|
||||
|
||||
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c *config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||
modelFile := c.Model
|
||||
|
||||
// Check if the modelFile exists, if it doesn't try to load it from the gallery
|
||||
@@ -48,7 +48,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
||||
}
|
||||
}
|
||||
|
||||
opts := ModelOptions(c, o)
|
||||
opts := ModelOptions(*c, o)
|
||||
inferenceModel, err := loader.Load(opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -84,7 +84,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
||||
|
||||
// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
|
||||
fn := func() (LLMResponse, error) {
|
||||
opts := gRPCPredictOpts(c, loader.ModelPath)
|
||||
opts := gRPCPredictOpts(*c, loader.ModelPath)
|
||||
opts.Prompt = s
|
||||
opts.Messages = protoMessages
|
||||
opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
|
||||
@@ -116,6 +116,11 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
||||
}
|
||||
|
||||
if tokenCallback != nil {
|
||||
|
||||
if c.TemplateConfig.ReplyPrefix != "" {
|
||||
tokenCallback(c.TemplateConfig.ReplyPrefix, tokenUsage)
|
||||
}
|
||||
|
||||
ss := ""
|
||||
|
||||
var partialRune []byte
|
||||
@@ -165,8 +170,13 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
||||
tokenUsage.TimingTokenGeneration = reply.TimingTokenGeneration
|
||||
tokenUsage.TimingPromptProcessing = reply.TimingPromptProcessing
|
||||
|
||||
response := string(reply.Message)
|
||||
if c.TemplateConfig.ReplyPrefix != "" {
|
||||
response = c.TemplateConfig.ReplyPrefix + response
|
||||
}
|
||||
|
||||
return LLMResponse{
|
||||
Response: string(reply.Message),
|
||||
Response: response,
|
||||
Usage: tokenUsage,
|
||||
}, err
|
||||
}
|
||||
|
||||
@@ -118,9 +118,18 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
||||
nGPULayers = *c.NGPULayers
|
||||
}
|
||||
|
||||
triggers := make([]*pb.GrammarTrigger, 0)
|
||||
for _, t := range c.FunctionsConfig.GrammarConfig.GrammarTriggers {
|
||||
triggers = append(triggers, &pb.GrammarTrigger{
|
||||
Word: t.Word,
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
return &pb.ModelOptions{
|
||||
CUDA: c.CUDA || c.Diffusers.CUDA,
|
||||
SchedulerType: c.Diffusers.SchedulerType,
|
||||
GrammarTriggers: triggers,
|
||||
PipelineType: c.Diffusers.PipelineType,
|
||||
CFGScale: c.CFGScale,
|
||||
LoraAdapter: c.LoraAdapter,
|
||||
@@ -149,6 +158,12 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
||||
SwapSpace: int32(c.SwapSpace),
|
||||
MaxModelLen: int32(c.MaxModelLen),
|
||||
TensorParallelSize: int32(c.TensorParallelSize),
|
||||
DisableLogStatus: c.DisableLogStatus,
|
||||
DType: c.DType,
|
||||
// LimitMMPerPrompt vLLM
|
||||
LimitImagePerPrompt: int32(c.LimitMMPerPrompt.LimitImagePerPrompt),
|
||||
LimitVideoPerPrompt: int32(c.LimitMMPerPrompt.LimitVideoPerPrompt),
|
||||
LimitAudioPerPrompt: int32(c.LimitMMPerPrompt.LimitAudioPerPrompt),
|
||||
MMProj: c.MMProj,
|
||||
FlashAttention: c.FlashAttention,
|
||||
CacheTypeKey: c.CacheTypeK,
|
||||
|
||||
@@ -9,10 +9,10 @@ import (
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func Rerank(modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
|
||||
|
||||
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
|
||||
func Rerank(request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
|
||||
opts := ModelOptions(backendConfig, appConfig)
|
||||
rerankModel, err := loader.Load(opts...)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -13,7 +13,6 @@ import (
|
||||
)
|
||||
|
||||
func SoundGeneration(
|
||||
modelFile string,
|
||||
text string,
|
||||
duration *float32,
|
||||
temperature *float32,
|
||||
@@ -25,8 +24,9 @@ func SoundGeneration(
|
||||
backendConfig config.BackendConfig,
|
||||
) (string, *proto.Result, error) {
|
||||
|
||||
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
|
||||
opts := ModelOptions(backendConfig, appConfig)
|
||||
soundGenModel, err := loader.Load(opts...)
|
||||
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
@@ -44,7 +44,7 @@ func SoundGeneration(
|
||||
|
||||
res, err := soundGenModel.SoundGeneration(context.Background(), &proto.SoundGenerationRequest{
|
||||
Text: text,
|
||||
Model: modelFile,
|
||||
Model: backendConfig.Model,
|
||||
Dst: filePath,
|
||||
Sample: doSample,
|
||||
Duration: duration,
|
||||
|
||||
@@ -4,24 +4,17 @@ import (
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/grpc"
|
||||
model "github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (schema.TokenizeResponse, error) {
|
||||
|
||||
modelFile := backendConfig.Model
|
||||
|
||||
var inferenceModel grpc.Backend
|
||||
var err error
|
||||
|
||||
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
|
||||
opts := ModelOptions(backendConfig, appConfig)
|
||||
inferenceModel, err = loader.Load(opts...)
|
||||
|
||||
if backendConfig.Backend == "" {
|
||||
inferenceModel, err = loader.Load(opts...)
|
||||
} else {
|
||||
opts = append(opts, model.WithBackendString(backendConfig.Backend))
|
||||
inferenceModel, err = loader.Load(opts...)
|
||||
}
|
||||
if err != nil {
|
||||
return schema.TokenizeResponse{}, err
|
||||
}
|
||||
@@ -35,6 +28,10 @@ func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.Bac
|
||||
return schema.TokenizeResponse{}, err
|
||||
}
|
||||
|
||||
if resp.Tokens == nil {
|
||||
resp.Tokens = make([]int32, 0)
|
||||
}
|
||||
|
||||
return schema.TokenizeResponse{
|
||||
Tokens: resp.Tokens,
|
||||
}, nil
|
||||
|
||||
@@ -47,7 +47,7 @@ func ModelTranscription(audio, language string, translate bool, ml *model.ModelL
|
||||
tks = append(tks, int(t))
|
||||
}
|
||||
tr.Segments = append(tr.Segments,
|
||||
schema.Segment{
|
||||
schema.TranscriptionSegment{
|
||||
Text: s.Text,
|
||||
Id: int(s.Id),
|
||||
Start: time.Duration(s.Start),
|
||||
|
||||
@@ -14,28 +14,22 @@ import (
|
||||
)
|
||||
|
||||
func ModelTTS(
|
||||
backend,
|
||||
text,
|
||||
modelFile,
|
||||
voice,
|
||||
language string,
|
||||
loader *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig,
|
||||
backendConfig config.BackendConfig,
|
||||
) (string, *proto.Result, error) {
|
||||
bb := backend
|
||||
if bb == "" {
|
||||
bb = model.PiperBackend
|
||||
}
|
||||
|
||||
opts := ModelOptions(backendConfig, appConfig, model.WithBackendString(bb), model.WithModel(modelFile))
|
||||
opts := ModelOptions(backendConfig, appConfig, model.WithDefaultBackendString(model.PiperBackend))
|
||||
ttsModel, err := loader.Load(opts...)
|
||||
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
|
||||
if ttsModel == nil {
|
||||
return "", nil, fmt.Errorf("could not load piper model")
|
||||
return "", nil, fmt.Errorf("could not load tts model %q", backendConfig.Model)
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
|
||||
@@ -45,22 +39,21 @@ func ModelTTS(
|
||||
fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
|
||||
filePath := filepath.Join(appConfig.AudioDir, fileName)
|
||||
|
||||
// If the model file is not empty, we pass it joined with the model path
|
||||
// We join the model name to the model path here. This seems to only be done for TTS and is HIGHLY suspect.
|
||||
// This should be addressed in a follow up PR soon.
|
||||
// Copying it over nearly verbatim, as TTS backends are not functional without this.
|
||||
modelPath := ""
|
||||
if modelFile != "" {
|
||||
// If the model file is not empty, we pass it joined with the model path
|
||||
// Checking first that it exists and is not outside ModelPath
|
||||
// TODO: we should actually first check if the modelFile is looking like
|
||||
// a FS path
|
||||
mp := filepath.Join(loader.ModelPath, modelFile)
|
||||
if _, err := os.Stat(mp); err == nil {
|
||||
if err := utils.VerifyPath(mp, appConfig.ModelPath); err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
modelPath = mp
|
||||
} else {
|
||||
modelPath = modelFile
|
||||
// Checking first that it exists and is not outside ModelPath
|
||||
// TODO: we should actually first check if the modelFile is looking like
|
||||
// a FS path
|
||||
mp := filepath.Join(loader.ModelPath, backendConfig.Model)
|
||||
if _, err := os.Stat(mp); err == nil {
|
||||
if err := utils.VerifyPath(mp, appConfig.ModelPath); err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
modelPath = mp
|
||||
} else {
|
||||
modelPath = backendConfig.Model // skip this step if it fails?????
|
||||
}
|
||||
|
||||
res, err := ttsModel.TTS(context.Background(), &proto.TTSRequest{
|
||||
|
||||
38
core/backend/vad.go
Normal file
38
core/backend/vad.go
Normal file
@@ -0,0 +1,38 @@
|
||||
package backend
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
|
||||
func VAD(request *schema.VADRequest,
|
||||
ctx context.Context,
|
||||
ml *model.ModelLoader,
|
||||
appConfig *config.ApplicationConfig,
|
||||
backendConfig config.BackendConfig) (*schema.VADResponse, error) {
|
||||
opts := ModelOptions(backendConfig, appConfig)
|
||||
vadModel, err := ml.Load(opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req := proto.VADRequest{
|
||||
Audio: request.Audio,
|
||||
}
|
||||
resp, err := vadModel.VAD(ctx, &req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
segments := []schema.VADSegment{}
|
||||
for _, s := range resp.Segments {
|
||||
segments = append(segments, schema.VADSegment{Start: s.Start, End: s.End})
|
||||
}
|
||||
|
||||
return &schema.VADResponse{
|
||||
Segments: segments,
|
||||
}, nil
|
||||
}
|
||||
@@ -100,7 +100,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
|
||||
log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
|
||||
}
|
||||
|
||||
err = startup.InstallModels(galleries, "", mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
|
||||
err = startup.InstallModels(galleries, mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -32,14 +32,13 @@ type RunCMD struct {
|
||||
|
||||
Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
|
||||
AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"`
|
||||
RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"`
|
||||
PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"`
|
||||
Models []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"`
|
||||
PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"`
|
||||
|
||||
F16 bool `name:"f16" env:"LOCALAI_F16,F16" help:"Enable GPU acceleration" group:"performance"`
|
||||
Threads int `env:"LOCALAI_THREADS,THREADS" short:"t" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"`
|
||||
ContextSize int `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" default:"512" help:"Default context size for models" group:"performance"`
|
||||
ContextSize int `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" help:"Default context size for models" group:"performance"`
|
||||
|
||||
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
|
||||
CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
|
||||
@@ -90,7 +89,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval),
|
||||
config.WithF16(r.F16),
|
||||
config.WithStringGalleries(r.Galleries),
|
||||
config.WithModelLibraryURL(r.RemoteLibrary),
|
||||
config.WithCors(r.CORS),
|
||||
config.WithCorsAllowOrigins(r.CORSAllowOrigins),
|
||||
config.WithCsrf(r.CSRF),
|
||||
|
||||
@@ -86,13 +86,14 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
|
||||
options := config.BackendConfig{}
|
||||
options.SetDefaults()
|
||||
options.Backend = t.Backend
|
||||
options.Model = t.Model
|
||||
|
||||
var inputFile *string
|
||||
if t.InputFile != "" {
|
||||
inputFile = &t.InputFile
|
||||
}
|
||||
|
||||
filePath, _, err := backend.SoundGeneration(t.Model, text,
|
||||
filePath, _, err := backend.SoundGeneration(text,
|
||||
parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
|
||||
inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)
|
||||
|
||||
|
||||
@@ -52,8 +52,10 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error {
|
||||
|
||||
options := config.BackendConfig{}
|
||||
options.SetDefaults()
|
||||
options.Backend = t.Backend
|
||||
options.Model = t.Model
|
||||
|
||||
filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, t.Language, ml, opts, options)
|
||||
filePath, _, err := backend.ModelTTS(text, t.Voice, t.Language, ml, opts, options)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -44,8 +44,6 @@ type ApplicationConfig struct {
|
||||
DisableGalleryEndpoint bool
|
||||
LoadToMemory []string
|
||||
|
||||
ModelLibraryURL string
|
||||
|
||||
Galleries []Gallery
|
||||
|
||||
BackendAssets embed.FS
|
||||
@@ -126,12 +124,6 @@ func WithP2PToken(s string) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
func WithModelLibraryURL(url string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.ModelLibraryURL = url
|
||||
}
|
||||
}
|
||||
|
||||
func WithLibPath(path string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.LibPath = path
|
||||
|
||||
@@ -130,25 +130,28 @@ type LLMConfig struct {
|
||||
TrimSpace []string `yaml:"trimspace"`
|
||||
TrimSuffix []string `yaml:"trimsuffix"`
|
||||
|
||||
ContextSize *int `yaml:"context_size"`
|
||||
NUMA bool `yaml:"numa"`
|
||||
LoraAdapter string `yaml:"lora_adapter"`
|
||||
LoraBase string `yaml:"lora_base"`
|
||||
LoraAdapters []string `yaml:"lora_adapters"`
|
||||
LoraScales []float32 `yaml:"lora_scales"`
|
||||
LoraScale float32 `yaml:"lora_scale"`
|
||||
NoMulMatQ bool `yaml:"no_mulmatq"`
|
||||
DraftModel string `yaml:"draft_model"`
|
||||
NDraft int32 `yaml:"n_draft"`
|
||||
Quantization string `yaml:"quantization"`
|
||||
LoadFormat string `yaml:"load_format"`
|
||||
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
|
||||
TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM
|
||||
EnforceEager bool `yaml:"enforce_eager"` // vLLM
|
||||
SwapSpace int `yaml:"swap_space"` // vLLM
|
||||
MaxModelLen int `yaml:"max_model_len"` // vLLM
|
||||
TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
|
||||
MMProj string `yaml:"mmproj"`
|
||||
ContextSize *int `yaml:"context_size"`
|
||||
NUMA bool `yaml:"numa"`
|
||||
LoraAdapter string `yaml:"lora_adapter"`
|
||||
LoraBase string `yaml:"lora_base"`
|
||||
LoraAdapters []string `yaml:"lora_adapters"`
|
||||
LoraScales []float32 `yaml:"lora_scales"`
|
||||
LoraScale float32 `yaml:"lora_scale"`
|
||||
NoMulMatQ bool `yaml:"no_mulmatq"`
|
||||
DraftModel string `yaml:"draft_model"`
|
||||
NDraft int32 `yaml:"n_draft"`
|
||||
Quantization string `yaml:"quantization"`
|
||||
LoadFormat string `yaml:"load_format"`
|
||||
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
|
||||
TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM
|
||||
EnforceEager bool `yaml:"enforce_eager"` // vLLM
|
||||
SwapSpace int `yaml:"swap_space"` // vLLM
|
||||
MaxModelLen int `yaml:"max_model_len"` // vLLM
|
||||
TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
|
||||
DisableLogStatus bool `yaml:"disable_log_stats"` // vLLM
|
||||
DType string `yaml:"dtype"` // vLLM
|
||||
LimitMMPerPrompt LimitMMPerPrompt `yaml:"limit_mm_per_prompt"` // vLLM
|
||||
MMProj string `yaml:"mmproj"`
|
||||
|
||||
FlashAttention bool `yaml:"flash_attention"`
|
||||
NoKVOffloading bool `yaml:"no_kv_offloading"`
|
||||
@@ -166,6 +169,13 @@ type LLMConfig struct {
|
||||
CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale
|
||||
}
|
||||
|
||||
// LimitMMPerPrompt is a struct that holds the configuration for the limit-mm-per-prompt config in vLLM
|
||||
type LimitMMPerPrompt struct {
|
||||
LimitImagePerPrompt int `yaml:"image"`
|
||||
LimitVideoPerPrompt int `yaml:"video"`
|
||||
LimitAudioPerPrompt int `yaml:"audio"`
|
||||
}
|
||||
|
||||
// AutoGPTQ is a struct that holds the configuration specific to the AutoGPTQ backend
|
||||
type AutoGPTQ struct {
|
||||
ModelBaseName string `yaml:"model_base_name"`
|
||||
@@ -203,6 +213,8 @@ type TemplateConfig struct {
|
||||
Multimodal string `yaml:"multimodal"`
|
||||
|
||||
JinjaTemplate bool `yaml:"jinja_template"`
|
||||
|
||||
ReplyPrefix string `yaml:"reply_prefix"`
|
||||
}
|
||||
|
||||
func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {
|
||||
@@ -212,7 +224,15 @@ func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {
|
||||
return err
|
||||
}
|
||||
*c = BackendConfig(aux)
|
||||
|
||||
c.KnownUsecases = GetUsecasesFromYAML(c.KnownUsecaseStrings)
|
||||
// Make sure the usecases are valid, we rewrite with what we identified
|
||||
c.KnownUsecaseStrings = []string{}
|
||||
for k, usecase := range GetAllBackendConfigUsecases() {
|
||||
if c.HasUsecases(usecase) {
|
||||
c.KnownUsecaseStrings = append(c.KnownUsecaseStrings, k)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -287,7 +307,8 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
||||
defaultTopP := 0.95
|
||||
defaultTopK := 40
|
||||
defaultTemp := 0.9
|
||||
defaultMirostat := 2
|
||||
// https://github.com/mudler/LocalAI/issues/2780
|
||||
defaultMirostat := 0
|
||||
defaultMirostatTAU := 5.0
|
||||
defaultMirostatETA := 0.1
|
||||
defaultTypicalP := 1.0
|
||||
@@ -368,16 +389,6 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
||||
cfg.Embeddings = &falseV
|
||||
}
|
||||
|
||||
// Value passed by the top level are treated as default (no implicit defaults)
|
||||
// defaults are set by the user
|
||||
if ctx == 0 {
|
||||
ctx = 1024
|
||||
}
|
||||
|
||||
if cfg.ContextSize == nil {
|
||||
cfg.ContextSize = &ctx
|
||||
}
|
||||
|
||||
if threads == 0 {
|
||||
// Threads can't be 0
|
||||
threads = 4
|
||||
@@ -399,7 +410,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
||||
cfg.Debug = &trueV
|
||||
}
|
||||
|
||||
guessDefaultsFromFile(cfg, lo.modelPath)
|
||||
guessDefaultsFromFile(cfg, lo.modelPath, ctx)
|
||||
}
|
||||
|
||||
func (c *BackendConfig) Validate() bool {
|
||||
@@ -436,19 +447,21 @@ func (c *BackendConfig) HasTemplate() bool {
|
||||
type BackendConfigUsecases int
|
||||
|
||||
const (
|
||||
FLAG_ANY BackendConfigUsecases = 0b000000000
|
||||
FLAG_CHAT BackendConfigUsecases = 0b000000001
|
||||
FLAG_COMPLETION BackendConfigUsecases = 0b000000010
|
||||
FLAG_EDIT BackendConfigUsecases = 0b000000100
|
||||
FLAG_EMBEDDINGS BackendConfigUsecases = 0b000001000
|
||||
FLAG_RERANK BackendConfigUsecases = 0b000010000
|
||||
FLAG_IMAGE BackendConfigUsecases = 0b000100000
|
||||
FLAG_TRANSCRIPT BackendConfigUsecases = 0b001000000
|
||||
FLAG_TTS BackendConfigUsecases = 0b010000000
|
||||
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b100000000
|
||||
FLAG_ANY BackendConfigUsecases = 0b00000000000
|
||||
FLAG_CHAT BackendConfigUsecases = 0b00000000001
|
||||
FLAG_COMPLETION BackendConfigUsecases = 0b00000000010
|
||||
FLAG_EDIT BackendConfigUsecases = 0b00000000100
|
||||
FLAG_EMBEDDINGS BackendConfigUsecases = 0b00000001000
|
||||
FLAG_RERANK BackendConfigUsecases = 0b00000010000
|
||||
FLAG_IMAGE BackendConfigUsecases = 0b00000100000
|
||||
FLAG_TRANSCRIPT BackendConfigUsecases = 0b00001000000
|
||||
FLAG_TTS BackendConfigUsecases = 0b00010000000
|
||||
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b00100000000
|
||||
FLAG_TOKENIZE BackendConfigUsecases = 0b01000000000
|
||||
FLAG_VAD BackendConfigUsecases = 0b10000000000
|
||||
|
||||
// Common Subsets
|
||||
FLAG_LLM BackendConfigUsecases = FLAG_CHAT & FLAG_COMPLETION & FLAG_EDIT
|
||||
FLAG_LLM BackendConfigUsecases = FLAG_CHAT | FLAG_COMPLETION | FLAG_EDIT
|
||||
)
|
||||
|
||||
func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
|
||||
@@ -463,10 +476,16 @@ func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
|
||||
"FLAG_TRANSCRIPT": FLAG_TRANSCRIPT,
|
||||
"FLAG_TTS": FLAG_TTS,
|
||||
"FLAG_SOUND_GENERATION": FLAG_SOUND_GENERATION,
|
||||
"FLAG_TOKENIZE": FLAG_TOKENIZE,
|
||||
"FLAG_VAD": FLAG_VAD,
|
||||
"FLAG_LLM": FLAG_LLM,
|
||||
}
|
||||
}
|
||||
|
||||
func stringToFlag(s string) string {
|
||||
return "FLAG_" + strings.ToUpper(s)
|
||||
}
|
||||
|
||||
func GetUsecasesFromYAML(input []string) *BackendConfigUsecases {
|
||||
if len(input) == 0 {
|
||||
return nil
|
||||
@@ -474,7 +493,7 @@ func GetUsecasesFromYAML(input []string) *BackendConfigUsecases {
|
||||
result := FLAG_ANY
|
||||
flags := GetAllBackendConfigUsecases()
|
||||
for _, str := range input {
|
||||
flag, exists := flags["FLAG_"+strings.ToUpper(str)]
|
||||
flag, exists := flags[stringToFlag(str)]
|
||||
if exists {
|
||||
result |= flag
|
||||
}
|
||||
@@ -548,5 +567,18 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
|
||||
}
|
||||
}
|
||||
|
||||
if (u & FLAG_TOKENIZE) == FLAG_TOKENIZE {
|
||||
tokenizeCapableBackends := []string{"llama.cpp", "rwkv"}
|
||||
if !slices.Contains(tokenizeCapableBackends, c.Backend) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
if (u & FLAG_VAD) == FLAG_VAD {
|
||||
if c.Backend != "silero-vad" {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -81,10 +81,10 @@ func readMultipleBackendConfigsFromFile(file string, opts ...ConfigLoaderOption)
|
||||
c := &[]*BackendConfig{}
|
||||
f, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read config file: %w", err)
|
||||
return nil, fmt.Errorf("readMultipleBackendConfigsFromFile cannot read config file %q: %w", file, err)
|
||||
}
|
||||
if err := yaml.Unmarshal(f, c); err != nil {
|
||||
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
|
||||
return nil, fmt.Errorf("readMultipleBackendConfigsFromFile cannot unmarshal config file %q: %w", file, err)
|
||||
}
|
||||
|
||||
for _, cc := range *c {
|
||||
@@ -101,10 +101,10 @@ func readBackendConfigFromFile(file string, opts ...ConfigLoaderOption) (*Backen
|
||||
c := &BackendConfig{}
|
||||
f, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read config file: %w", err)
|
||||
return nil, fmt.Errorf("readBackendConfigFromFile cannot read config file %q: %w", file, err)
|
||||
}
|
||||
if err := yaml.Unmarshal(f, c); err != nil {
|
||||
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
|
||||
return nil, fmt.Errorf("readBackendConfigFromFile cannot unmarshal config file %q: %w", file, err)
|
||||
}
|
||||
|
||||
c.SetDefaults(opts...)
|
||||
@@ -117,7 +117,9 @@ func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
|
||||
// Load a config file if present after the model name
|
||||
cfg := &BackendConfig{
|
||||
PredictionOptions: schema.PredictionOptions{
|
||||
Model: modelName,
|
||||
BasicModelRequest: schema.BasicModelRequest{
|
||||
Model: modelName,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -145,6 +147,15 @@ func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
func (bcl *BackendConfigLoader) LoadBackendConfigFileByNameDefaultOptions(modelName string, appConfig *ApplicationConfig) (*BackendConfig, error) {
|
||||
return bcl.LoadBackendConfigFileByName(modelName, appConfig.ModelPath,
|
||||
LoadOptionDebug(appConfig.Debug),
|
||||
LoadOptionThreads(appConfig.Threads),
|
||||
LoadOptionContextSize(appConfig.ContextSize),
|
||||
LoadOptionF16(appConfig.F16),
|
||||
ModelPath(appConfig.ModelPath))
|
||||
}
|
||||
|
||||
// This format is currently only used when reading a single file at startup, passed in via ApplicationConfig.ConfigFile
|
||||
func (bcl *BackendConfigLoader) LoadMultipleBackendConfigsSingleFile(file string, opts ...ConfigLoaderOption) error {
|
||||
bcl.Lock()
|
||||
@@ -167,7 +178,7 @@ func (bcl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoa
|
||||
defer bcl.Unlock()
|
||||
c, err := readBackendConfigFromFile(file, opts...)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot read config file: %w", err)
|
||||
return fmt.Errorf("LoadBackendConfig cannot read config file %q: %w", file, err)
|
||||
}
|
||||
|
||||
if c.Validate() {
|
||||
@@ -324,9 +335,10 @@ func (bcl *BackendConfigLoader) Preload(modelPath string) error {
|
||||
func (bcl *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error {
|
||||
bcl.Lock()
|
||||
defer bcl.Unlock()
|
||||
|
||||
entries, err := os.ReadDir(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot read directory '%s': %w", path, err)
|
||||
return fmt.Errorf("LoadBackendConfigsFromPath cannot read directory '%s': %w", path, err)
|
||||
}
|
||||
files := make([]fs.FileInfo, 0, len(entries))
|
||||
for _, entry := range entries {
|
||||
@@ -344,13 +356,13 @@ func (bcl *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...
|
||||
}
|
||||
c, err := readBackendConfigFromFile(filepath.Join(path, file.Name()), opts...)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msgf("cannot read config file: %s", file.Name())
|
||||
log.Error().Err(err).Str("File Name", file.Name()).Msgf("LoadBackendConfigsFromPath cannot read config file")
|
||||
continue
|
||||
}
|
||||
if c.Validate() {
|
||||
bcl.configs[c.Name] = *c
|
||||
} else {
|
||||
log.Error().Err(err).Msgf("config is not valid")
|
||||
log.Error().Err(err).Str("Name", c.Name).Msgf("config is not valid")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -48,9 +48,9 @@ parameters:
|
||||
Expect(config.Name).To(Equal("bar-baz"))
|
||||
Expect(config.Validate()).To(BeTrue())
|
||||
|
||||
// download https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml
|
||||
// download https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml
|
||||
httpClient := http.Client{}
|
||||
resp, err := httpClient.Get("https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml")
|
||||
resp, err := httpClient.Get("https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml")
|
||||
Expect(err).To(BeNil())
|
||||
defer resp.Body.Close()
|
||||
tmp, err = os.CreateTemp("", "config.yaml")
|
||||
|
||||
253
core/config/gguf.go
Normal file
253
core/config/gguf.go
Normal file
@@ -0,0 +1,253 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
gguf "github.com/thxcode/gguf-parser-go"
|
||||
)
|
||||
|
||||
type familyType uint8
|
||||
|
||||
const (
|
||||
Unknown familyType = iota
|
||||
LLaMa3
|
||||
CommandR
|
||||
Phi3
|
||||
ChatML
|
||||
Mistral03
|
||||
Gemma
|
||||
DeepSeek2
|
||||
)
|
||||
|
||||
const (
|
||||
defaultContextSize = 1024
|
||||
)
|
||||
|
||||
type settingsConfig struct {
|
||||
StopWords []string
|
||||
TemplateConfig TemplateConfig
|
||||
RepeatPenalty float64
|
||||
}
|
||||
|
||||
// default settings to adopt with a given model family
|
||||
var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{
|
||||
Gemma: {
|
||||
RepeatPenalty: 1.0,
|
||||
StopWords: []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
|
||||
TemplateConfig: TemplateConfig{
|
||||
Chat: "{{.Input }}\n<start_of_turn>model\n",
|
||||
ChatMessage: "<start_of_turn>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<end_of_turn>",
|
||||
Completion: "{{.Input}}",
|
||||
},
|
||||
},
|
||||
DeepSeek2: {
|
||||
StopWords: []string{"<|end▁of▁sentence|>"},
|
||||
TemplateConfig: TemplateConfig{
|
||||
ChatMessage: `{{if eq .RoleName "user" -}}User: {{.Content }}
|
||||
{{ end -}}
|
||||
{{if eq .RoleName "assistant" -}}Assistant: {{.Content}}<|end▁of▁sentence|>{{end}}
|
||||
{{if eq .RoleName "system" -}}{{.Content}}
|
||||
{{end -}}`,
|
||||
Chat: "{{.Input -}}\nAssistant: ",
|
||||
},
|
||||
},
|
||||
LLaMa3: {
|
||||
StopWords: []string{"<|eot_id|>"},
|
||||
TemplateConfig: TemplateConfig{
|
||||
Chat: "<|begin_of_text|>{{.Input }}\n<|start_header_id|>assistant<|end_header_id|>",
|
||||
ChatMessage: "<|start_header_id|>{{ .RoleName }}<|end_header_id|>\n\n{{.Content }}<|eot_id|>",
|
||||
},
|
||||
},
|
||||
CommandR: {
|
||||
TemplateConfig: TemplateConfig{
|
||||
Chat: "{{.Input -}}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
|
||||
Functions: `<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>
|
||||
You are a function calling AI model, you can call the following functions:
|
||||
## Available Tools
|
||||
{{range .Functions}}
|
||||
- {"type": "function", "function": {"name": "{{.Name}}", "description": "{{.Description}}", "parameters": {{toJson .Parameters}} }}
|
||||
{{end}}
|
||||
When using a tool, reply with JSON, for instance {"name": "tool_name", "arguments": {"param1": "value1", "param2": "value2"}}
|
||||
<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{.Input -}}`,
|
||||
ChatMessage: `{{if eq .RoleName "user" -}}
|
||||
<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
|
||||
{{- else if eq .RoleName "system" -}}
|
||||
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
|
||||
{{- else if eq .RoleName "assistant" -}}
|
||||
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
|
||||
{{- else if eq .RoleName "tool" -}}
|
||||
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
|
||||
{{- else if .FunctionCall -}}
|
||||
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{toJson .FunctionCall}}}<|END_OF_TURN_TOKEN|>
|
||||
{{- end -}}`,
|
||||
},
|
||||
StopWords: []string{"<|END_OF_TURN_TOKEN|>"},
|
||||
},
|
||||
Phi3: {
|
||||
TemplateConfig: TemplateConfig{
|
||||
Chat: "{{.Input}}\n<|assistant|>",
|
||||
ChatMessage: "<|{{ .RoleName }}|>\n{{.Content}}<|end|>",
|
||||
Completion: "{{.Input}}",
|
||||
},
|
||||
StopWords: []string{"<|end|>", "<|endoftext|>"},
|
||||
},
|
||||
ChatML: {
|
||||
TemplateConfig: TemplateConfig{
|
||||
Chat: "{{.Input -}}\n<|im_start|>assistant",
|
||||
Functions: `<|im_start|>system
|
||||
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||
{{range .Functions}}
|
||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||
{{end}}
|
||||
For each function call return a json object with function name and arguments
|
||||
<|im_end|>
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant`,
|
||||
ChatMessage: `<|im_start|>{{ .RoleName }}
|
||||
{{ if .FunctionCall -}}
|
||||
Function call:
|
||||
{{ else if eq .RoleName "tool" -}}
|
||||
Function response:
|
||||
{{ end -}}
|
||||
{{ if .Content -}}
|
||||
{{.Content }}
|
||||
{{ end -}}
|
||||
{{ if .FunctionCall -}}
|
||||
{{toJson .FunctionCall}}
|
||||
{{ end -}}<|im_end|>`,
|
||||
},
|
||||
StopWords: []string{"<|im_end|>", "<dummy32000>", "</s>"},
|
||||
},
|
||||
Mistral03: {
|
||||
TemplateConfig: TemplateConfig{
|
||||
Chat: "{{.Input -}}",
|
||||
Functions: `[AVAILABLE_TOOLS] [{{range .Functions}}{"type": "function", "function": {"name": "{{.Name}}", "description": "{{.Description}}", "parameters": {{toJson .Parameters}} }}{{end}} ] [/AVAILABLE_TOOLS]{{.Input }}`,
|
||||
ChatMessage: `{{if eq .RoleName "user" -}}
|
||||
[INST] {{.Content }} [/INST]
|
||||
{{- else if .FunctionCall -}}
|
||||
[TOOL_CALLS] {{toJson .FunctionCall}} [/TOOL_CALLS]
|
||||
{{- else if eq .RoleName "tool" -}}
|
||||
[TOOL_RESULTS] {{.Content}} [/TOOL_RESULTS]
|
||||
{{- else -}}
|
||||
{{ .Content -}}
|
||||
{{ end -}}`,
|
||||
},
|
||||
StopWords: []string{"<|im_end|>", "<dummy32000>", "</tool_call>", "<|eot_id|>", "<|end_of_text|>", "</s>", "[/TOOL_CALLS]", "[/ACTIONS]"},
|
||||
},
|
||||
}
|
||||
|
||||
// this maps well known template used in HF to model families defined above
|
||||
var knownTemplates = map[string]familyType{
|
||||
`{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}`: ChatML,
|
||||
`{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}`: Mistral03,
|
||||
}
|
||||
|
||||
func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
|
||||
|
||||
if defaultCtx == 0 && cfg.ContextSize == nil {
|
||||
ctxSize := f.EstimateLLaMACppUsage().ContextSize
|
||||
if ctxSize > 0 {
|
||||
cSize := int(ctxSize)
|
||||
cfg.ContextSize = &cSize
|
||||
} else {
|
||||
defaultCtx = defaultContextSize
|
||||
cfg.ContextSize = &defaultCtx
|
||||
}
|
||||
}
|
||||
|
||||
if cfg.HasTemplate() {
|
||||
// nothing to guess here
|
||||
log.Debug().Any("name", cfg.Name).Msgf("guessDefaultsFromFile: %s", "template already set")
|
||||
return
|
||||
}
|
||||
|
||||
log.Debug().
|
||||
Any("eosTokenID", f.Tokenizer().EOSTokenID).
|
||||
Any("bosTokenID", f.Tokenizer().BOSTokenID).
|
||||
Any("modelName", f.Model().Name).
|
||||
Any("architecture", f.Architecture().Architecture).Msgf("Model file loaded: %s", cfg.ModelFileName())
|
||||
|
||||
// guess the name
|
||||
if cfg.Name == "" {
|
||||
cfg.Name = f.Model().Name
|
||||
}
|
||||
|
||||
family := identifyFamily(f)
|
||||
|
||||
if family == Unknown {
|
||||
log.Debug().Msgf("guessDefaultsFromFile: %s", "family not identified")
|
||||
return
|
||||
}
|
||||
|
||||
// identify template
|
||||
settings, ok := defaultsSettings[family]
|
||||
if ok {
|
||||
cfg.TemplateConfig = settings.TemplateConfig
|
||||
log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: guessed template %+v", cfg.TemplateConfig)
|
||||
if len(cfg.StopWords) == 0 {
|
||||
cfg.StopWords = settings.StopWords
|
||||
}
|
||||
if cfg.RepeatPenalty == 0.0 {
|
||||
cfg.RepeatPenalty = settings.RepeatPenalty
|
||||
}
|
||||
} else {
|
||||
log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: no template found for family")
|
||||
}
|
||||
|
||||
if cfg.HasTemplate() {
|
||||
return
|
||||
}
|
||||
|
||||
// identify from well known templates first, otherwise use the raw jinja template
|
||||
chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template")
|
||||
if found {
|
||||
// try to use the jinja template
|
||||
cfg.TemplateConfig.JinjaTemplate = true
|
||||
cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString()
|
||||
}
|
||||
}
|
||||
|
||||
func identifyFamily(f *gguf.GGUFFile) familyType {
|
||||
|
||||
// identify from well known templates first
|
||||
chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template")
|
||||
if found && chatTemplate.ValueString() != "" {
|
||||
if family, ok := knownTemplates[chatTemplate.ValueString()]; ok {
|
||||
return family
|
||||
}
|
||||
}
|
||||
|
||||
// otherwise try to identify from the model properties
|
||||
arch := f.Architecture().Architecture
|
||||
eosTokenID := f.Tokenizer().EOSTokenID
|
||||
bosTokenID := f.Tokenizer().BOSTokenID
|
||||
|
||||
isYI := arch == "llama" && bosTokenID == 1 && eosTokenID == 2
|
||||
// WTF! Mistral0.3 and isYi have same bosTokenID and eosTokenID
|
||||
|
||||
llama3 := arch == "llama" && eosTokenID == 128009
|
||||
commandR := arch == "command-r" && eosTokenID == 255001
|
||||
qwen2 := arch == "qwen2"
|
||||
phi3 := arch == "phi-3"
|
||||
gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Model().Name), "gemma")
|
||||
deepseek2 := arch == "deepseek2"
|
||||
|
||||
switch {
|
||||
case deepseek2:
|
||||
return DeepSeek2
|
||||
case gemma:
|
||||
return Gemma
|
||||
case llama3:
|
||||
return LLaMa3
|
||||
case commandR:
|
||||
return CommandR
|
||||
case phi3:
|
||||
return Phi3
|
||||
case qwen2, isYI:
|
||||
return ChatML
|
||||
default:
|
||||
return Unknown
|
||||
}
|
||||
}
|
||||
@@ -3,147 +3,12 @@ package config
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
gguf "github.com/thxcode/gguf-parser-go"
|
||||
)
|
||||
|
||||
type familyType uint8
|
||||
|
||||
const (
|
||||
Unknown familyType = iota
|
||||
LLaMa3
|
||||
CommandR
|
||||
Phi3
|
||||
ChatML
|
||||
Mistral03
|
||||
Gemma
|
||||
DeepSeek2
|
||||
)
|
||||
|
||||
type settingsConfig struct {
|
||||
StopWords []string
|
||||
TemplateConfig TemplateConfig
|
||||
RepeatPenalty float64
|
||||
}
|
||||
|
||||
// default settings to adopt with a given model family
|
||||
var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{
|
||||
Gemma: {
|
||||
RepeatPenalty: 1.0,
|
||||
StopWords: []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
|
||||
TemplateConfig: TemplateConfig{
|
||||
Chat: "{{.Input }}\n<start_of_turn>model\n",
|
||||
ChatMessage: "<start_of_turn>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<end_of_turn>",
|
||||
Completion: "{{.Input}}",
|
||||
},
|
||||
},
|
||||
DeepSeek2: {
|
||||
StopWords: []string{"<|end▁of▁sentence|>"},
|
||||
TemplateConfig: TemplateConfig{
|
||||
ChatMessage: `{{if eq .RoleName "user" -}}User: {{.Content }}
|
||||
{{ end -}}
|
||||
{{if eq .RoleName "assistant" -}}Assistant: {{.Content}}<|end▁of▁sentence|>{{end}}
|
||||
{{if eq .RoleName "system" -}}{{.Content}}
|
||||
{{end -}}`,
|
||||
Chat: "{{.Input -}}\nAssistant: ",
|
||||
},
|
||||
},
|
||||
LLaMa3: {
|
||||
StopWords: []string{"<|eot_id|>"},
|
||||
TemplateConfig: TemplateConfig{
|
||||
Chat: "<|begin_of_text|>{{.Input }}\n<|start_header_id|>assistant<|end_header_id|>",
|
||||
ChatMessage: "<|start_header_id|>{{ .RoleName }}<|end_header_id|>\n\n{{.Content }}<|eot_id|>",
|
||||
},
|
||||
},
|
||||
CommandR: {
|
||||
TemplateConfig: TemplateConfig{
|
||||
Chat: "{{.Input -}}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
|
||||
Functions: `<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>
|
||||
You are a function calling AI model, you can call the following functions:
|
||||
## Available Tools
|
||||
{{range .Functions}}
|
||||
- {"type": "function", "function": {"name": "{{.Name}}", "description": "{{.Description}}", "parameters": {{toJson .Parameters}} }}
|
||||
{{end}}
|
||||
When using a tool, reply with JSON, for instance {"name": "tool_name", "arguments": {"param1": "value1", "param2": "value2"}}
|
||||
<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{.Input -}}`,
|
||||
ChatMessage: `{{if eq .RoleName "user" -}}
|
||||
<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
|
||||
{{- else if eq .RoleName "system" -}}
|
||||
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
|
||||
{{- else if eq .RoleName "assistant" -}}
|
||||
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
|
||||
{{- else if eq .RoleName "tool" -}}
|
||||
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
|
||||
{{- else if .FunctionCall -}}
|
||||
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{toJson .FunctionCall}}}<|END_OF_TURN_TOKEN|>
|
||||
{{- end -}}`,
|
||||
},
|
||||
StopWords: []string{"<|END_OF_TURN_TOKEN|>"},
|
||||
},
|
||||
Phi3: {
|
||||
TemplateConfig: TemplateConfig{
|
||||
Chat: "{{.Input}}\n<|assistant|>",
|
||||
ChatMessage: "<|{{ .RoleName }}|>\n{{.Content}}<|end|>",
|
||||
Completion: "{{.Input}}",
|
||||
},
|
||||
StopWords: []string{"<|end|>", "<|endoftext|>"},
|
||||
},
|
||||
ChatML: {
|
||||
TemplateConfig: TemplateConfig{
|
||||
Chat: "{{.Input -}}\n<|im_start|>assistant",
|
||||
Functions: `<|im_start|>system
|
||||
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||
{{range .Functions}}
|
||||
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||
{{end}}
|
||||
For each function call return a json object with function name and arguments
|
||||
<|im_end|>
|
||||
{{.Input -}}
|
||||
<|im_start|>assistant`,
|
||||
ChatMessage: `<|im_start|>{{ .RoleName }}
|
||||
{{ if .FunctionCall -}}
|
||||
Function call:
|
||||
{{ else if eq .RoleName "tool" -}}
|
||||
Function response:
|
||||
{{ end -}}
|
||||
{{ if .Content -}}
|
||||
{{.Content }}
|
||||
{{ end -}}
|
||||
{{ if .FunctionCall -}}
|
||||
{{toJson .FunctionCall}}
|
||||
{{ end -}}<|im_end|>`,
|
||||
},
|
||||
StopWords: []string{"<|im_end|>", "<dummy32000>", "</s>"},
|
||||
},
|
||||
Mistral03: {
|
||||
TemplateConfig: TemplateConfig{
|
||||
Chat: "{{.Input -}}",
|
||||
Functions: `[AVAILABLE_TOOLS] [{{range .Functions}}{"type": "function", "function": {"name": "{{.Name}}", "description": "{{.Description}}", "parameters": {{toJson .Parameters}} }}{{end}} ] [/AVAILABLE_TOOLS]{{.Input }}`,
|
||||
ChatMessage: `{{if eq .RoleName "user" -}}
|
||||
[INST] {{.Content }} [/INST]
|
||||
{{- else if .FunctionCall -}}
|
||||
[TOOL_CALLS] {{toJson .FunctionCall}} [/TOOL_CALLS]
|
||||
{{- else if eq .RoleName "tool" -}}
|
||||
[TOOL_RESULTS] {{.Content}} [/TOOL_RESULTS]
|
||||
{{- else -}}
|
||||
{{ .Content -}}
|
||||
{{ end -}}`,
|
||||
},
|
||||
StopWords: []string{"<|im_end|>", "<dummy32000>", "</tool_call>", "<|eot_id|>", "<|end_of_text|>", "</s>", "[/TOOL_CALLS]", "[/ACTIONS]"},
|
||||
},
|
||||
}
|
||||
|
||||
// this maps well known template used in HF to model families defined above
|
||||
var knownTemplates = map[string]familyType{
|
||||
`{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}`: ChatML,
|
||||
`{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}`: Mistral03,
|
||||
}
|
||||
|
||||
func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) {
|
||||
|
||||
func guessDefaultsFromFile(cfg *BackendConfig, modelPath string, defaultCtx int) {
|
||||
if os.Getenv("LOCALAI_DISABLE_GUESSING") == "true" {
|
||||
log.Debug().Msgf("guessDefaultsFromFile: %s", "guessing disabled with LOCALAI_DISABLE_GUESSING")
|
||||
return
|
||||
@@ -154,105 +19,20 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) {
|
||||
return
|
||||
}
|
||||
|
||||
if cfg.HasTemplate() {
|
||||
// nothing to guess here
|
||||
log.Debug().Any("name", cfg.Name).Msgf("guessDefaultsFromFile: %s", "template already set")
|
||||
return
|
||||
}
|
||||
|
||||
// We try to guess only if we don't have a template defined already
|
||||
f, err := gguf.ParseGGUFFile(filepath.Join(modelPath, cfg.ModelFileName()))
|
||||
if err != nil {
|
||||
// Only valid for gguf files
|
||||
log.Debug().Msgf("guessDefaultsFromFile: %s", "not a GGUF file")
|
||||
guessPath := filepath.Join(modelPath, cfg.ModelFileName())
|
||||
|
||||
// try to parse the gguf file
|
||||
f, err := gguf.ParseGGUFFile(guessPath)
|
||||
if err == nil {
|
||||
guessGGUFFromFile(cfg, f, defaultCtx)
|
||||
return
|
||||
}
|
||||
|
||||
log.Debug().
|
||||
Any("eosTokenID", f.Tokenizer().EOSTokenID).
|
||||
Any("bosTokenID", f.Tokenizer().BOSTokenID).
|
||||
Any("modelName", f.Model().Name).
|
||||
Any("architecture", f.Architecture().Architecture).Msgf("Model file loaded: %s", cfg.ModelFileName())
|
||||
|
||||
// guess the name
|
||||
if cfg.Name == "" {
|
||||
cfg.Name = f.Model().Name
|
||||
}
|
||||
|
||||
family := identifyFamily(f)
|
||||
|
||||
if family == Unknown {
|
||||
log.Debug().Msgf("guessDefaultsFromFile: %s", "family not identified")
|
||||
return
|
||||
}
|
||||
|
||||
// identify template
|
||||
settings, ok := defaultsSettings[family]
|
||||
if ok {
|
||||
cfg.TemplateConfig = settings.TemplateConfig
|
||||
log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: guessed template %+v", cfg.TemplateConfig)
|
||||
if len(cfg.StopWords) == 0 {
|
||||
cfg.StopWords = settings.StopWords
|
||||
if cfg.ContextSize == nil {
|
||||
if defaultCtx == 0 {
|
||||
defaultCtx = defaultContextSize
|
||||
}
|
||||
if cfg.RepeatPenalty == 0.0 {
|
||||
cfg.RepeatPenalty = settings.RepeatPenalty
|
||||
}
|
||||
} else {
|
||||
log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: no template found for family")
|
||||
}
|
||||
|
||||
if cfg.HasTemplate() {
|
||||
return
|
||||
}
|
||||
|
||||
// identify from well known templates first, otherwise use the raw jinja template
|
||||
chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template")
|
||||
if found {
|
||||
// try to use the jinja template
|
||||
cfg.TemplateConfig.JinjaTemplate = true
|
||||
cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString()
|
||||
}
|
||||
}
|
||||
|
||||
func identifyFamily(f *gguf.GGUFFile) familyType {
|
||||
|
||||
// identify from well known templates first
|
||||
chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template")
|
||||
if found && chatTemplate.ValueString() != "" {
|
||||
if family, ok := knownTemplates[chatTemplate.ValueString()]; ok {
|
||||
return family
|
||||
}
|
||||
}
|
||||
|
||||
// otherwise try to identify from the model properties
|
||||
arch := f.Architecture().Architecture
|
||||
eosTokenID := f.Tokenizer().EOSTokenID
|
||||
bosTokenID := f.Tokenizer().BOSTokenID
|
||||
|
||||
isYI := arch == "llama" && bosTokenID == 1 && eosTokenID == 2
|
||||
// WTF! Mistral0.3 and isYi have same bosTokenID and eosTokenID
|
||||
|
||||
llama3 := arch == "llama" && eosTokenID == 128009
|
||||
commandR := arch == "command-r" && eosTokenID == 255001
|
||||
qwen2 := arch == "qwen2"
|
||||
phi3 := arch == "phi-3"
|
||||
gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Model().Name), "gemma")
|
||||
deepseek2 := arch == "deepseek2"
|
||||
|
||||
switch {
|
||||
case deepseek2:
|
||||
return DeepSeek2
|
||||
case gemma:
|
||||
return Gemma
|
||||
case llama3:
|
||||
return LLaMa3
|
||||
case commandR:
|
||||
return CommandR
|
||||
case phi3:
|
||||
return Phi3
|
||||
case qwen2, isYI:
|
||||
return ChatML
|
||||
default:
|
||||
return Unknown
|
||||
cfg.ContextSize = &defaultCtx
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,6 +29,8 @@ func InstallModelFromGallery(galleries []config.Gallery, name string, basePath s
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
config.Description = model.Description
|
||||
config.License = model.License
|
||||
} else if len(model.ConfigFile) > 0 {
|
||||
// TODO: is this worse than using the override method with a blank cfg yaml?
|
||||
reYamlConfig, err := yaml.Marshal(model.ConfigFile)
|
||||
@@ -114,7 +116,7 @@ func FindModel(models []*GalleryModel, name string, basePath string) *GalleryMod
|
||||
// List available models
|
||||
// Models galleries are a list of yaml files that are hosted on a remote server (for example github).
|
||||
// Each yaml file contains a list of models that can be downloaded and optionally overrides to define a new model setting.
|
||||
func AvailableGalleryModels(galleries []config.Gallery, basePath string) ([]*GalleryModel, error) {
|
||||
func AvailableGalleryModels(galleries []config.Gallery, basePath string) (GalleryModels, error) {
|
||||
var models []*GalleryModel
|
||||
|
||||
// Get models from galleries
|
||||
|
||||
@@ -48,8 +48,10 @@ var _ = Describe("Model test", func() {
|
||||
defer os.RemoveAll(tempdir)
|
||||
|
||||
gallery := []GalleryModel{{
|
||||
Name: "bert",
|
||||
URL: bertEmbeddingsURL,
|
||||
Metadata: Metadata{
|
||||
Name: "bert",
|
||||
URL: bertEmbeddingsURL,
|
||||
},
|
||||
}}
|
||||
out, err := yaml.Marshal(gallery)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
@@ -11,6 +11,14 @@ import (
|
||||
// It is used to install the model by resolving the URL and downloading the files.
|
||||
// The other fields are used to override the configuration of the model.
|
||||
type GalleryModel struct {
|
||||
Metadata `json:",inline" yaml:",inline"`
|
||||
// config_file is read in the situation where URL is blank - and therefore this is a base config.
|
||||
ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"`
|
||||
// Overrides are used to override the configuration of the model located at URL
|
||||
Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"`
|
||||
}
|
||||
|
||||
type Metadata struct {
|
||||
URL string `json:"url,omitempty" yaml:"url,omitempty"`
|
||||
Name string `json:"name,omitempty" yaml:"name,omitempty"`
|
||||
Description string `json:"description,omitempty" yaml:"description,omitempty"`
|
||||
@@ -18,10 +26,6 @@ type GalleryModel struct {
|
||||
URLs []string `json:"urls,omitempty" yaml:"urls,omitempty"`
|
||||
Icon string `json:"icon,omitempty" yaml:"icon,omitempty"`
|
||||
Tags []string `json:"tags,omitempty" yaml:"tags,omitempty"`
|
||||
// config_file is read in the situation where URL is blank - and therefore this is a base config.
|
||||
ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"`
|
||||
// Overrides are used to override the configuration of the model located at URL
|
||||
Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"`
|
||||
// AdditionalFiles are used to add additional files to the model
|
||||
AdditionalFiles []File `json:"files,omitempty" yaml:"files,omitempty"`
|
||||
// Gallery is a reference to the gallery which contains the model
|
||||
@@ -58,3 +62,15 @@ func (gm GalleryModels) FindByName(name string) *GalleryModel {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (gm GalleryModels) Paginate(pageNum int, itemsNum int) GalleryModels {
|
||||
start := (pageNum - 1) * itemsNum
|
||||
end := start + itemsNum
|
||||
if start > len(gm) {
|
||||
start = len(gm)
|
||||
}
|
||||
if end > len(gm) {
|
||||
end = len(gm)
|
||||
}
|
||||
return gm[start:end]
|
||||
}
|
||||
|
||||
@@ -9,7 +9,11 @@ import (
|
||||
var _ = Describe("Gallery API tests", func() {
|
||||
Context("requests", func() {
|
||||
It("parses github with a branch", func() {
|
||||
req := GalleryModel{URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main"}
|
||||
req := GalleryModel{
|
||||
Metadata: Metadata{
|
||||
URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main",
|
||||
},
|
||||
}
|
||||
e, err := GetGalleryConfigFromURL(req.URL, "")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(e.Name).To(Equal("gpt4all-j"))
|
||||
|
||||
@@ -130,7 +130,6 @@ func API(application *application.Application) (*fiber.App, error) {
|
||||
return metricsService.Shutdown()
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
// Health Checks should always be exempt from auth, so register these first
|
||||
routes.HealthRoutes(router)
|
||||
@@ -140,6 +139,28 @@ func API(application *application.Application) (*fiber.App, error) {
|
||||
return nil, fmt.Errorf("failed to create key auth config: %w", err)
|
||||
}
|
||||
|
||||
httpFS := http.FS(embedDirStatic)
|
||||
|
||||
router.Use(favicon.New(favicon.Config{
|
||||
URL: "/favicon.ico",
|
||||
FileSystem: httpFS,
|
||||
File: "static/favicon.ico",
|
||||
}))
|
||||
|
||||
router.Use("/static", filesystem.New(filesystem.Config{
|
||||
Root: httpFS,
|
||||
PathPrefix: "static",
|
||||
Browse: true,
|
||||
}))
|
||||
|
||||
if application.ApplicationConfig().ImageDir != "" {
|
||||
router.Static("/generated-images", application.ApplicationConfig().ImageDir)
|
||||
}
|
||||
|
||||
if application.ApplicationConfig().AudioDir != "" {
|
||||
router.Static("/generated-audio", application.ApplicationConfig().AudioDir)
|
||||
}
|
||||
|
||||
// Auth is applied to _all_ endpoints. No exceptions. Filtering out endpoints to bypass is the role of the Filter property of the KeyAuth Configuration
|
||||
router.Use(v2keyauth.New(*kaConfig))
|
||||
|
||||
@@ -167,27 +188,15 @@ func API(application *application.Application) (*fiber.App, error) {
|
||||
galleryService := services.NewGalleryService(application.ApplicationConfig())
|
||||
galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader())
|
||||
|
||||
routes.RegisterElevenLabsRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
routes.RegisterLocalAIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
|
||||
routes.RegisterOpenAIRoutes(router, application)
|
||||
requestExtractor := middleware.NewRequestExtractor(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
|
||||
routes.RegisterElevenLabsRoutes(router, requestExtractor, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
routes.RegisterLocalAIRoutes(router, requestExtractor, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
|
||||
routes.RegisterOpenAIRoutes(router, requestExtractor, application)
|
||||
if !application.ApplicationConfig().DisableWebUI {
|
||||
routes.RegisterUIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
|
||||
}
|
||||
routes.RegisterJINARoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
|
||||
httpFS := http.FS(embedDirStatic)
|
||||
|
||||
router.Use(favicon.New(favicon.Config{
|
||||
URL: "/favicon.ico",
|
||||
FileSystem: httpFS,
|
||||
File: "static/favicon.ico",
|
||||
}))
|
||||
|
||||
router.Use("/static", filesystem.New(filesystem.Config{
|
||||
Root: httpFS,
|
||||
PathPrefix: "static",
|
||||
Browse: true,
|
||||
}))
|
||||
routes.RegisterJINARoutes(router, requestExtractor, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
|
||||
|
||||
// Define a custom 404 handler
|
||||
// Note: keep this at the bottom!
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user