mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-24 16:51:44 -04:00
Compare commits
394 Commits
ci/better_
...
fix/closed
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
83110891fd | ||
|
|
2553de0187 | ||
|
|
408dfe62ee | ||
|
|
648ffdf449 | ||
|
|
04c0841ca9 | ||
|
|
43144c4743 | ||
|
|
a778668bcd | ||
|
|
4b131a7090 | ||
|
|
d06a052d54 | ||
|
|
b5115903bf | ||
|
|
afaff175d0 | ||
|
|
4686877c6d | ||
|
|
e5586e8781 | ||
|
|
3acd767ac4 | ||
|
|
5488fc3bc1 | ||
|
|
0965c6cd68 | ||
|
|
db704199dc | ||
|
|
2cc3b7128e | ||
|
|
88b99d30bb | ||
|
|
307a835199 | ||
|
|
f84b55d1ef | ||
|
|
139209353f | ||
|
|
a30058b80f | ||
|
|
53f406dc35 | ||
|
|
2649407f44 | ||
|
|
0a8f627cce | ||
|
|
76d4e88e0c | ||
|
|
d4d2a76f8f | ||
|
|
7d306c6431 | ||
|
|
44bdacac61 | ||
|
|
6bd6e2bdeb | ||
|
|
2908ff3f6b | ||
|
|
f19277b8e2 | ||
|
|
32de75c683 | ||
|
|
164a9e972f | ||
|
|
d747f2c89b | ||
|
|
58662db48e | ||
|
|
078942fc9f | ||
|
|
6dfee99575 | ||
|
|
ad62156d54 | ||
|
|
1689740269 | ||
|
|
50a3b54e34 | ||
|
|
e94a50e9db | ||
|
|
4e0f3cc980 | ||
|
|
2a8cbad122 | ||
|
|
453c45d022 | ||
|
|
4550abbfce | ||
|
|
f2ba1cfb01 | ||
|
|
8c4196faf3 | ||
|
|
b0f4556c0f | ||
|
|
fa5c98549a | ||
|
|
3d12d2037c | ||
|
|
d6522e69ca | ||
|
|
ef1507d000 | ||
|
|
a3d69872e3 | ||
|
|
33b2d38dd0 | ||
|
|
74408bdc77 | ||
|
|
8c4f720fb5 | ||
|
|
8002ad27cb | ||
|
|
1b8a77433a | ||
|
|
a370a11115 | ||
|
|
aa87eff283 | ||
|
|
0d784f46e5 | ||
|
|
c54cfd3609 | ||
|
|
6555994060 | ||
|
|
0893d3cbbe | ||
|
|
90cacb9692 | ||
|
|
69d2902b0a | ||
|
|
c1752cbb83 | ||
|
|
b8e129f2a6 | ||
|
|
cc6fac1688 | ||
|
|
043cb94436 | ||
|
|
bbdf78615e | ||
|
|
e332ff8066 | ||
|
|
26d99ed1c7 | ||
|
|
1da8d8b9db | ||
|
|
bf8f8671d1 | ||
|
|
51cba89682 | ||
|
|
3e8e71f8b6 | ||
|
|
4edd8c80b4 | ||
|
|
fd70a22196 | ||
|
|
56f4deb938 | ||
|
|
9bd7f3f995 | ||
|
|
ee21b00a8d | ||
|
|
1f43678d53 | ||
|
|
20c0e128c0 | ||
|
|
5c3d1d81e6 | ||
|
|
c22b3187a7 | ||
|
|
54f2657870 | ||
|
|
cef7f8a014 | ||
|
|
bf8e50a11d | ||
|
|
6c6cd8bbe0 | ||
|
|
00d6c2a966 | ||
|
|
415cf31aa3 | ||
|
|
f55053bfba | ||
|
|
e24654ada0 | ||
|
|
c4cecba07f | ||
|
|
38cad0b8dc | ||
|
|
052af98dcd | ||
|
|
56d8f5163c | ||
|
|
b6af4f4467 | ||
|
|
a5b08f43ff | ||
|
|
c15f506fd5 | ||
|
|
a2a63460e9 | ||
|
|
2fcea486eb | ||
|
|
5c9d26e39b | ||
|
|
191bc2e50a | ||
|
|
fbb9facda4 | ||
|
|
c6a819e92f | ||
|
|
a50cde69a2 | ||
|
|
e5bd74878e | ||
|
|
dc98b2ea44 | ||
|
|
acf119828f | ||
|
|
a53392f919 | ||
|
|
eee1fb2c75 | ||
|
|
8826ca93b3 | ||
|
|
5049629381 | ||
|
|
92136a5d34 | ||
|
|
075e5015c0 | ||
|
|
46fd4ff6db | ||
|
|
4a4e44bf55 | ||
|
|
22247ad92c | ||
|
|
d0f2bf3181 | ||
|
|
0e4e101101 | ||
|
|
f4b1bd8f6d | ||
|
|
e95cb8eaac | ||
|
|
db1159b651 | ||
|
|
a9a3a07c3b | ||
|
|
06c8339862 | ||
|
|
2394f7833f | ||
|
|
36e19928eb | ||
|
|
abc27e0dc4 | ||
|
|
42d6b9e0cc | ||
|
|
c866b77586 | ||
|
|
5356b81b7f | ||
|
|
30fe163100 | ||
|
|
afb5bbc1b8 | ||
|
|
12a8d0e46f | ||
|
|
09c7d8d458 | ||
|
|
149cc1eb13 | ||
|
|
a5ce987bdb | ||
|
|
2edc732c33 | ||
|
|
fec01d9e69 | ||
|
|
9ca5ef339a | ||
|
|
a8003f2b7c | ||
|
|
25deb4ba95 | ||
|
|
3d3db1d74f | ||
|
|
cabb1602e8 | ||
|
|
25e7661de2 | ||
|
|
cbfab81c35 | ||
|
|
925315ab5c | ||
|
|
5213e79f5c | ||
|
|
7fe6d0ad2b | ||
|
|
cf747bcdec | ||
|
|
d51444d606 | ||
|
|
e35d8169b1 | ||
|
|
a7ac2f7bb0 | ||
|
|
b7496dea9b | ||
|
|
8f45852273 | ||
|
|
48a1a7da23 | ||
|
|
535f771761 | ||
|
|
eda5c1422b | ||
|
|
300f2779e4 | ||
|
|
3be71811ca | ||
|
|
4cfa040f17 | ||
|
|
c5d5092347 | ||
|
|
c3e374f30a | ||
|
|
424b2e0064 | ||
|
|
486b491c4c | ||
|
|
7781dfe49e | ||
|
|
5139dadceb | ||
|
|
96ea240b39 | ||
|
|
8b8522046d | ||
|
|
36d980e520 | ||
|
|
11c16f529e | ||
|
|
58c4a6d9d9 | ||
|
|
9a159fbfad | ||
|
|
791c3ace72 | ||
|
|
ac5d655598 | ||
|
|
a9f438e1e6 | ||
|
|
6b72bdcb0a | ||
|
|
f336c1a7b8 | ||
|
|
47bc72343c | ||
|
|
bf87943da7 | ||
|
|
dbe1e652bc | ||
|
|
8ac79cfc33 | ||
|
|
0180bcf22a | ||
|
|
a7998e0263 | ||
|
|
923e4cce85 | ||
|
|
e2d40d0fcc | ||
|
|
70f6d80677 | ||
|
|
22e30fccbc | ||
|
|
5da07b0a84 | ||
|
|
0ff1b7f8f7 | ||
|
|
71be066937 | ||
|
|
b95c523385 | ||
|
|
589a2ac869 | ||
|
|
68fc014c6d | ||
|
|
56db715a91 | ||
|
|
c2804c42fe | ||
|
|
1655411ccd | ||
|
|
3daba4731c | ||
|
|
45ce1803f8 | ||
|
|
164dee65c3 | ||
|
|
2259512345 | ||
|
|
b8e7a76524 | ||
|
|
72f97e62bb | ||
|
|
607fd066f0 | ||
|
|
69a3b22fa1 | ||
|
|
11d960b2a6 | ||
|
|
ae6d327698 | ||
|
|
bb9a5aea9e | ||
|
|
49739e85a0 | ||
|
|
12950cac21 | ||
|
|
d2da2f1672 | ||
|
|
e1d0d94073 | ||
|
|
b5b01ea635 | ||
|
|
bc684c259c | ||
|
|
da3bc8077d | ||
|
|
6a6094a58d | ||
|
|
8369614b6e | ||
|
|
cac472d4a1 | ||
|
|
6d0ede813f | ||
|
|
a0252127a2 | ||
|
|
00ad01fd79 | ||
|
|
d5c0ad8a1b | ||
|
|
11ed1cebb3 | ||
|
|
fc640be591 | ||
|
|
311954f41b | ||
|
|
bbfa5075f6 | ||
|
|
47fe31aa53 | ||
|
|
18dddc1ae0 | ||
|
|
b38fd8780b | ||
|
|
11eaf9c0a7 | ||
|
|
5d892f86ea | ||
|
|
7f06954425 | ||
|
|
771a052480 | ||
|
|
99b57b321b | ||
|
|
75ef6ccf1e | ||
|
|
de1fbdca71 | ||
|
|
ce827139bb | ||
|
|
0762aa5327 | ||
|
|
81ae92f017 | ||
|
|
84d6e5a987 | ||
|
|
ac5f6f210b | ||
|
|
61fe2404a0 | ||
|
|
db2d8f4d04 | ||
|
|
a9c521eb41 | ||
|
|
a913fd310d | ||
|
|
fbaae8528d | ||
|
|
7d030b56b2 | ||
|
|
0add16049e | ||
|
|
2bb48b0816 | ||
|
|
023ce59d44 | ||
|
|
7822d944b5 | ||
|
|
b510352393 | ||
|
|
d3a217c254 | ||
|
|
2a3427e533 | ||
|
|
7ec02babd5 | ||
|
|
5a4c4f4ab2 | ||
|
|
af095204fa | ||
|
|
70e53bc191 | ||
|
|
7cf59d9f98 | ||
|
|
7147f1990f | ||
|
|
16f7140461 | ||
|
|
6f1b4f29a8 | ||
|
|
93658fc5fd | ||
|
|
736df11454 | ||
|
|
2669f4738a | ||
|
|
aca2c4196a | ||
|
|
9cfd89087b | ||
|
|
6aba6223c7 | ||
|
|
a28b3771a7 | ||
|
|
d02a0f6f01 | ||
|
|
c12d121783 | ||
|
|
b06046fe4c | ||
|
|
6d350ccce0 | ||
|
|
bcd3c1deb2 | ||
|
|
5afea9babf | ||
|
|
a495515e10 | ||
|
|
9a8a249932 | ||
|
|
dfa183551e | ||
|
|
d903925fe7 | ||
|
|
0ccf35ba45 | ||
|
|
a199d98fb7 | ||
|
|
9475a6fa05 | ||
|
|
1d651bbfad | ||
|
|
7fade2ffbd | ||
|
|
f0702e5ff8 | ||
|
|
a7a27a5082 | ||
|
|
10024905a0 | ||
|
|
0c31d1a4c8 | ||
|
|
9ba108bd5b | ||
|
|
721340ec9a | ||
|
|
b9da06dafe | ||
|
|
20f9f267e8 | ||
|
|
a85c4f96e0 | ||
|
|
9337a01e9d | ||
|
|
8758aa4ecf | ||
|
|
42fba91521 | ||
|
|
0c84c7b1cc | ||
|
|
73c9b3598d | ||
|
|
bb6d06f0d1 | ||
|
|
13cb7960bd | ||
|
|
e4c696d966 | ||
|
|
d58f9c333b | ||
|
|
dd270d58bd | ||
|
|
1465e3dfd1 | ||
|
|
1651f25d03 | ||
|
|
e67c9ae3bf | ||
|
|
f2f372b7f5 | ||
|
|
857443e2b5 | ||
|
|
ad449a237e | ||
|
|
3f74b34f06 | ||
|
|
1dbb3b8abc | ||
|
|
5d416006ae | ||
|
|
27b03a52f3 | ||
|
|
1ed5af1da8 | ||
|
|
7278bf3de8 | ||
|
|
d6b3fbb4ad | ||
|
|
3457acc48b | ||
|
|
f18862fb44 | ||
|
|
be55fce9be | ||
|
|
409e2d348e | ||
|
|
8bbf09370c | ||
|
|
714e80abce | ||
|
|
121f143fc0 | ||
|
|
c50e0edcb8 | ||
|
|
d6c4e751f2 | ||
|
|
faadabea14 | ||
|
|
57f7900210 | ||
|
|
5bb2321fe0 | ||
|
|
10324d9ad2 | ||
|
|
02de274e00 | ||
|
|
7d92936e1a | ||
|
|
447d9f844b | ||
|
|
89979da33f | ||
|
|
71f3fa653a | ||
|
|
cd385c2720 | ||
|
|
83ffd626dc | ||
|
|
121ffe61c5 | ||
|
|
710f566553 | ||
|
|
bd57ebf042 | ||
|
|
ae4b67fb56 | ||
|
|
9729d2ae37 | ||
|
|
4dfa085339 | ||
|
|
7137c32f8f | ||
|
|
e30114a4a4 | ||
|
|
a92b3b13e9 | ||
|
|
c4534cd908 | ||
|
|
9f61ac8acc | ||
|
|
74eaf02484 | ||
|
|
7ba4a78fcc | ||
|
|
f3357a17b8 | ||
|
|
8627bc2dd4 | ||
|
|
0c0bc18c94 | ||
|
|
63ee689f21 | ||
|
|
a0e0804f25 | ||
|
|
71b8232076 | ||
|
|
2e2a0dffbc | ||
|
|
6d20f38510 | ||
|
|
9e3e892ac7 | ||
|
|
5fcafc3d1e | ||
|
|
74f8785047 | ||
|
|
b1773e33d5 | ||
|
|
a507c13f8e | ||
|
|
8317839ca5 | ||
|
|
4a1a3a56ba | ||
|
|
f7ffa9cd58 | ||
|
|
60117ec057 | ||
|
|
1c708d21de | ||
|
|
1d94aaa10f | ||
|
|
8814b31805 | ||
|
|
36e185ba63 | ||
|
|
2c8623dbb4 | ||
|
|
e198347886 | ||
|
|
66cf38b0b3 | ||
|
|
11b2adae0c | ||
|
|
61b5602111 | ||
|
|
abcf0ff000 | ||
|
|
9cfc9ac66f | ||
|
|
c3306fe825 | ||
|
|
ad5978b3ca | ||
|
|
4e11ca55fd | ||
|
|
52ba230d31 | ||
|
|
307ad7592b | ||
|
|
06aa068ac7 | ||
|
|
ecc6345436 | ||
|
|
c8fc92d6d5 | ||
|
|
b3f362f229 | ||
|
|
e03363df3d | ||
|
|
d1a222ea87 | ||
|
|
69a2cf06c8 | ||
|
|
c53196e197 |
17
.devcontainer-scripts/postcreate.sh
Normal file
17
.devcontainer-scripts/postcreate.sh
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
cd /workspace
|
||||||
|
|
||||||
|
# Get the files into the volume without a bind mount
|
||||||
|
if [ ! -d ".git" ]; then
|
||||||
|
git clone https://github.com/mudler/LocalAI.git .
|
||||||
|
else
|
||||||
|
git fetch
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Standard Post-Create script completed."
|
||||||
|
|
||||||
|
if [ -f "/devcontainer-customization/postcreate.sh" ]; then
|
||||||
|
echo "Launching customization postcreate.sh"
|
||||||
|
bash "/devcontainer-customization/postcreate.sh"
|
||||||
|
fi
|
||||||
16
.devcontainer-scripts/poststart.sh
Normal file
16
.devcontainer-scripts/poststart.sh
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
cd /workspace
|
||||||
|
|
||||||
|
# Grab the pre-stashed backend assets to avoid build issues
|
||||||
|
cp -r /build/backend-assets /workspace/backend-assets
|
||||||
|
|
||||||
|
# Ensures generated source files are present upon load
|
||||||
|
make prepare
|
||||||
|
|
||||||
|
echo "Standard Post-Start script completed."
|
||||||
|
|
||||||
|
if [ -f "/devcontainer-customization/poststart.sh" ]; then
|
||||||
|
echo "Launching customization poststart.sh"
|
||||||
|
bash "/devcontainer-customization/poststart.sh"
|
||||||
|
fi
|
||||||
55
.devcontainer-scripts/utils.sh
Normal file
55
.devcontainer-scripts/utils.sh
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# This file contains some really simple functions that are useful when building up customization scripts.
|
||||||
|
|
||||||
|
|
||||||
|
# Checks if the git config has a user registered - and sets it up if not.
|
||||||
|
#
|
||||||
|
# Param 1: name
|
||||||
|
# Param 2: email
|
||||||
|
#
|
||||||
|
config_user() {
|
||||||
|
echo "Configuring git for $1 <$2>"
|
||||||
|
local gcn=$(git config --global user.name)
|
||||||
|
if [ -z "${gcn}" ]; then
|
||||||
|
echo "Setting up git user / remote"
|
||||||
|
git config --global user.name "$1"
|
||||||
|
git config --global user.email "$2"
|
||||||
|
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# Checks if the git remote is configured - and sets it up if not. Fetches either way.
|
||||||
|
#
|
||||||
|
# Param 1: remote name
|
||||||
|
# Param 2: remote url
|
||||||
|
#
|
||||||
|
config_remote() {
|
||||||
|
echo "Adding git remote and fetching $2 as $1"
|
||||||
|
local gr=$(git remote -v | grep $1)
|
||||||
|
if [ -z "${gr}" ]; then
|
||||||
|
git remote add $1 $2
|
||||||
|
fi
|
||||||
|
git fetch $1
|
||||||
|
}
|
||||||
|
|
||||||
|
# Setup special .ssh files
|
||||||
|
# Prints out lines of text to make things pretty
|
||||||
|
# Param 1: bash array, filenames relative to the customization directory that should be copied to ~/.ssh
|
||||||
|
setup_ssh() {
|
||||||
|
echo "starting ~/.ssh directory setup..."
|
||||||
|
mkdir -p "${HOME}.ssh"
|
||||||
|
chmod 0700 "${HOME}/.ssh"
|
||||||
|
echo "-----"
|
||||||
|
local files=("$@")
|
||||||
|
for file in "${files[@]}" ; do
|
||||||
|
local cfile="/devcontainer-customization/${file}"
|
||||||
|
local hfile="${HOME}/.ssh/${file}"
|
||||||
|
if [ ! -f "${hfile}" ]; then
|
||||||
|
echo "copying \"${file}\""
|
||||||
|
cp "${cfile}" "${hfile}"
|
||||||
|
chmod 600 "${hfile}"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
echo "~/.ssh directory setup complete!"
|
||||||
|
}
|
||||||
25
.devcontainer/customization/README.md
Normal file
25
.devcontainer/customization/README.md
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
Place any additional resources your environment requires in this directory
|
||||||
|
|
||||||
|
Script hooks are currently called for:
|
||||||
|
`postcreate.sh` and `poststart.sh`
|
||||||
|
|
||||||
|
If files with those names exist here, they will be called at the end of the normal script.
|
||||||
|
|
||||||
|
This is a good place to set things like `git config --global user.name` - and to handle any other files that are mounted via this directory.
|
||||||
|
|
||||||
|
To assist in doing so, `source /.devcontainer-scripts/utils.sh` will provide utility functions that may be useful - for example:
|
||||||
|
|
||||||
|
```
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
source "/.devcontainer-scripts/utils.sh"
|
||||||
|
|
||||||
|
sshfiles=("config" "key.pub")
|
||||||
|
|
||||||
|
setup_ssh "${sshfiles[@]}"
|
||||||
|
|
||||||
|
config_user "YOUR NAME" "YOUR EMAIL"
|
||||||
|
|
||||||
|
config_remote "REMOTE NAME" "REMOTE URL"
|
||||||
|
|
||||||
|
```
|
||||||
24
.devcontainer/devcontainer.json
Normal file
24
.devcontainer/devcontainer.json
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
{
|
||||||
|
"$schema": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json",
|
||||||
|
"name": "LocalAI",
|
||||||
|
"workspaceFolder": "/workspace",
|
||||||
|
"dockerComposeFile": [ "./docker-compose-devcontainer.yml" ],
|
||||||
|
"service": "api",
|
||||||
|
"shutdownAction": "stopCompose",
|
||||||
|
"customizations": {
|
||||||
|
"vscode": {
|
||||||
|
"extensions": [
|
||||||
|
"golang.go",
|
||||||
|
"ms-vscode.makefile-tools",
|
||||||
|
"ms-azuretools.vscode-docker",
|
||||||
|
"ms-python.python",
|
||||||
|
"ms-python.debugpy",
|
||||||
|
"wayou.vscode-todo-highlight",
|
||||||
|
"waderyan.gitblame"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"forwardPorts": [8080, 3000],
|
||||||
|
"postCreateCommand": "bash /.devcontainer-scripts/postcreate.sh",
|
||||||
|
"postStartCommand": "bash /.devcontainer-scripts/poststart.sh"
|
||||||
|
}
|
||||||
48
.devcontainer/docker-compose-devcontainer.yml
Normal file
48
.devcontainer/docker-compose-devcontainer.yml
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
services:
|
||||||
|
api:
|
||||||
|
build:
|
||||||
|
context: ..
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
target: devcontainer
|
||||||
|
args:
|
||||||
|
- FFMPEG=true
|
||||||
|
- IMAGE_TYPE=extras
|
||||||
|
- GO_TAGS=stablediffusion p2p tts
|
||||||
|
env_file:
|
||||||
|
- ../.env
|
||||||
|
ports:
|
||||||
|
- 8080:8080
|
||||||
|
volumes:
|
||||||
|
- localai_workspace:/workspace
|
||||||
|
- ../models:/host-models
|
||||||
|
- ./customization:/devcontainer-customization
|
||||||
|
command: /bin/sh -c "while sleep 1000; do :; done"
|
||||||
|
cap_add:
|
||||||
|
- SYS_PTRACE
|
||||||
|
security_opt:
|
||||||
|
- seccomp:unconfined
|
||||||
|
prometheus:
|
||||||
|
image: prom/prometheus
|
||||||
|
container_name: prometheus
|
||||||
|
command:
|
||||||
|
- '--config.file=/etc/prometheus/prometheus.yml'
|
||||||
|
ports:
|
||||||
|
- 9090:9090
|
||||||
|
restart: unless-stopped
|
||||||
|
volumes:
|
||||||
|
- ./prometheus:/etc/prometheus
|
||||||
|
- prom_data:/prometheus
|
||||||
|
grafana:
|
||||||
|
image: grafana/grafana
|
||||||
|
container_name: grafana
|
||||||
|
ports:
|
||||||
|
- 3000:3000
|
||||||
|
restart: unless-stopped
|
||||||
|
environment:
|
||||||
|
- GF_SECURITY_ADMIN_USER=admin
|
||||||
|
- GF_SECURITY_ADMIN_PASSWORD=grafana
|
||||||
|
volumes:
|
||||||
|
- ./grafana:/etc/grafana/provisioning/datasources
|
||||||
|
volumes:
|
||||||
|
prom_data:
|
||||||
|
localai_workspace:
|
||||||
10
.devcontainer/grafana/datasource.yml
Normal file
10
.devcontainer/grafana/datasource.yml
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
|
||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
datasources:
|
||||||
|
- name: Prometheus
|
||||||
|
type: prometheus
|
||||||
|
url: http://prometheus:9090
|
||||||
|
isDefault: true
|
||||||
|
access: proxy
|
||||||
|
editable: true
|
||||||
21
.devcontainer/prometheus/prometheus.yml
Normal file
21
.devcontainer/prometheus/prometheus.yml
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
global:
|
||||||
|
scrape_interval: 15s
|
||||||
|
scrape_timeout: 10s
|
||||||
|
evaluation_interval: 15s
|
||||||
|
alerting:
|
||||||
|
alertmanagers:
|
||||||
|
- static_configs:
|
||||||
|
- targets: []
|
||||||
|
scheme: http
|
||||||
|
timeout: 10s
|
||||||
|
api_version: v1
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: prometheus
|
||||||
|
honor_timestamps: true
|
||||||
|
scrape_interval: 15s
|
||||||
|
scrape_timeout: 10s
|
||||||
|
metrics_path: /metrics
|
||||||
|
scheme: http
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
- localhost:9090
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
.idea
|
.idea
|
||||||
.github
|
.github
|
||||||
.vscode
|
.vscode
|
||||||
|
.devcontainer
|
||||||
models
|
models
|
||||||
examples/chatbot-ui/models
|
examples/chatbot-ui/models
|
||||||
examples/rwkv/models
|
examples/rwkv/models
|
||||||
|
|||||||
3
.env
3
.env
@@ -79,6 +79,9 @@
|
|||||||
### Enable to run parallel requests
|
### Enable to run parallel requests
|
||||||
# LOCALAI_PARALLEL_REQUESTS=true
|
# LOCALAI_PARALLEL_REQUESTS=true
|
||||||
|
|
||||||
|
# Enable to allow p2p mode
|
||||||
|
# LOCALAI_P2P=true
|
||||||
|
|
||||||
### Watchdog settings
|
### Watchdog settings
|
||||||
###
|
###
|
||||||
# Enables watchdog to kill backends that are inactive for too much time
|
# Enables watchdog to kill backends that are inactive for too much time
|
||||||
|
|||||||
4
.github/bump_deps.sh
vendored
4
.github/bump_deps.sh
vendored
@@ -18,5 +18,5 @@ if [ -z "$CURRENT_COMMIT" ]; then
|
|||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Updated $VAR from $CURRENT_COMMIT to $LAST_COMMIT." > "$REPO_message.txt"
|
echo "Changes: https://github.com/$REPO/compare/${CURRENT_COMMIT}..${LAST_COMMIT}" >> "${VAR}_message.txt"
|
||||||
echo "https://github.com/$REPO/compare/$CURRENT_COMMIT..$LAST_COMMIT" >> "$REPO_message.txt"
|
echo "${LAST_COMMIT}" >> "${VAR}_commit.txt"
|
||||||
11
.github/check_and_update.py
vendored
11
.github/check_and_update.py
vendored
@@ -29,9 +29,14 @@ def calculate_sha256(file_path):
|
|||||||
def manual_safety_check_hf(repo_id):
|
def manual_safety_check_hf(repo_id):
|
||||||
scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
|
scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
|
||||||
scan = scanResponse.json()
|
scan = scanResponse.json()
|
||||||
if scan['hasUnsafeFile']:
|
# Check if 'hasUnsafeFile' exists in the response
|
||||||
return scan
|
if 'hasUnsafeFile' in scan:
|
||||||
return None
|
if scan['hasUnsafeFile']:
|
||||||
|
return scan
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
download_type, repo_id_or_url = parse_uri(uri)
|
download_type, repo_id_or_url = parse_uri(uri)
|
||||||
|
|
||||||
|
|||||||
4
.github/dependabot.yml
vendored
4
.github/dependabot.yml
vendored
@@ -67,10 +67,6 @@ updates:
|
|||||||
directory: "/backend/python/parler-tts"
|
directory: "/backend/python/parler-tts"
|
||||||
schedule:
|
schedule:
|
||||||
interval: "weekly"
|
interval: "weekly"
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/petals"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
- package-ecosystem: "pip"
|
||||||
directory: "/backend/python/rerankers"
|
directory: "/backend/python/rerankers"
|
||||||
schedule:
|
schedule:
|
||||||
|
|||||||
15
.github/workflows/bump_deps.yaml
vendored
15
.github/workflows/bump_deps.yaml
vendored
@@ -45,18 +45,25 @@ jobs:
|
|||||||
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
|
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
|
||||||
{
|
{
|
||||||
echo 'message<<EOF'
|
echo 'message<<EOF'
|
||||||
cat "${{ matrix.repository }}_message.txt"
|
cat "${{ matrix.variable }}_message.txt"
|
||||||
echo EOF
|
echo EOF
|
||||||
} >> "$GITHUB_OUTPUT"
|
} >> "$GITHUB_OUTPUT"
|
||||||
|
{
|
||||||
|
echo 'commit<<EOF'
|
||||||
|
cat "${{ matrix.variable }}_commit.txt"
|
||||||
|
echo EOF
|
||||||
|
} >> "$GITHUB_OUTPUT"
|
||||||
|
rm -rfv ${{ matrix.variable }}_message.txt
|
||||||
|
rm -rfv ${{ matrix.variable }}_commit.txt
|
||||||
- name: Create Pull Request
|
- name: Create Pull Request
|
||||||
uses: peter-evans/create-pull-request@v6
|
uses: peter-evans/create-pull-request@v7
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||||
push-to-fork: ci-forks/LocalAI
|
push-to-fork: ci-forks/LocalAI
|
||||||
commit-message: ':arrow_up: Update ${{ matrix.repository }}'
|
commit-message: ':arrow_up: Update ${{ matrix.repository }}'
|
||||||
title: 'chore: :arrow_up: Update ${{ matrix.repository }}'
|
title: 'chore: :arrow_up: Update ${{ matrix.repository }} to `${{ steps.bump.outputs.commit }}`'
|
||||||
branch: "update/${{ matrix.variable }}"
|
branch: "update/${{ matrix.variable }}"
|
||||||
body: ${{ steps.bump.outputs.message }}
|
body: ${{ steps.bump.outputs.message }}
|
||||||
signoff: true
|
signoff: true
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
2
.github/workflows/bump_docs.yaml
vendored
2
.github/workflows/bump_docs.yaml
vendored
@@ -17,7 +17,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
bash .github/bump_docs.sh ${{ matrix.repository }}
|
bash .github/bump_docs.sh ${{ matrix.repository }}
|
||||||
- name: Create Pull Request
|
- name: Create Pull Request
|
||||||
uses: peter-evans/create-pull-request@v6
|
uses: peter-evans/create-pull-request@v7
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||||
push-to-fork: ci-forks/LocalAI
|
push-to-fork: ci-forks/LocalAI
|
||||||
|
|||||||
2
.github/workflows/checksum_checker.yaml
vendored
2
.github/workflows/checksum_checker.yaml
vendored
@@ -36,7 +36,7 @@ jobs:
|
|||||||
sudo chmod 777 /hf_cache
|
sudo chmod 777 /hf_cache
|
||||||
bash .github/checksum_checker.sh gallery/index.yaml
|
bash .github/checksum_checker.sh gallery/index.yaml
|
||||||
- name: Create Pull Request
|
- name: Create Pull Request
|
||||||
uses: peter-evans/create-pull-request@v6
|
uses: peter-evans/create-pull-request@v7
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||||
push-to-fork: ci-forks/LocalAI
|
push-to-fork: ci-forks/LocalAI
|
||||||
|
|||||||
64
.github/workflows/deploy-explorer.yaml
vendored
Normal file
64
.github/workflows/deploy-explorer.yaml
vendored
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
name: Explorer deployment
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
tags:
|
||||||
|
- 'v*'
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ci-deploy-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-linux:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
submodules: true
|
||||||
|
- uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version: '1.21.x'
|
||||||
|
cache: false
|
||||||
|
- name: Dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
|
||||||
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
|
make protogen-go
|
||||||
|
- name: Build api
|
||||||
|
run: |
|
||||||
|
CGO_ENABLED=0 make build-api
|
||||||
|
- name: rm
|
||||||
|
uses: appleboy/ssh-action@v1.0.3
|
||||||
|
with:
|
||||||
|
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
||||||
|
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
||||||
|
key: ${{ secrets.EXPLORER_SSH_KEY }}
|
||||||
|
port: ${{ secrets.EXPLORER_SSH_PORT }}
|
||||||
|
script: |
|
||||||
|
sudo rm -rf local-ai/ || true
|
||||||
|
- name: copy file via ssh
|
||||||
|
uses: appleboy/scp-action@v0.1.7
|
||||||
|
with:
|
||||||
|
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
||||||
|
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
||||||
|
key: ${{ secrets.EXPLORER_SSH_KEY }}
|
||||||
|
port: ${{ secrets.EXPLORER_SSH_PORT }}
|
||||||
|
source: "local-ai"
|
||||||
|
overwrite: true
|
||||||
|
rm: true
|
||||||
|
target: ./local-ai
|
||||||
|
- name: restarting
|
||||||
|
uses: appleboy/ssh-action@v1.0.3
|
||||||
|
with:
|
||||||
|
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
||||||
|
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
||||||
|
key: ${{ secrets.EXPLORER_SSH_KEY }}
|
||||||
|
port: ${{ secrets.EXPLORER_SSH_PORT }}
|
||||||
|
script: |
|
||||||
|
sudo cp -rfv local-ai/local-ai /usr/bin/local-ai
|
||||||
|
sudo systemctl restart local-ai
|
||||||
117
.github/workflows/image.yml
vendored
117
.github/workflows/image.yml
vendored
@@ -13,6 +13,78 @@ concurrency:
|
|||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
hipblas-jobs:
|
||||||
|
uses: ./.github/workflows/image_build.yml
|
||||||
|
with:
|
||||||
|
tag-latest: ${{ matrix.tag-latest }}
|
||||||
|
tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
|
ffmpeg: ${{ matrix.ffmpeg }}
|
||||||
|
image-type: ${{ matrix.image-type }}
|
||||||
|
build-type: ${{ matrix.build-type }}
|
||||||
|
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||||
|
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
|
platforms: ${{ matrix.platforms }}
|
||||||
|
runs-on: ${{ matrix.runs-on }}
|
||||||
|
base-image: ${{ matrix.base-image }}
|
||||||
|
grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||||
|
aio: ${{ matrix.aio }}
|
||||||
|
makeflags: ${{ matrix.makeflags }}
|
||||||
|
latest-image: ${{ matrix.latest-image }}
|
||||||
|
latest-image-aio: ${{ matrix.latest-image-aio }}
|
||||||
|
secrets:
|
||||||
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
|
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||||
|
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||||
|
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||||
|
strategy:
|
||||||
|
# Pushing with all jobs in parallel
|
||||||
|
# eats the bandwidth of all the nodes
|
||||||
|
max-parallel: 2
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-hipblas-ffmpeg'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'extras'
|
||||||
|
aio: "-aio-gpu-hipblas"
|
||||||
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
latest-image: 'latest-gpu-hipblas'
|
||||||
|
latest-image-aio: 'latest-aio-gpu-hipblas'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-hipblas'
|
||||||
|
ffmpeg: 'false'
|
||||||
|
image-type: 'extras'
|
||||||
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-hipblas-ffmpeg-core'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-hipblas-core'
|
||||||
|
ffmpeg: 'false'
|
||||||
|
image-type: 'core'
|
||||||
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
self-hosted-jobs:
|
self-hosted-jobs:
|
||||||
uses: ./.github/workflows/image_build.yml
|
uses: ./.github/workflows/image_build.yml
|
||||||
with:
|
with:
|
||||||
@@ -39,7 +111,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
# Pushing with all jobs in parallel
|
# Pushing with all jobs in parallel
|
||||||
# eats the bandwidth of all the nodes
|
# eats the bandwidth of all the nodes
|
||||||
max-parallel: ${{ github.event_name != 'pull_request' && 6 || 10 }}
|
max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
# Extra images
|
# Extra images
|
||||||
@@ -122,29 +194,6 @@ jobs:
|
|||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'hipblas'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'auto'
|
|
||||||
tag-suffix: '-hipblas-ffmpeg'
|
|
||||||
ffmpeg: 'true'
|
|
||||||
image-type: 'extras'
|
|
||||||
aio: "-aio-gpu-hipblas"
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
latest-image: 'latest-gpu-hipblas'
|
|
||||||
latest-image-aio: 'latest-aio-gpu-hipblas'
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'hipblas'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
tag-suffix: '-hipblas'
|
|
||||||
ffmpeg: 'false'
|
|
||||||
image-type: 'extras'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'sycl_f16'
|
- build-type: 'sycl_f16'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
@@ -212,26 +261,6 @@ jobs:
|
|||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'hipblas'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
tag-suffix: '-hipblas-ffmpeg-core'
|
|
||||||
ffmpeg: 'true'
|
|
||||||
image-type: 'core'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'hipblas'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
tag-suffix: '-hipblas-core'
|
|
||||||
ffmpeg: 'false'
|
|
||||||
image-type: 'core'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
|
|
||||||
core-image-build:
|
core-image-build:
|
||||||
uses: ./.github/workflows/image_build.yml
|
uses: ./.github/workflows/image_build.yml
|
||||||
|
|||||||
6
.github/workflows/release.yaml
vendored
6
.github/workflows/release.yaml
vendored
@@ -294,7 +294,7 @@ jobs:
|
|||||||
export C_INCLUDE_PATH=/usr/local/include
|
export C_INCLUDE_PATH=/usr/local/include
|
||||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||||
export PATH=$PATH:$GOPATH/bin
|
export PATH=$PATH:$GOPATH/bin
|
||||||
|
export SKIP_GRPC_BACKEND=backend-assets/grpc/whisper
|
||||||
make dist
|
make dist
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
@@ -327,7 +327,7 @@ jobs:
|
|||||||
cache: false
|
cache: false
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
brew install protobuf grpc
|
brew install protobuf grpc libomp llvm
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
- name: Build
|
- name: Build
|
||||||
@@ -336,7 +336,7 @@ jobs:
|
|||||||
export C_INCLUDE_PATH=/usr/local/include
|
export C_INCLUDE_PATH=/usr/local/include
|
||||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||||
export PATH=$PATH:$GOPATH/bin
|
export PATH=$PATH:$GOPATH/bin
|
||||||
|
export CC=/opt/homebrew/opt/llvm/bin/clang
|
||||||
make dist
|
make dist
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
|
|||||||
2
.github/workflows/secscan.yaml
vendored
2
.github/workflows/secscan.yaml
vendored
@@ -18,7 +18,7 @@ jobs:
|
|||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
- name: Run Gosec Security Scanner
|
- name: Run Gosec Security Scanner
|
||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
uses: securego/gosec@master
|
uses: securego/gosec@v2.21.4
|
||||||
with:
|
with:
|
||||||
# we let the report trigger content trigger a failure using the GitHub Security features.
|
# we let the report trigger content trigger a failure using the GitHub Security features.
|
||||||
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
||||||
|
|||||||
26
.github/workflows/test-extra.yml
vendored
26
.github/workflows/test-extra.yml
vendored
@@ -168,32 +168,6 @@ jobs:
|
|||||||
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
|
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
|
||||||
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
|
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# tests-petals:
|
|
||||||
# runs-on: ubuntu-latest
|
|
||||||
# steps:
|
|
||||||
# - name: Clone
|
|
||||||
# uses: actions/checkout@v4
|
|
||||||
# with:
|
|
||||||
# submodules: true
|
|
||||||
# - name: Dependencies
|
|
||||||
# run: |
|
|
||||||
# sudo apt-get update
|
|
||||||
# sudo apt-get install build-essential ffmpeg
|
|
||||||
# # Install UV
|
|
||||||
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
||||||
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
|
||||||
# sudo apt-get install -y libopencv-dev
|
|
||||||
# pip install --user --no-cache-dir grpcio-tools==1.64.1
|
|
||||||
|
|
||||||
# - name: Test petals
|
|
||||||
# run: |
|
|
||||||
# make --jobs=5 --output-sync=target -C backend/python/petals
|
|
||||||
# make --jobs=5 --output-sync=target -C backend/python/petals test
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# tests-bark:
|
# tests-bark:
|
||||||
# runs-on: ubuntu-latest
|
# runs-on: ubuntu-latest
|
||||||
# steps:
|
# steps:
|
||||||
|
|||||||
14
.github/workflows/test.yml
vendored
14
.github/workflows/test.yml
vendored
@@ -178,13 +178,22 @@ jobs:
|
|||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
|
- name: Dependencies
|
||||||
|
run: |
|
||||||
|
# Install protoc
|
||||||
|
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
|
||||||
|
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||||
|
rm protoc.zip
|
||||||
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
|
PATH="$PATH:$HOME/go/bin" make protogen-go
|
||||||
- name: Build images
|
- name: Build images
|
||||||
run: |
|
run: |
|
||||||
docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
|
docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
|
||||||
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
|
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
|
PATH="$PATH:$HOME/go/bin" LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
|
||||||
make run-e2e-aio
|
make run-e2e-aio
|
||||||
- name: Setup tmate session if tests fail
|
- name: Setup tmate session if tests fail
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
@@ -214,12 +223,13 @@ jobs:
|
|||||||
run: go version
|
run: go version
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
|
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
export C_INCLUDE_PATH=/usr/local/include
|
export C_INCLUDE_PATH=/usr/local/include
|
||||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||||
|
export CC=/opt/homebrew/opt/llvm/bin/clang
|
||||||
# Used to run the newer GNUMake version from brew that supports --output-sync
|
# Used to run the newer GNUMake version from brew that supports --output-sync
|
||||||
export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
|
export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
|
||||||
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
|
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
|
||||||
|
|||||||
2
.github/workflows/update_swagger.yaml
vendored
2
.github/workflows/update_swagger.yaml
vendored
@@ -25,7 +25,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
make protogen-go swagger
|
make protogen-go swagger
|
||||||
- name: Create Pull Request
|
- name: Create Pull Request
|
||||||
uses: peter-evans/create-pull-request@v6
|
uses: peter-evans/create-pull-request@v7
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||||
push-to-fork: ci-forks/LocalAI
|
push-to-fork: ci-forks/LocalAI
|
||||||
|
|||||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -54,3 +54,6 @@ docs/static/gallery.html
|
|||||||
|
|
||||||
# backend virtual environments
|
# backend virtual environments
|
||||||
**/venv
|
**/venv
|
||||||
|
|
||||||
|
# per-developer customization files for the development container
|
||||||
|
.devcontainer/customization/*
|
||||||
21
.vscode/launch.json
vendored
21
.vscode/launch.json
vendored
@@ -3,12 +3,12 @@
|
|||||||
"configurations": [
|
"configurations": [
|
||||||
{
|
{
|
||||||
"name": "Python: Current File",
|
"name": "Python: Current File",
|
||||||
"type": "python",
|
"type": "debugpy",
|
||||||
"request": "launch",
|
"request": "launch",
|
||||||
"program": "${file}",
|
"program": "${file}",
|
||||||
"console": "integratedTerminal",
|
"console": "integratedTerminal",
|
||||||
"justMyCode": false,
|
"justMyCode": false,
|
||||||
"cwd": "${workspaceFolder}/examples/langchain-chroma",
|
"cwd": "${fileDirname}",
|
||||||
"env": {
|
"env": {
|
||||||
"OPENAI_API_BASE": "http://localhost:8080/v1",
|
"OPENAI_API_BASE": "http://localhost:8080/v1",
|
||||||
"OPENAI_API_KEY": "abc"
|
"OPENAI_API_KEY": "abc"
|
||||||
@@ -19,15 +19,16 @@
|
|||||||
"type": "go",
|
"type": "go",
|
||||||
"request": "launch",
|
"request": "launch",
|
||||||
"mode": "debug",
|
"mode": "debug",
|
||||||
"program": "${workspaceFolder}/main.go",
|
"program": "${workspaceRoot}",
|
||||||
"args": [
|
"args": [],
|
||||||
"api"
|
|
||||||
],
|
|
||||||
"env": {
|
"env": {
|
||||||
"C_INCLUDE_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
|
"LOCALAI_LOG_LEVEL": "debug",
|
||||||
"LIBRARY_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
|
"LOCALAI_P2P": "true",
|
||||||
"DEBUG": "true"
|
"LOCALAI_FEDERATED": "true"
|
||||||
}
|
},
|
||||||
|
"buildFlags": ["-tags", "stablediffusion p2p tts", "-v"],
|
||||||
|
"envFile": "${workspaceFolder}/.env",
|
||||||
|
"cwd": "${workspaceRoot}"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -15,8 +15,6 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time
|
|||||||
- [Documentation](#documentation)
|
- [Documentation](#documentation)
|
||||||
- [Community and Communication](#community-and-communication)
|
- [Community and Communication](#community-and-communication)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Getting Started
|
## Getting Started
|
||||||
|
|
||||||
### Prerequisites
|
### Prerequisites
|
||||||
@@ -54,7 +52,7 @@ If you find a bug, have a feature request, or encounter any issues, please check
|
|||||||
|
|
||||||
## Coding Guidelines
|
## Coding Guidelines
|
||||||
|
|
||||||
- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like []`golangci-lint`](https://golangci-lint.run) can help you here.
|
- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like [`golangci-lint`](https://golangci-lint.run) can help you here.
|
||||||
|
|
||||||
## Testing
|
## Testing
|
||||||
|
|
||||||
@@ -84,5 +82,3 @@ We are welcome the contribution of the documents, please open new PR or create a
|
|||||||
- You can reach out via the Github issue tracker.
|
- You can reach out via the Github issue tracker.
|
||||||
- Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
|
- Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
|
||||||
- Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
|
- Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
|
||||||
|
|
||||||
---
|
|
||||||
|
|||||||
120
Dockerfile
120
Dockerfile
@@ -8,12 +8,12 @@ FROM ${BASE_IMAGE} AS requirements-core
|
|||||||
|
|
||||||
USER root
|
USER root
|
||||||
|
|
||||||
ARG GO_VERSION=1.22.5
|
ARG GO_VERSION=1.22.6
|
||||||
ARG TARGETARCH
|
ARG TARGETARCH
|
||||||
ARG TARGETVARIANT
|
ARG TARGETVARIANT
|
||||||
|
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
|
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
|
||||||
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
@@ -30,7 +30,7 @@ RUN apt-get update && \
|
|||||||
|
|
||||||
# Install Go
|
# Install Go
|
||||||
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
|
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
|
||||||
ENV PATH $PATH:/root/go/bin:/usr/local/go/bin
|
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
|
||||||
|
|
||||||
# Install grpc compilers
|
# Install grpc compilers
|
||||||
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
|
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
|
||||||
@@ -39,15 +39,18 @@ RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
|
|||||||
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
|
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
|
||||||
RUN update-ca-certificates
|
RUN update-ca-certificates
|
||||||
|
|
||||||
|
RUN test -n "$TARGETARCH" \
|
||||||
|
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
|
||||||
|
|
||||||
# Use the variables in subsequent instructions
|
# Use the variables in subsequent instructions
|
||||||
RUN echo "Target Architecture: $TARGETARCH"
|
RUN echo "Target Architecture: $TARGETARCH"
|
||||||
RUN echo "Target Variant: $TARGETVARIANT"
|
RUN echo "Target Variant: $TARGETVARIANT"
|
||||||
|
|
||||||
# Cuda
|
# Cuda
|
||||||
ENV PATH /usr/local/cuda/bin:${PATH}
|
ENV PATH=/usr/local/cuda/bin:${PATH}
|
||||||
|
|
||||||
# HipBLAS requirements
|
# HipBLAS requirements
|
||||||
ENV PATH /opt/rocm/bin:${PATH}
|
ENV PATH=/opt/rocm/bin:${PATH}
|
||||||
|
|
||||||
# OpenBLAS requirements and stable diffusion
|
# OpenBLAS requirements and stable diffusion
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
@@ -62,9 +65,6 @@ RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
|||||||
|
|
||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
|
|
||||||
RUN test -n "$TARGETARCH" \
|
|
||||||
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
|
|
||||||
|
|
||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
@@ -81,7 +81,7 @@ RUN apt-get update && \
|
|||||||
espeak \
|
espeak \
|
||||||
python3-pip \
|
python3-pip \
|
||||||
python-is-python3 \
|
python-is-python3 \
|
||||||
python3-dev \
|
python3-dev llvm \
|
||||||
python3-venv && \
|
python3-venv && \
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/* && \
|
rm -rf /var/lib/apt/lists/* && \
|
||||||
@@ -217,13 +217,14 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
|
|||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
|
# The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer
|
||||||
# Adjustments to the build process should likely be made here.
|
|
||||||
FROM requirements-drivers AS builder
|
FROM requirements-drivers AS builder-base
|
||||||
|
|
||||||
ARG GO_TAGS="stablediffusion tts p2p"
|
ARG GO_TAGS="stablediffusion tts p2p"
|
||||||
ARG GRPC_BACKENDS
|
ARG GRPC_BACKENDS
|
||||||
ARG MAKEFLAGS
|
ARG MAKEFLAGS
|
||||||
|
ARG LD_FLAGS="-s -w"
|
||||||
|
|
||||||
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
|
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
|
||||||
ENV GO_TAGS=${GO_TAGS}
|
ENV GO_TAGS=${GO_TAGS}
|
||||||
@@ -231,14 +232,12 @@ ENV MAKEFLAGS=${MAKEFLAGS}
|
|||||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||||
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
||||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||||
|
ENV LD_FLAGS=${LD_FLAGS}
|
||||||
|
|
||||||
|
RUN echo "GO_TAGS: $GO_TAGS" && echo "TARGETARCH: $TARGETARCH"
|
||||||
|
|
||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
|
|
||||||
COPY . .
|
|
||||||
COPY .git .
|
|
||||||
RUN echo "GO_TAGS: $GO_TAGS"
|
|
||||||
|
|
||||||
RUN make prepare
|
|
||||||
|
|
||||||
# We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below
|
# We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below
|
||||||
# but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
|
# but that will also being in a newer version of absl which stablediffusion cannot compile with. This version of protoc is only
|
||||||
@@ -256,8 +255,35 @@ RUN <<EOT bash
|
|||||||
fi
|
fi
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
# stablediffusion does not tolerate a newer version of abseil, build it first
|
|
||||||
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
###################################
|
||||||
|
###################################
|
||||||
|
|
||||||
|
# This first portion of builder holds the layers specifically used to build backend-assets/grpc/stablediffusion
|
||||||
|
# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
|
||||||
|
FROM builder-base AS builder-sd
|
||||||
|
|
||||||
|
# stablediffusion does not tolerate a newer version of abseil, copy only over enough elements to build it
|
||||||
|
COPY Makefile .
|
||||||
|
COPY go.mod .
|
||||||
|
COPY go.sum .
|
||||||
|
COPY backend/backend.proto ./backend/backend.proto
|
||||||
|
COPY backend/go/image/stablediffusion ./backend/go/image/stablediffusion
|
||||||
|
COPY pkg/grpc ./pkg/grpc
|
||||||
|
COPY pkg/stablediffusion ./pkg/stablediffusion
|
||||||
|
RUN git init
|
||||||
|
RUN make sources/go-stable-diffusion
|
||||||
|
RUN touch prepare-sources
|
||||||
|
|
||||||
|
# Actually build the backend
|
||||||
|
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make backend-assets/grpc/stablediffusion
|
||||||
|
|
||||||
|
###################################
|
||||||
|
###################################
|
||||||
|
|
||||||
|
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
|
||||||
|
# Adjustments to the build process should likely be made here.
|
||||||
|
FROM builder-sd AS builder
|
||||||
|
|
||||||
# Install the pre-built GRPC
|
# Install the pre-built GRPC
|
||||||
COPY --from=grpc /opt/grpc /usr/local
|
COPY --from=grpc /opt/grpc /usr/local
|
||||||
@@ -265,8 +291,20 @@ COPY --from=grpc /opt/grpc /usr/local
|
|||||||
# Rebuild with defaults backends
|
# Rebuild with defaults backends
|
||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
|
|
||||||
|
COPY . .
|
||||||
|
COPY .git .
|
||||||
|
|
||||||
|
RUN make prepare
|
||||||
|
|
||||||
## Build the binary
|
## Build the binary
|
||||||
RUN make build
|
## If it's CUDA or hipblas, we want to skip some of the llama-compat backends to save space
|
||||||
|
## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
|
||||||
|
## (both will use CUDA or hipblas for the actual computation)
|
||||||
|
RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
||||||
|
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
|
||||||
|
else \
|
||||||
|
make build; \
|
||||||
|
fi
|
||||||
|
|
||||||
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
||||||
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
|
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
|
||||||
@@ -276,6 +314,40 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
|||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
|
# The devcontainer target is not used on CI. It is a target for developers to use locally -
|
||||||
|
# rather than copying files it mounts them locally and leaves building to the developer
|
||||||
|
|
||||||
|
FROM builder-base AS devcontainer
|
||||||
|
|
||||||
|
ARG FFMPEG
|
||||||
|
|
||||||
|
COPY --from=grpc /opt/grpc /usr/local
|
||||||
|
|
||||||
|
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion /build/backend-assets/grpc/stablediffusion
|
||||||
|
|
||||||
|
COPY .devcontainer-scripts /.devcontainer-scripts
|
||||||
|
|
||||||
|
# Add FFmpeg
|
||||||
|
RUN if [ "${FFMPEG}" = "true" ]; then \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y --no-install-recommends \
|
||||||
|
ffmpeg && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/* \
|
||||||
|
; fi
|
||||||
|
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y --no-install-recommends \
|
||||||
|
ssh less wget
|
||||||
|
# For the devcontainer, leave apt functional in case additional devtools are needed at runtime.
|
||||||
|
|
||||||
|
RUN go install github.com/go-delve/delve/cmd/dlv@latest
|
||||||
|
|
||||||
|
RUN go install github.com/mikefarah/yq/v4@latest
|
||||||
|
|
||||||
|
###################################
|
||||||
|
###################################
|
||||||
|
|
||||||
# This is the final target. The result of this target will be the image uploaded to the registry.
|
# This is the final target. The result of this target will be the image uploaded to the registry.
|
||||||
# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
|
# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
|
||||||
FROM requirements-drivers
|
FROM requirements-drivers
|
||||||
@@ -326,7 +398,7 @@ COPY --from=builder /build/local-ai ./
|
|||||||
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
|
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
|
||||||
|
|
||||||
# do not let stablediffusion rebuild (requires an older version of absl)
|
# do not let stablediffusion rebuild (requires an older version of absl)
|
||||||
COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
|
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
|
||||||
|
|
||||||
# Change the shell to bash so we can use [[ tests below
|
# Change the shell to bash so we can use [[ tests below
|
||||||
SHELL ["/bin/bash", "-c"]
|
SHELL ["/bin/bash", "-c"]
|
||||||
@@ -345,9 +417,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG
|
|||||||
; fi && \
|
; fi && \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/transformers-musicgen \
|
make -C backend/python/transformers-musicgen \
|
||||||
; fi && \
|
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama1" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
|
||||||
make -C backend/python/exllama \
|
|
||||||
; fi
|
; fi
|
||||||
|
|
||||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
@@ -356,9 +425,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$I
|
|||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/openvoice \
|
make -C backend/python/openvoice \
|
||||||
; fi && \
|
; fi && \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "petals" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
|
||||||
make -C backend/python/petals \
|
|
||||||
; fi && \
|
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/sentencetransformers \
|
make -C backend/python/sentencetransformers \
|
||||||
; fi && \
|
; fi && \
|
||||||
|
|||||||
79
Makefile
79
Makefile
@@ -8,11 +8,7 @@ DETECT_LIBS?=true
|
|||||||
# llama.cpp versions
|
# llama.cpp versions
|
||||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||||
CPPLLAMA_VERSION?=0d6fb52be0c1b7e77eb855f3adc4952771c8ce4c
|
CPPLLAMA_VERSION?=d5ed2b929d85bbd7dbeecb690880f07d9d7a6077
|
||||||
|
|
||||||
# gpt4all version
|
|
||||||
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
|
||||||
GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8
|
|
||||||
|
|
||||||
# go-rwkv version
|
# go-rwkv version
|
||||||
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||||
@@ -20,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
|
|||||||
|
|
||||||
# whisper.cpp version
|
# whisper.cpp version
|
||||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||||
WHISPER_CPP_VERSION?=fe36c909715e6751277ddb020e7892c7670b61d4
|
WHISPER_CPP_VERSION?=ccc2547210e09e3a1785817383ab770389bb442b
|
||||||
|
|
||||||
# bert.cpp version
|
# bert.cpp version
|
||||||
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
||||||
@@ -190,7 +186,6 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
|||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
|
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
|
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
|
||||||
@@ -253,18 +248,6 @@ sources/go-piper:
|
|||||||
sources/go-piper/libpiper_binding.a: sources/go-piper
|
sources/go-piper/libpiper_binding.a: sources/go-piper
|
||||||
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
|
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
|
||||||
|
|
||||||
## GPT4ALL
|
|
||||||
sources/gpt4all:
|
|
||||||
mkdir -p sources/gpt4all
|
|
||||||
cd sources/gpt4all && \
|
|
||||||
git init && \
|
|
||||||
git remote add origin $(GPT4ALL_REPO) && \
|
|
||||||
git fetch origin && \
|
|
||||||
git checkout $(GPT4ALL_VERSION) && \
|
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
|
||||||
|
|
||||||
sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
|
|
||||||
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
|
|
||||||
|
|
||||||
## RWKV
|
## RWKV
|
||||||
sources/go-rwkv.cpp:
|
sources/go-rwkv.cpp:
|
||||||
@@ -318,7 +301,7 @@ sources/whisper.cpp:
|
|||||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
||||||
|
|
||||||
get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
|
get-sources: sources/go-llama.cpp sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
|
||||||
|
|
||||||
replace:
|
replace:
|
||||||
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
|
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
|
||||||
@@ -328,7 +311,6 @@ replace:
|
|||||||
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
|
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
||||||
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
|
|
||||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
||||||
|
|
||||||
dropreplace:
|
dropreplace:
|
||||||
@@ -339,7 +321,6 @@ dropreplace:
|
|||||||
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
|
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
|
||||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
||||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
|
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
|
||||||
$(GOCMD) mod edit -dropreplace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang
|
|
||||||
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
|
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
|
||||||
|
|
||||||
prepare-sources: get-sources replace
|
prepare-sources: get-sources replace
|
||||||
@@ -349,7 +330,6 @@ prepare-sources: get-sources replace
|
|||||||
rebuild: ## Rebuilds the project
|
rebuild: ## Rebuilds the project
|
||||||
$(GOCMD) clean -cache
|
$(GOCMD) clean -cache
|
||||||
$(MAKE) -C sources/go-llama.cpp clean
|
$(MAKE) -C sources/go-llama.cpp clean
|
||||||
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
|
|
||||||
$(MAKE) -C sources/go-rwkv.cpp clean
|
$(MAKE) -C sources/go-rwkv.cpp clean
|
||||||
$(MAKE) -C sources/whisper.cpp clean
|
$(MAKE) -C sources/whisper.cpp clean
|
||||||
$(MAKE) -C sources/go-stable-diffusion clean
|
$(MAKE) -C sources/go-stable-diffusion clean
|
||||||
@@ -379,6 +359,9 @@ clean-tests:
|
|||||||
rm -rf test-dir
|
rm -rf test-dir
|
||||||
rm -rf core/http/backend-assets
|
rm -rf core/http/backend-assets
|
||||||
|
|
||||||
|
clean-dc: clean
|
||||||
|
cp -r /build/backend-assets /workspace/backend-assets
|
||||||
|
|
||||||
## Build:
|
## Build:
|
||||||
build: prepare backend-assets grpcs ## Build the project
|
build: prepare backend-assets grpcs ## Build the project
|
||||||
$(info ${GREEN}I local-ai build info:${RESET})
|
$(info ${GREEN}I local-ai build info:${RESET})
|
||||||
@@ -396,7 +379,7 @@ build-minimal:
|
|||||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
|
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
|
||||||
|
|
||||||
build-api:
|
build-api:
|
||||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
|
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=p2p $(MAKE) build
|
||||||
|
|
||||||
backend-assets/lib:
|
backend-assets/lib:
|
||||||
mkdir -p backend-assets/lib
|
mkdir -p backend-assets/lib
|
||||||
@@ -407,7 +390,7 @@ ifeq ($(DETECT_LIBS),true)
|
|||||||
scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2
|
scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2
|
||||||
endif
|
endif
|
||||||
ifeq ($(OS),Darwin)
|
ifeq ($(OS),Darwin)
|
||||||
$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
|
BUILD_TYPE=none $(MAKE) backend-assets/grpc/llama-cpp-fallback
|
||||||
else
|
else
|
||||||
$(MAKE) backend-assets/grpc/llama-cpp-cuda
|
$(MAKE) backend-assets/grpc/llama-cpp-cuda
|
||||||
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
|
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
|
||||||
@@ -469,8 +452,7 @@ test: prepare test-models/testmodel.ggml grpcs
|
|||||||
export GO_TAGS="tts stablediffusion debug"
|
export GO_TAGS="tts stablediffusion debug"
|
||||||
$(MAKE) prepare-test
|
$(MAKE) prepare-test
|
||||||
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||||
$(MAKE) test-gpt4all
|
|
||||||
$(MAKE) test-llama
|
$(MAKE) test-llama
|
||||||
$(MAKE) test-llama-gguf
|
$(MAKE) test-llama-gguf
|
||||||
$(MAKE) test-tts
|
$(MAKE) test-tts
|
||||||
@@ -486,7 +468,7 @@ run-e2e-image:
|
|||||||
ls -liah $(abspath ./tests/e2e-fixtures)
|
ls -liah $(abspath ./tests/e2e-fixtures)
|
||||||
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
|
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
|
||||||
|
|
||||||
run-e2e-aio:
|
run-e2e-aio: protogen-go
|
||||||
@echo 'Running e2e AIO tests'
|
@echo 'Running e2e AIO tests'
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
|
||||||
|
|
||||||
@@ -500,10 +482,6 @@ teardown-e2e:
|
|||||||
rm -rf $(TEST_DIR) || true
|
rm -rf $(TEST_DIR) || true
|
||||||
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
docker stop $$(docker ps -q --filter ancestor=localai-tests)
|
||||||
|
|
||||||
test-gpt4all: prepare-test
|
|
||||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r $(TEST_PATHS)
|
|
||||||
|
|
||||||
test-llama: prepare-test
|
test-llama: prepare-test
|
||||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS)
|
||||||
@@ -559,10 +537,10 @@ protogen-go-clean:
|
|||||||
$(RM) bin/*
|
$(RM) bin/*
|
||||||
|
|
||||||
.PHONY: protogen-python
|
.PHONY: protogen-python
|
||||||
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
|
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
|
||||||
|
|
||||||
.PHONY: protogen-python-clean
|
.PHONY: protogen-python-clean
|
||||||
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
|
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
|
||||||
|
|
||||||
.PHONY: autogptq-protogen
|
.PHONY: autogptq-protogen
|
||||||
autogptq-protogen:
|
autogptq-protogen:
|
||||||
@@ -596,14 +574,6 @@ diffusers-protogen:
|
|||||||
diffusers-protogen-clean:
|
diffusers-protogen-clean:
|
||||||
$(MAKE) -C backend/python/diffusers protogen-clean
|
$(MAKE) -C backend/python/diffusers protogen-clean
|
||||||
|
|
||||||
.PHONY: exllama-protogen
|
|
||||||
exllama-protogen:
|
|
||||||
$(MAKE) -C backend/python/exllama protogen
|
|
||||||
|
|
||||||
.PHONY: exllama-protogen-clean
|
|
||||||
exllama-protogen-clean:
|
|
||||||
$(MAKE) -C backend/python/exllama protogen-clean
|
|
||||||
|
|
||||||
.PHONY: exllama2-protogen
|
.PHONY: exllama2-protogen
|
||||||
exllama2-protogen:
|
exllama2-protogen:
|
||||||
$(MAKE) -C backend/python/exllama2 protogen
|
$(MAKE) -C backend/python/exllama2 protogen
|
||||||
@@ -620,14 +590,6 @@ mamba-protogen:
|
|||||||
mamba-protogen-clean:
|
mamba-protogen-clean:
|
||||||
$(MAKE) -C backend/python/mamba protogen-clean
|
$(MAKE) -C backend/python/mamba protogen-clean
|
||||||
|
|
||||||
.PHONY: petals-protogen
|
|
||||||
petals-protogen:
|
|
||||||
$(MAKE) -C backend/python/petals protogen
|
|
||||||
|
|
||||||
.PHONY: petals-protogen-clean
|
|
||||||
petals-protogen-clean:
|
|
||||||
$(MAKE) -C backend/python/petals protogen-clean
|
|
||||||
|
|
||||||
.PHONY: rerankers-protogen
|
.PHONY: rerankers-protogen
|
||||||
rerankers-protogen:
|
rerankers-protogen:
|
||||||
$(MAKE) -C backend/python/rerankers protogen
|
$(MAKE) -C backend/python/rerankers protogen
|
||||||
@@ -708,8 +670,6 @@ prepare-extra-conda-environments: protogen-python
|
|||||||
$(MAKE) -C backend/python/parler-tts
|
$(MAKE) -C backend/python/parler-tts
|
||||||
$(MAKE) -C backend/python/vall-e-x
|
$(MAKE) -C backend/python/vall-e-x
|
||||||
$(MAKE) -C backend/python/openvoice
|
$(MAKE) -C backend/python/openvoice
|
||||||
$(MAKE) -C backend/python/exllama
|
|
||||||
$(MAKE) -C backend/python/petals
|
|
||||||
$(MAKE) -C backend/python/exllama2
|
$(MAKE) -C backend/python/exllama2
|
||||||
|
|
||||||
prepare-test-extra: protogen-python
|
prepare-test-extra: protogen-python
|
||||||
@@ -730,12 +690,6 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
|
|||||||
mkdir -p backend-assets/espeak-ng-data
|
mkdir -p backend-assets/espeak-ng-data
|
||||||
@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
|
@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
|
||||||
|
|
||||||
backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
|
|
||||||
mkdir -p backend-assets/gpt4all
|
|
||||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
|
|
||||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
|
|
||||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
|
|
||||||
|
|
||||||
backend-assets/grpc: protogen-go replace
|
backend-assets/grpc: protogen-go replace
|
||||||
mkdir -p backend-assets/grpc
|
mkdir -p backend-assets/grpc
|
||||||
|
|
||||||
@@ -746,13 +700,6 @@ ifneq ($(UPX),)
|
|||||||
$(UPX) backend-assets/grpc/bert-embeddings
|
$(UPX) backend-assets/grpc/bert-embeddings
|
||||||
endif
|
endif
|
||||||
|
|
||||||
backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
|
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
|
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
|
|
||||||
ifneq ($(UPX),)
|
|
||||||
$(UPX) backend-assets/grpc/gpt4all
|
|
||||||
endif
|
|
||||||
|
|
||||||
backend-assets/grpc/huggingface: backend-assets/grpc
|
backend-assets/grpc/huggingface: backend-assets/grpc
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
|
||||||
ifneq ($(UPX),)
|
ifneq ($(UPX),)
|
||||||
@@ -893,7 +840,7 @@ endif
|
|||||||
|
|
||||||
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
|
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
|
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
|
||||||
ifneq ($(UPX),)
|
ifneq ($(UPX),)
|
||||||
$(UPX) backend-assets/grpc/whisper
|
$(UPX) backend-assets/grpc/whisper
|
||||||
endif
|
endif
|
||||||
|
|||||||
13
README.md
13
README.md
@@ -40,7 +40,7 @@
|
|||||||
|
|
||||||
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
||||||
>
|
>
|
||||||
> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/go-skynet/LocalAI/tree/master/examples/)
|
||||||
|
|
||||||
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
||||||
|
|
||||||
@@ -68,10 +68,9 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
|||||||
|
|
||||||
[💻 Getting started](https://localai.io/basics/getting_started/index.html)
|
[💻 Getting started](https://localai.io/basics/getting_started/index.html)
|
||||||
|
|
||||||
## 🔥🔥 Hot topics / Roadmap
|
## 📰 Latest project news
|
||||||
|
|
||||||
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
|
||||||
|
|
||||||
|
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
|
||||||
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
|
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
|
||||||
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
|
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
|
||||||
- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628
|
- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628
|
||||||
@@ -82,8 +81,12 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
|||||||
- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
|
- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
|
||||||
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
|
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
|
||||||
|
|
||||||
Hot topics (looking for contributors):
|
Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||||
|
|
||||||
|
## 🔥🔥 Hot topics (looking for help):
|
||||||
|
|
||||||
|
- Multimodal with vLLM and Video understanding: https://github.com/mudler/LocalAI/pull/3729
|
||||||
|
- Realtime API https://github.com/mudler/LocalAI/issues/3714
|
||||||
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
|
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
|
||||||
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
|
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
|
||||||
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ backend: llama-cpp
|
|||||||
context_size: 4096
|
context_size: 4096
|
||||||
f16: true
|
f16: true
|
||||||
mmap: true
|
mmap: true
|
||||||
name: gpt-4-vision-preview
|
name: gpt-4o
|
||||||
|
|
||||||
roles:
|
roles:
|
||||||
user: "USER:"
|
user: "USER:"
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ backend: llama-cpp
|
|||||||
context_size: 4096
|
context_size: 4096
|
||||||
f16: true
|
f16: true
|
||||||
mmap: true
|
mmap: true
|
||||||
name: gpt-4-vision-preview
|
name: gpt-4o
|
||||||
|
|
||||||
roles:
|
roles:
|
||||||
user: "USER:"
|
user: "USER:"
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
name: stablediffusion
|
name: stablediffusion
|
||||||
parameters:
|
parameters:
|
||||||
model: runwayml/stable-diffusion-v1-5
|
model: Lykon/dreamshaper-8
|
||||||
backend: diffusers
|
backend: diffusers
|
||||||
step: 25
|
step: 25
|
||||||
f16: true
|
f16: true
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ backend: llama-cpp
|
|||||||
context_size: 4096
|
context_size: 4096
|
||||||
mmap: false
|
mmap: false
|
||||||
f16: false
|
f16: false
|
||||||
name: gpt-4-vision-preview
|
name: gpt-4o
|
||||||
|
|
||||||
roles:
|
roles:
|
||||||
user: "USER:"
|
user: "USER:"
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ service Backend {
|
|||||||
rpc GenerateImage(GenerateImageRequest) returns (Result) {}
|
rpc GenerateImage(GenerateImageRequest) returns (Result) {}
|
||||||
rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
|
rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
|
||||||
rpc TTS(TTSRequest) returns (Result) {}
|
rpc TTS(TTSRequest) returns (Result) {}
|
||||||
|
rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
|
||||||
rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
|
rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
|
||||||
rpc Status(HealthMessage) returns (StatusResponse) {}
|
rpc Status(HealthMessage) returns (StatusResponse) {}
|
||||||
|
|
||||||
@@ -25,6 +26,19 @@ service Backend {
|
|||||||
rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
|
rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
|
||||||
|
|
||||||
rpc Rerank(RerankRequest) returns (RerankResult) {}
|
rpc Rerank(RerankRequest) returns (RerankResult) {}
|
||||||
|
|
||||||
|
rpc GetMetrics(MetricsRequest) returns (MetricsResponse);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Define the empty request
|
||||||
|
message MetricsRequest {}
|
||||||
|
|
||||||
|
message MetricsResponse {
|
||||||
|
int32 slot_id = 1;
|
||||||
|
string prompt_json_for_slot = 2; // Stores the prompt as a JSON string.
|
||||||
|
float tokens_per_second = 3;
|
||||||
|
int32 tokens_generated = 4;
|
||||||
|
int32 prompt_tokens_processed = 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
message RerankRequest {
|
message RerankRequest {
|
||||||
@@ -133,6 +147,9 @@ message PredictOptions {
|
|||||||
repeated string Images = 42;
|
repeated string Images = 42;
|
||||||
bool UseTokenizerTemplate = 43;
|
bool UseTokenizerTemplate = 43;
|
||||||
repeated Message Messages = 44;
|
repeated Message Messages = 44;
|
||||||
|
repeated string Videos = 45;
|
||||||
|
repeated string Audios = 46;
|
||||||
|
string CorrelationId = 47;
|
||||||
}
|
}
|
||||||
|
|
||||||
// The response message containing the result
|
// The response message containing the result
|
||||||
@@ -270,6 +287,17 @@ message TTSRequest {
|
|||||||
optional string language = 5;
|
optional string language = 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
message SoundGenerationRequest {
|
||||||
|
string text = 1;
|
||||||
|
string model = 2;
|
||||||
|
string dst = 3;
|
||||||
|
optional float duration = 4;
|
||||||
|
optional float temperature = 5;
|
||||||
|
optional bool sample = 6;
|
||||||
|
optional string src = 7;
|
||||||
|
optional int32 src_divisor = 8;
|
||||||
|
}
|
||||||
|
|
||||||
message TokenizationResponse {
|
message TokenizationResponse {
|
||||||
int32 length = 1;
|
int32 length = 1;
|
||||||
repeated int32 tokens = 2;
|
repeated int32 tokens = 2;
|
||||||
|
|||||||
@@ -13,15 +13,15 @@
|
|||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
#include "clip.h"
|
#include "clip.h"
|
||||||
#include "llava.h"
|
#include "llava.h"
|
||||||
|
#include "log.h"
|
||||||
#include "stb_image.h"
|
#include "stb_image.h"
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "json.hpp"
|
#include "json.hpp"
|
||||||
#include "llama.h"
|
#include "llama.h"
|
||||||
#include "grammar-parser.h"
|
|
||||||
#include "backend.pb.h"
|
#include "backend.pb.h"
|
||||||
#include "backend.grpc.pb.h"
|
#include "backend.grpc.pb.h"
|
||||||
#include "utils.hpp"
|
#include "utils.hpp"
|
||||||
|
#include "sampling.h"
|
||||||
// include std::regex
|
// include std::regex
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
@@ -203,8 +203,8 @@ struct llama_client_slot
|
|||||||
std::string stopping_word;
|
std::string stopping_word;
|
||||||
|
|
||||||
// sampling
|
// sampling
|
||||||
struct llama_sampling_params sparams;
|
struct gpt_sampler_params sparams;
|
||||||
llama_sampling_context *ctx_sampling = nullptr;
|
gpt_sampler *ctx_sampling = nullptr;
|
||||||
|
|
||||||
int32_t ga_i = 0; // group-attention state
|
int32_t ga_i = 0; // group-attention state
|
||||||
int32_t ga_n = 1; // group-attention factor
|
int32_t ga_n = 1; // group-attention factor
|
||||||
@@ -449,7 +449,7 @@ struct llama_server_context
|
|||||||
LOG_INFO("Multi Modal Mode Enabled", {});
|
LOG_INFO("Multi Modal Mode Enabled", {});
|
||||||
clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1);
|
clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1);
|
||||||
if(clp_ctx == nullptr) {
|
if(clp_ctx == nullptr) {
|
||||||
LOG_ERROR("unable to load clip model", {{"model", params.mmproj}});
|
LOG_ERR("unable to load clip model: %s", params.mmproj.c_str());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -458,10 +458,12 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
llama_init_result llama_init = llama_init_from_gpt_params(params);
|
||||||
|
model = llama_init.model;
|
||||||
|
ctx = llama_init.context;
|
||||||
if (model == nullptr)
|
if (model == nullptr)
|
||||||
{
|
{
|
||||||
LOG_ERROR("unable to load model", {{"model", params.model}});
|
LOG_ERR("unable to load model: %s", params.model.c_str());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -469,7 +471,7 @@ struct llama_server_context
|
|||||||
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
|
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
|
||||||
const int n_embd_llm = llama_n_embd(model);
|
const int n_embd_llm = llama_n_embd(model);
|
||||||
if (n_embd_clip != n_embd_llm) {
|
if (n_embd_clip != n_embd_llm) {
|
||||||
LOG_TEE("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
|
LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
|
||||||
llama_free(ctx);
|
llama_free(ctx);
|
||||||
llama_free_model(model);
|
llama_free_model(model);
|
||||||
return false;
|
return false;
|
||||||
@@ -478,7 +480,7 @@ struct llama_server_context
|
|||||||
|
|
||||||
n_ctx = llama_n_ctx(ctx);
|
n_ctx = llama_n_ctx(ctx);
|
||||||
|
|
||||||
add_bos_token = llama_should_add_bos_token(model);
|
add_bos_token = llama_add_bos_token(model);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -488,11 +490,21 @@ struct llama_server_context
|
|||||||
std::vector<char> buf(1);
|
std::vector<char> buf(1);
|
||||||
int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
|
int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
|
||||||
if (res < 0) {
|
if (res < 0) {
|
||||||
LOG_ERROR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", {});
|
LOG_ERR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", __func__);
|
||||||
sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template
|
sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
llama_client_slot* get_active_slot() {
|
||||||
|
for (llama_client_slot& slot : slots) {
|
||||||
|
// Check if the slot is currently processing
|
||||||
|
if (slot.is_processing()) {
|
||||||
|
return &slot; // Return the active slot
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nullptr; // No active slot found
|
||||||
|
}
|
||||||
|
|
||||||
void initialize() {
|
void initialize() {
|
||||||
// create slots
|
// create slots
|
||||||
all_slots_are_idle = true;
|
all_slots_are_idle = true;
|
||||||
@@ -617,7 +629,7 @@ struct llama_server_context
|
|||||||
|
|
||||||
bool launch_slot_with_data(llama_client_slot* &slot, json data) {
|
bool launch_slot_with_data(llama_client_slot* &slot, json data) {
|
||||||
slot_params default_params;
|
slot_params default_params;
|
||||||
llama_sampling_params default_sparams;
|
gpt_sampler_params default_sparams;
|
||||||
|
|
||||||
slot->params.stream = json_value(data, "stream", false);
|
slot->params.stream = json_value(data, "stream", false);
|
||||||
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
|
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
|
||||||
@@ -626,7 +638,7 @@ struct llama_server_context
|
|||||||
slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
|
slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
|
||||||
slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
|
slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
|
||||||
slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
|
slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
|
||||||
slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p);
|
slot->sparams.typ_p = json_value(data, "typical_p", default_sparams.typ_p);
|
||||||
slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
|
slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
|
||||||
slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
|
slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
|
||||||
slot->sparams.dynatemp_exponent = json_value(data, "dynatemp_exponent", default_sparams.dynatemp_exponent);
|
slot->sparams.dynatemp_exponent = json_value(data, "dynatemp_exponent", default_sparams.dynatemp_exponent);
|
||||||
@@ -639,7 +651,7 @@ struct llama_server_context
|
|||||||
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
|
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
|
||||||
slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
|
slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
|
||||||
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
|
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
|
||||||
slot->params.seed = json_value(data, "seed", default_params.seed);
|
slot->sparams.seed = json_value(data, "seed", default_sparams.seed);
|
||||||
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
||||||
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
||||||
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
||||||
@@ -663,6 +675,7 @@ struct llama_server_context
|
|||||||
slot->params.input_prefix = "";
|
slot->params.input_prefix = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (data.count("input_suffix") != 0)
|
if (data.count("input_suffix") != 0)
|
||||||
{
|
{
|
||||||
slot->params.input_suffix = data["input_suffix"];
|
slot->params.input_suffix = data["input_suffix"];
|
||||||
@@ -681,6 +694,10 @@ struct llama_server_context
|
|||||||
slot->prompt = "";
|
slot->prompt = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (json_value(data, "ignore_eos", false)) {
|
||||||
|
slot->sparams.logit_bias.push_back({llama_token_eos(model), -INFINITY});
|
||||||
|
}
|
||||||
|
/*
|
||||||
slot->sparams.penalty_prompt_tokens.clear();
|
slot->sparams.penalty_prompt_tokens.clear();
|
||||||
slot->sparams.use_penalty_prompt_tokens = false;
|
slot->sparams.use_penalty_prompt_tokens = false;
|
||||||
const auto &penalty_prompt = data.find("penalty_prompt");
|
const auto &penalty_prompt = data.find("penalty_prompt");
|
||||||
@@ -716,14 +733,10 @@ struct llama_server_context
|
|||||||
slot->sparams.use_penalty_prompt_tokens = true;
|
slot->sparams.use_penalty_prompt_tokens = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
slot->sparams.logit_bias.clear();
|
slot->sparams.logit_bias.clear();
|
||||||
|
|
||||||
if (json_value(data, "ignore_eos", false))
|
|
||||||
{
|
|
||||||
slot->sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
|
|
||||||
}
|
|
||||||
|
|
||||||
const auto &logit_bias = data.find("logit_bias");
|
const auto &logit_bias = data.find("logit_bias");
|
||||||
if (logit_bias != data.end() && logit_bias->is_array())
|
if (logit_bias != data.end() && logit_bias->is_array())
|
||||||
{
|
{
|
||||||
@@ -751,7 +764,7 @@ struct llama_server_context
|
|||||||
llama_token tok = el[0].get<llama_token>();
|
llama_token tok = el[0].get<llama_token>();
|
||||||
if (tok >= 0 && tok < n_vocab)
|
if (tok >= 0 && tok < n_vocab)
|
||||||
{
|
{
|
||||||
slot->sparams.logit_bias[tok] = bias;
|
slot->sparams.logit_bias.push_back({tok, bias});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (el[0].is_string())
|
else if (el[0].is_string())
|
||||||
@@ -759,13 +772,13 @@ struct llama_server_context
|
|||||||
auto toks = llama_tokenize(model, el[0].get<std::string>(), false);
|
auto toks = llama_tokenize(model, el[0].get<std::string>(), false);
|
||||||
for (auto tok : toks)
|
for (auto tok : toks)
|
||||||
{
|
{
|
||||||
slot->sparams.logit_bias[tok] = bias;
|
slot->sparams.logit_bias.push_back({tok, bias});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
slot->params.antiprompt.clear();
|
slot->params.antiprompt.clear();
|
||||||
|
|
||||||
const auto &stop = data.find("stop");
|
const auto &stop = data.find("stop");
|
||||||
@@ -779,24 +792,22 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto &samplers_sequence = data.find("samplers");
|
const auto & samplers = data.find("samplers");
|
||||||
if (samplers_sequence != data.end() && samplers_sequence->is_array())
|
if (samplers != data.end() && samplers->is_array()) {
|
||||||
{
|
|
||||||
std::vector<std::string> sampler_names;
|
std::vector<std::string> sampler_names;
|
||||||
for (const auto &sampler_name : *samplers_sequence)
|
for (const auto & name : *samplers) {
|
||||||
{
|
if (name.is_string()) {
|
||||||
if (sampler_name.is_string())
|
sampler_names.emplace_back(name);
|
||||||
{
|
}
|
||||||
sampler_names.emplace_back(sampler_name);
|
|
||||||
}
|
}
|
||||||
}
|
slot->sparams.samplers = gpt_sampler_types_from_names(sampler_names, false);
|
||||||
slot->sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
slot->sparams.samplers_sequence = default_sparams.samplers_sequence;
|
slot->sparams.samplers = default_sparams.samplers;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (multimodal)
|
if (multimodal)
|
||||||
{
|
{
|
||||||
@@ -812,10 +823,11 @@ struct llama_server_context
|
|||||||
img_sl.img_data = clip_image_u8_init();
|
img_sl.img_data = clip_image_u8_init();
|
||||||
if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
|
if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
|
||||||
{
|
{
|
||||||
LOG_ERROR("failed to load image", {
|
LOG_ERR("%s: failed to load image, slot_id: %d, img_sl_id: %d",
|
||||||
{"slot_id", slot->id},
|
__func__,
|
||||||
{"img_sl_id", img_sl.id}
|
slot->id,
|
||||||
});
|
img_sl.id
|
||||||
|
);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
LOG_VERBOSE("image loaded", {
|
LOG_VERBOSE("image loaded", {
|
||||||
@@ -853,12 +865,12 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!found) {
|
if (!found) {
|
||||||
LOG_TEE("ERROR: Image with id: %i, not found.\n", img_id);
|
LOG("ERROR: Image with id: %i, not found.\n", img_id);
|
||||||
slot->images.clear();
|
slot->images.clear();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} catch (const std::invalid_argument& e) {
|
} catch (const std::invalid_argument& e) {
|
||||||
LOG_TEE("Invalid image number id in prompt\n");
|
LOG("Invalid image number id in prompt\n");
|
||||||
slot->images.clear();
|
slot->images.clear();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -873,10 +885,10 @@ struct llama_server_context
|
|||||||
|
|
||||||
if (slot->ctx_sampling != nullptr)
|
if (slot->ctx_sampling != nullptr)
|
||||||
{
|
{
|
||||||
llama_sampling_free(slot->ctx_sampling);
|
gpt_sampler_free(slot->ctx_sampling);
|
||||||
}
|
}
|
||||||
slot->ctx_sampling = llama_sampling_init(slot->sparams);
|
slot->ctx_sampling = gpt_sampler_init(model, slot->sparams);
|
||||||
llama_set_rng_seed(ctx, slot->params.seed);
|
//llama_set_rng_seed(ctx, slot->params.seed);
|
||||||
slot->command = LOAD_PROMPT;
|
slot->command = LOAD_PROMPT;
|
||||||
|
|
||||||
all_slots_are_idle = false;
|
all_slots_are_idle = false;
|
||||||
@@ -886,7 +898,7 @@ struct llama_server_context
|
|||||||
{"task_id", slot->task_id},
|
{"task_id", slot->task_id},
|
||||||
});
|
});
|
||||||
|
|
||||||
LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
|
// LOG("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -926,7 +938,7 @@ struct llama_server_context
|
|||||||
};
|
};
|
||||||
if (llama_decode(ctx, batch_view) != 0)
|
if (llama_decode(ctx, batch_view) != 0)
|
||||||
{
|
{
|
||||||
LOG_TEE("%s: llama_decode() failed\n", __func__);
|
LOG("%s: llama_decode() failed\n", __func__);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -938,7 +950,7 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_TEE("system prompt updated\n");
|
LOG("system prompt updated\n");
|
||||||
system_need_update = false;
|
system_need_update = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1004,11 +1016,13 @@ struct llama_server_context
|
|||||||
slot.generated_text += token_str;
|
slot.generated_text += token_str;
|
||||||
slot.has_next_token = true;
|
slot.has_next_token = true;
|
||||||
|
|
||||||
|
/*
|
||||||
if (slot.ctx_sampling->params.use_penalty_prompt_tokens && result.tok != -1)
|
if (slot.ctx_sampling->params.use_penalty_prompt_tokens && result.tok != -1)
|
||||||
{
|
{
|
||||||
// we can change penalty_prompt_tokens because it is always created from scratch each request
|
// we can change penalty_prompt_tokens because it is always created from scratch each request
|
||||||
slot.ctx_sampling->params.penalty_prompt_tokens.push_back(result.tok);
|
slot.ctx_sampling->params.penalty_prompt_tokens.push_back(result.tok);
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
// check if there is incomplete UTF-8 character at the end
|
// check if there is incomplete UTF-8 character at the end
|
||||||
bool incomplete = false;
|
bool incomplete = false;
|
||||||
@@ -1117,8 +1131,8 @@ struct llama_server_context
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
|
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.cpuparams.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
|
||||||
LOG_TEE("Error processing the given image");
|
LOG("Error processing the given image");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1130,7 +1144,7 @@ struct llama_server_context
|
|||||||
|
|
||||||
void send_error(task_server& task, const std::string &error)
|
void send_error(task_server& task, const std::string &error)
|
||||||
{
|
{
|
||||||
LOG_TEE("task %i - error: %s\n", task.id, error.c_str());
|
LOG("task %i - error: %s\n", task.id, error.c_str());
|
||||||
task_result res;
|
task_result res;
|
||||||
res.id = task.id;
|
res.id = task.id;
|
||||||
res.multitask_id = task.multitask_id;
|
res.multitask_id = task.multitask_id;
|
||||||
@@ -1142,13 +1156,11 @@ struct llama_server_context
|
|||||||
|
|
||||||
json get_formated_generation(llama_client_slot &slot)
|
json get_formated_generation(llama_client_slot &slot)
|
||||||
{
|
{
|
||||||
const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
|
std::vector<std::string> samplers;
|
||||||
const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
|
samplers.reserve(slot.sparams.samplers.size());
|
||||||
eos_bias->second < 0.0f && std::isinf(eos_bias->second);
|
for (const auto & sampler : slot.sparams.samplers)
|
||||||
std::vector<std::string> samplers_sequence;
|
|
||||||
for (const auto &sampler_type : slot.sparams.samplers_sequence)
|
|
||||||
{
|
{
|
||||||
samplers_sequence.emplace_back(llama_sampling_type_to_str(sampler_type));
|
samplers.emplace_back(gpt_sampler_type_to_str(sampler));
|
||||||
}
|
}
|
||||||
|
|
||||||
return json {
|
return json {
|
||||||
@@ -1163,13 +1175,11 @@ struct llama_server_context
|
|||||||
{"top_p", slot.sparams.top_p},
|
{"top_p", slot.sparams.top_p},
|
||||||
{"min_p", slot.sparams.min_p},
|
{"min_p", slot.sparams.min_p},
|
||||||
{"tfs_z", slot.sparams.tfs_z},
|
{"tfs_z", slot.sparams.tfs_z},
|
||||||
{"typical_p", slot.sparams.typical_p},
|
{"typical_p", slot.sparams.typ_p},
|
||||||
{"repeat_last_n", slot.sparams.penalty_last_n},
|
{"repeat_last_n", slot.sparams.penalty_last_n},
|
||||||
{"repeat_penalty", slot.sparams.penalty_repeat},
|
{"repeat_penalty", slot.sparams.penalty_repeat},
|
||||||
{"presence_penalty", slot.sparams.penalty_present},
|
{"presence_penalty", slot.sparams.penalty_present},
|
||||||
{"frequency_penalty", slot.sparams.penalty_freq},
|
{"frequency_penalty", slot.sparams.penalty_freq},
|
||||||
{"penalty_prompt_tokens", slot.sparams.penalty_prompt_tokens},
|
|
||||||
{"use_penalty_prompt_tokens", slot.sparams.use_penalty_prompt_tokens},
|
|
||||||
{"mirostat", slot.sparams.mirostat},
|
{"mirostat", slot.sparams.mirostat},
|
||||||
{"mirostat_tau", slot.sparams.mirostat_tau},
|
{"mirostat_tau", slot.sparams.mirostat_tau},
|
||||||
{"mirostat_eta", slot.sparams.mirostat_eta},
|
{"mirostat_eta", slot.sparams.mirostat_eta},
|
||||||
@@ -1177,13 +1187,13 @@ struct llama_server_context
|
|||||||
{"stop", slot.params.antiprompt},
|
{"stop", slot.params.antiprompt},
|
||||||
{"n_predict", slot.params.n_predict},
|
{"n_predict", slot.params.n_predict},
|
||||||
{"n_keep", params.n_keep},
|
{"n_keep", params.n_keep},
|
||||||
{"ignore_eos", ignore_eos},
|
{"ignore_eos", slot.sparams.ignore_eos},
|
||||||
{"stream", slot.params.stream},
|
{"stream", slot.params.stream},
|
||||||
{"logit_bias", slot.sparams.logit_bias},
|
// {"logit_bias", slot.sparams.logit_bias},
|
||||||
{"n_probs", slot.sparams.n_probs},
|
{"n_probs", slot.sparams.n_probs},
|
||||||
{"min_keep", slot.sparams.min_keep},
|
{"min_keep", slot.sparams.min_keep},
|
||||||
{"grammar", slot.sparams.grammar},
|
{"grammar", slot.sparams.grammar},
|
||||||
{"samplers", samplers_sequence}
|
{"samplers", samplers}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1373,7 +1383,7 @@ struct llama_server_context
|
|||||||
};
|
};
|
||||||
if (llama_decode(ctx, batch_view))
|
if (llama_decode(ctx, batch_view))
|
||||||
{
|
{
|
||||||
LOG_TEE("%s : failed to eval\n", __func__);
|
LOG("%s : failed to eval\n", __func__);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1391,7 +1401,7 @@ struct llama_server_context
|
|||||||
llama_batch batch_img = { n_eval, nullptr, (img.image_embedding + i * n_embd), nullptr, nullptr, nullptr, nullptr, slot.n_past, 1, 0, };
|
llama_batch batch_img = { n_eval, nullptr, (img.image_embedding + i * n_embd), nullptr, nullptr, nullptr, nullptr, slot.n_past, 1, 0, };
|
||||||
if (llama_decode(ctx, batch_img))
|
if (llama_decode(ctx, batch_img))
|
||||||
{
|
{
|
||||||
LOG_TEE("%s : failed to eval image\n", __func__);
|
LOG("%s : failed to eval image\n", __func__);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
slot.n_past += n_eval;
|
slot.n_past += n_eval;
|
||||||
@@ -1574,7 +1584,7 @@ struct llama_server_context
|
|||||||
slot.n_past = 0;
|
slot.n_past = 0;
|
||||||
slot.truncated = false;
|
slot.truncated = false;
|
||||||
slot.has_next_token = true;
|
slot.has_next_token = true;
|
||||||
LOG_TEE("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
|
LOG("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
// END LOCALAI changes
|
// END LOCALAI changes
|
||||||
@@ -1712,7 +1722,7 @@ struct llama_server_context
|
|||||||
|
|
||||||
if (!slot.params.cache_prompt)
|
if (!slot.params.cache_prompt)
|
||||||
{
|
{
|
||||||
llama_sampling_reset(slot.ctx_sampling);
|
gpt_sampler_reset(slot.ctx_sampling);
|
||||||
|
|
||||||
slot.n_past = 0;
|
slot.n_past = 0;
|
||||||
slot.n_past_se = 0;
|
slot.n_past_se = 0;
|
||||||
@@ -1724,7 +1734,7 @@ struct llama_server_context
|
|||||||
// push the prompt into the sampling context (do not apply grammar)
|
// push the prompt into the sampling context (do not apply grammar)
|
||||||
for (auto &token : prompt_tokens)
|
for (auto &token : prompt_tokens)
|
||||||
{
|
{
|
||||||
llama_sampling_accept(slot.ctx_sampling, ctx, token, false);
|
gpt_sampler_accept(slot.ctx_sampling, token, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
slot.n_past = common_part(slot.cache_tokens, prompt_tokens);
|
slot.n_past = common_part(slot.cache_tokens, prompt_tokens);
|
||||||
@@ -1822,10 +1832,11 @@ struct llama_server_context
|
|||||||
|
|
||||||
if (has_images && !ingest_images(slot, n_batch))
|
if (has_images && !ingest_images(slot, n_batch))
|
||||||
{
|
{
|
||||||
LOG_ERROR("failed processing images", {
|
LOG_ERR("%s: failed processing images Slot id : %d, Task id: %d",
|
||||||
"slot_id", slot.id,
|
__func__,
|
||||||
"task_id", slot.task_id,
|
slot.id,
|
||||||
});
|
slot.task_id
|
||||||
|
);
|
||||||
// FIXME @phymbert: to be properly tested
|
// FIXME @phymbert: to be properly tested
|
||||||
// early returning without changing the slot state will block the slot for ever
|
// early returning without changing the slot state will block the slot for ever
|
||||||
// no one at the moment is checking the return value
|
// no one at the moment is checking the return value
|
||||||
@@ -1865,10 +1876,10 @@ struct llama_server_context
|
|||||||
const int bd = (slot.ga_w / slot.ga_n) * (slot.ga_n - 1);
|
const int bd = (slot.ga_w / slot.ga_n) * (slot.ga_n - 1);
|
||||||
const int dd = (slot.ga_w / slot.ga_n) - ib * bd - slot.ga_w;
|
const int dd = (slot.ga_w / slot.ga_n) - ib * bd - slot.ga_w;
|
||||||
|
|
||||||
LOG_TEE("\n");
|
LOG("\n");
|
||||||
LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd);
|
LOG("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd);
|
||||||
LOG_TEE("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n);
|
LOG("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n);
|
||||||
LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd);
|
LOG("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd);
|
||||||
|
|
||||||
llama_kv_cache_seq_add(ctx, slot.id, slot.ga_i, slot.n_past_se, ib * bd);
|
llama_kv_cache_seq_add(ctx, slot.id, slot.ga_i, slot.n_past_se, ib * bd);
|
||||||
llama_kv_cache_seq_div(ctx, slot.id, slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w,slot.ga_n);
|
llama_kv_cache_seq_div(ctx, slot.id, slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w,slot.ga_n);
|
||||||
@@ -1878,7 +1889,7 @@ struct llama_server_context
|
|||||||
|
|
||||||
slot.ga_i += slot.ga_w / slot.ga_n;
|
slot.ga_i += slot.ga_w / slot.ga_n;
|
||||||
|
|
||||||
LOG_TEE("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i);
|
LOG("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i);
|
||||||
}
|
}
|
||||||
slot.n_past_se += n_tokens;
|
slot.n_past_se += n_tokens;
|
||||||
}
|
}
|
||||||
@@ -1903,11 +1914,11 @@ struct llama_server_context
|
|||||||
if (n_batch == 1 || ret < 0)
|
if (n_batch == 1 || ret < 0)
|
||||||
{
|
{
|
||||||
// if you get here, it means the KV cache is full - try increasing it via the context size
|
// if you get here, it means the KV cache is full - try increasing it via the context size
|
||||||
LOG_TEE("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
|
LOG("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_TEE("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
|
LOG("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
|
||||||
|
|
||||||
// retry with half the batch size to try to find a free slot in the KV cache
|
// retry with half the batch size to try to find a free slot in the KV cache
|
||||||
n_batch /= 2;
|
n_batch /= 2;
|
||||||
@@ -1932,9 +1943,9 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
|
|
||||||
completion_token_output result;
|
completion_token_output result;
|
||||||
const llama_token id = llama_sampling_sample(slot.ctx_sampling, ctx, NULL, slot.i_batch - i);
|
const llama_token id = gpt_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i);
|
||||||
|
|
||||||
llama_sampling_accept(slot.ctx_sampling, ctx, id, true);
|
gpt_sampler_accept(slot.ctx_sampling, id, true);
|
||||||
|
|
||||||
slot.n_decoded += 1;
|
slot.n_decoded += 1;
|
||||||
if (slot.n_decoded == 1)
|
if (slot.n_decoded == 1)
|
||||||
@@ -1944,19 +1955,14 @@ struct llama_server_context
|
|||||||
metrics.on_prompt_eval(slot);
|
metrics.on_prompt_eval(slot);
|
||||||
}
|
}
|
||||||
|
|
||||||
llama_token_data_array cur_p = { slot.ctx_sampling->cur.data(), slot.ctx_sampling->cur.size(), false };
|
|
||||||
result.tok = id;
|
result.tok = id;
|
||||||
|
const auto * cur_p = gpt_sampler_get_candidates(slot.ctx_sampling);
|
||||||
|
|
||||||
const int32_t n_probs = slot.sparams.n_probs;
|
for (size_t i = 0; i < (size_t) slot.sparams.n_probs; ++i) {
|
||||||
if (slot.sparams.temp <= 0 && n_probs > 0)
|
result.probs.push_back({
|
||||||
{
|
cur_p->data[i].id,
|
||||||
// for llama_sample_token_greedy we need to sort candidates
|
i >= cur_p->size ? 0.0f : cur_p->data[i].p,
|
||||||
llama_sample_softmax(ctx, &cur_p);
|
});
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t i = 0; i < std::min(cur_p.size, (size_t)n_probs); ++i)
|
|
||||||
{
|
|
||||||
result.probs.push_back({cur_p.data[i].id, cur_p.data[i].p});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!process_token(result, slot))
|
if (!process_token(result, slot))
|
||||||
@@ -2110,6 +2116,9 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
|||||||
data["ignore_eos"] = predict->ignoreeos();
|
data["ignore_eos"] = predict->ignoreeos();
|
||||||
data["embeddings"] = predict->embeddings();
|
data["embeddings"] = predict->embeddings();
|
||||||
|
|
||||||
|
// Add the correlationid to json data
|
||||||
|
data["correlation_id"] = predict->correlationid();
|
||||||
|
|
||||||
// for each image in the request, add the image data
|
// for each image in the request, add the image data
|
||||||
//
|
//
|
||||||
for (int i = 0; i < predict->images_size(); i++) {
|
for (int i = 0; i < predict->images_size(); i++) {
|
||||||
@@ -2208,7 +2217,7 @@ static void params_parse(const backend::ModelOptions* request,
|
|||||||
params.model_alias = request->modelfile();
|
params.model_alias = request->modelfile();
|
||||||
params.n_ctx = request->contextsize();
|
params.n_ctx = request->contextsize();
|
||||||
//params.memory_f16 = request->f16memory();
|
//params.memory_f16 = request->f16memory();
|
||||||
params.n_threads = request->threads();
|
params.cpuparams.n_threads = request->threads();
|
||||||
params.n_gpu_layers = request->ngpulayers();
|
params.n_gpu_layers = request->ngpulayers();
|
||||||
params.n_batch = request->nbatch();
|
params.n_batch = request->nbatch();
|
||||||
// Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1
|
// Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1
|
||||||
@@ -2258,7 +2267,7 @@ static void params_parse(const backend::ModelOptions* request,
|
|||||||
}
|
}
|
||||||
// get the directory of modelfile
|
// get the directory of modelfile
|
||||||
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
|
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
|
||||||
params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor));
|
params.lora_adapters.push_back({ model_dir + "/"+request->loraadapter(), scale_factor });
|
||||||
}
|
}
|
||||||
params.use_mlock = request->mlock();
|
params.use_mlock = request->mlock();
|
||||||
params.use_mmap = request->mmap();
|
params.use_mmap = request->mmap();
|
||||||
@@ -2348,6 +2357,11 @@ public:
|
|||||||
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
||||||
reply.set_prompt_tokens(tokens_evaluated);
|
reply.set_prompt_tokens(tokens_evaluated);
|
||||||
|
|
||||||
|
// Log Request Correlation Id
|
||||||
|
LOG_VERBOSE("correlation:", {
|
||||||
|
{ "id", data["correlation_id"] }
|
||||||
|
});
|
||||||
|
|
||||||
// Send the reply
|
// Send the reply
|
||||||
writer->Write(reply);
|
writer->Write(reply);
|
||||||
|
|
||||||
@@ -2371,6 +2385,12 @@ public:
|
|||||||
std::string completion_text;
|
std::string completion_text;
|
||||||
task_result result = llama.queue_results.recv(task_id);
|
task_result result = llama.queue_results.recv(task_id);
|
||||||
if (!result.error && result.stop) {
|
if (!result.error && result.stop) {
|
||||||
|
|
||||||
|
// Log Request Correlation Id
|
||||||
|
LOG_VERBOSE("correlation:", {
|
||||||
|
{ "id", data["correlation_id"] }
|
||||||
|
});
|
||||||
|
|
||||||
completion_text = result.result_json.value("content", "");
|
completion_text = result.result_json.value("content", "");
|
||||||
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
|
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
|
||||||
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
||||||
@@ -2410,6 +2430,31 @@ public:
|
|||||||
|
|
||||||
return grpc::Status::OK;
|
return grpc::Status::OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
|
||||||
|
llama_client_slot* active_slot = llama.get_active_slot();
|
||||||
|
|
||||||
|
if (active_slot != nullptr) {
|
||||||
|
// Calculate the tokens per second using existing logic
|
||||||
|
double tokens_per_second = 1e3 / active_slot->t_token_generation * active_slot->n_decoded;
|
||||||
|
|
||||||
|
// Populate the response with metrics
|
||||||
|
response->set_slot_id(active_slot->id);
|
||||||
|
response->set_prompt_json_for_slot(active_slot->prompt.dump());
|
||||||
|
response->set_tokens_per_second(tokens_per_second);
|
||||||
|
response->set_tokens_generated(active_slot->n_decoded);
|
||||||
|
response->set_prompt_tokens_processed(active_slot->num_prompt_tokens_processed);
|
||||||
|
} else {
|
||||||
|
// Handle case when no active slot exists
|
||||||
|
response->set_slot_id(0);
|
||||||
|
response->set_prompt_json_for_slot("");
|
||||||
|
response->set_tokens_per_second(0);
|
||||||
|
response->set_tokens_generated(0);
|
||||||
|
response->set_prompt_tokens_processed(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
return grpc::Status::OK;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
void RunServer(const std::string& server_address) {
|
void RunServer(const std::string& server_address) {
|
||||||
|
|||||||
13
backend/cpp/llama/patches/01-llava.patch
Normal file
13
backend/cpp/llama/patches/01-llava.patch
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
|
||||||
|
index 342042ff..224db9b5 100644
|
||||||
|
--- a/examples/llava/clip.cpp
|
||||||
|
+++ b/examples/llava/clip.cpp
|
||||||
|
@@ -2419,7 +2419,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
||||||
|
struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
|
||||||
|
int* patches_data = (int*)malloc(ggml_nbytes(patches));
|
||||||
|
for (int i = 0; i < num_patches; i++) {
|
||||||
|
- patches_data[i] = i + 1;
|
||||||
|
+ patches_data[i] = i;
|
||||||
|
}
|
||||||
|
ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
|
||||||
|
free(patches_data);
|
||||||
@@ -1,5 +1,12 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
|
## Patches
|
||||||
|
## Apply patches from the `patches` directory
|
||||||
|
for patch in $(ls patches); do
|
||||||
|
echo "Applying patch $patch"
|
||||||
|
patch -d llama.cpp/ -p1 < patches/$patch
|
||||||
|
done
|
||||||
|
|
||||||
cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
|
cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
|
||||||
cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
|
cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
|
||||||
cp -rfv json.hpp llama.cpp/examples/grpc-server/
|
cp -rfv json.hpp llama.cpp/examples/grpc-server/
|
||||||
|
|||||||
@@ -480,31 +480,4 @@ static inline std::vector<uint8_t> base64_decode(const std::string & encoded_str
|
|||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// random string / id
|
|
||||||
//
|
|
||||||
|
|
||||||
static std::string random_string()
|
|
||||||
{
|
|
||||||
static const std::string str("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
|
|
||||||
|
|
||||||
std::random_device rd;
|
|
||||||
std::mt19937 generator(rd());
|
|
||||||
|
|
||||||
std::string result(32, ' ');
|
|
||||||
|
|
||||||
for (int i = 0; i < 32; ++i) {
|
|
||||||
result[i] = str[generator() % str.size()];
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::string gen_chatcmplid()
|
|
||||||
{
|
|
||||||
std::stringstream chatcmplid;
|
|
||||||
chatcmplid << "chatcmpl-" << random_string();
|
|
||||||
return chatcmplid.str();
|
|
||||||
}
|
}
|
||||||
@@ -1,62 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// This is a wrapper to statisfy the GRPC service interface
|
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
|
|
||||||
)
|
|
||||||
|
|
||||||
type LLM struct {
|
|
||||||
base.SingleThread
|
|
||||||
|
|
||||||
gpt4all *gpt4all.Model
|
|
||||||
}
|
|
||||||
|
|
||||||
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
|
||||||
model, err := gpt4all.New(opts.ModelFile,
|
|
||||||
gpt4all.SetThreads(int(opts.Threads)),
|
|
||||||
gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath))
|
|
||||||
llm.gpt4all = model
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func buildPredictOptions(opts *pb.PredictOptions) []gpt4all.PredictOption {
|
|
||||||
predictOptions := []gpt4all.PredictOption{
|
|
||||||
gpt4all.SetTemperature(float64(opts.Temperature)),
|
|
||||||
gpt4all.SetTopP(float64(opts.TopP)),
|
|
||||||
gpt4all.SetTopK(int(opts.TopK)),
|
|
||||||
gpt4all.SetTokens(int(opts.Tokens)),
|
|
||||||
}
|
|
||||||
|
|
||||||
if opts.Batch != 0 {
|
|
||||||
predictOptions = append(predictOptions, gpt4all.SetBatch(int(opts.Batch)))
|
|
||||||
}
|
|
||||||
return predictOptions
|
|
||||||
}
|
|
||||||
|
|
||||||
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
|
||||||
return llm.gpt4all.Predict(opts.Prompt, buildPredictOptions(opts)...)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
|
||||||
predictOptions := buildPredictOptions(opts)
|
|
||||||
|
|
||||||
go func() {
|
|
||||||
llm.gpt4all.SetTokenCallback(func(token string) bool {
|
|
||||||
results <- token
|
|
||||||
return true
|
|
||||||
})
|
|
||||||
_, err := llm.gpt4all.Predict(opts.Prompt, predictOptions...)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println("err: ", err)
|
|
||||||
}
|
|
||||||
llm.gpt4all.SetTokenCallback(nil)
|
|
||||||
close(results)
|
|
||||||
}()
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
|
||||||
|
|
||||||
import (
|
|
||||||
"flag"
|
|
||||||
|
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
flag.Parse()
|
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,104 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"os/exec"
|
|
||||||
"path/filepath"
|
|
||||||
|
|
||||||
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
|
||||||
"github.com/go-audio/wav"
|
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
|
||||||
)
|
|
||||||
|
|
||||||
func ffmpegCommand(args []string) (string, error) {
|
|
||||||
cmd := exec.Command("ffmpeg", args...) // Constrain this to ffmpeg to permit security scanner to see that the command is safe.
|
|
||||||
cmd.Env = os.Environ()
|
|
||||||
out, err := cmd.CombinedOutput()
|
|
||||||
return string(out), err
|
|
||||||
}
|
|
||||||
|
|
||||||
// AudioToWav converts audio to wav for transcribe.
|
|
||||||
// TODO: use https://github.com/mccoyst/ogg?
|
|
||||||
func audioToWav(src, dst string) error {
|
|
||||||
commandArgs := []string{"-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
|
|
||||||
out, err := ffmpegCommand(commandArgs)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("error: %w out: %s", err, out)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func Transcript(model whisper.Model, audiopath, language string, translate bool, threads uint) (schema.TranscriptionResult, error) {
|
|
||||||
res := schema.TranscriptionResult{}
|
|
||||||
|
|
||||||
dir, err := os.MkdirTemp("", "whisper")
|
|
||||||
if err != nil {
|
|
||||||
return res, err
|
|
||||||
}
|
|
||||||
defer os.RemoveAll(dir)
|
|
||||||
|
|
||||||
convertedPath := filepath.Join(dir, "converted.wav")
|
|
||||||
|
|
||||||
if err := audioToWav(audiopath, convertedPath); err != nil {
|
|
||||||
return res, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Open samples
|
|
||||||
fh, err := os.Open(convertedPath)
|
|
||||||
if err != nil {
|
|
||||||
return res, err
|
|
||||||
}
|
|
||||||
defer fh.Close()
|
|
||||||
|
|
||||||
// Read samples
|
|
||||||
d := wav.NewDecoder(fh)
|
|
||||||
buf, err := d.FullPCMBuffer()
|
|
||||||
if err != nil {
|
|
||||||
return res, err
|
|
||||||
}
|
|
||||||
|
|
||||||
data := buf.AsFloat32Buffer().Data
|
|
||||||
|
|
||||||
// Process samples
|
|
||||||
context, err := model.NewContext()
|
|
||||||
if err != nil {
|
|
||||||
return res, err
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
context.SetThreads(threads)
|
|
||||||
|
|
||||||
if language != "" {
|
|
||||||
context.SetLanguage(language)
|
|
||||||
} else {
|
|
||||||
context.SetLanguage("auto")
|
|
||||||
}
|
|
||||||
|
|
||||||
if translate {
|
|
||||||
context.SetTranslate(true)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := context.Process(data, nil, nil); err != nil {
|
|
||||||
return res, err
|
|
||||||
}
|
|
||||||
|
|
||||||
for {
|
|
||||||
s, err := context.NextSegment()
|
|
||||||
if err != nil {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
var tokens []int
|
|
||||||
for _, t := range s.Tokens {
|
|
||||||
tokens = append(tokens, t.Id)
|
|
||||||
}
|
|
||||||
|
|
||||||
segment := schema.Segment{Id: s.Num, Text: s.Text, Start: s.Start, End: s.End, Tokens: tokens}
|
|
||||||
res.Segments = append(res.Segments, segment)
|
|
||||||
|
|
||||||
res.Text += s.Text
|
|
||||||
}
|
|
||||||
|
|
||||||
return res, nil
|
|
||||||
}
|
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// This is a wrapper to satisfy the GRPC service interface
|
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
|
||||||
import (
|
|
||||||
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
)
|
|
||||||
|
|
||||||
type Whisper struct {
|
|
||||||
base.SingleThread
|
|
||||||
whisper whisper.Model
|
|
||||||
}
|
|
||||||
|
|
||||||
func (sd *Whisper) Load(opts *pb.ModelOptions) error {
|
|
||||||
// Note: the Model here is a path to a directory containing the model files
|
|
||||||
w, err := whisper.New(opts.ModelFile)
|
|
||||||
sd.whisper = w
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) {
|
|
||||||
return Transcript(sd.whisper, opts.Dst, opts.Language, opts.Translate, uint(opts.Threads))
|
|
||||||
}
|
|
||||||
105
backend/go/transcribe/whisper/whisper.go
Normal file
105
backend/go/transcribe/whisper/whisper.go
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// This is a wrapper to satisfy the GRPC service interface
|
||||||
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||||
|
import (
	"errors"
	"io"
	"os"
	"path/filepath"

	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
	"github.com/go-audio/wav"
	"github.com/mudler/LocalAI/pkg/grpc/base"
	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
	"github.com/mudler/LocalAI/pkg/utils"
)
|
||||||
|
|
||||||
|
type Whisper struct {
|
||||||
|
base.SingleThread
|
||||||
|
whisper whisper.Model
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sd *Whisper) Load(opts *pb.ModelOptions) error {
|
||||||
|
// Note: the Model here is a path to a directory containing the model files
|
||||||
|
w, err := whisper.New(opts.ModelFile)
|
||||||
|
sd.whisper = w
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (pb.TranscriptResult, error) {
|
||||||
|
|
||||||
|
dir, err := os.MkdirTemp("", "whisper")
|
||||||
|
if err != nil {
|
||||||
|
return pb.TranscriptResult{}, err
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(dir)
|
||||||
|
|
||||||
|
convertedPath := filepath.Join(dir, "converted.wav")
|
||||||
|
|
||||||
|
if err := utils.AudioToWav(opts.Dst, convertedPath); err != nil {
|
||||||
|
return pb.TranscriptResult{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Open samples
|
||||||
|
fh, err := os.Open(convertedPath)
|
||||||
|
if err != nil {
|
||||||
|
return pb.TranscriptResult{}, err
|
||||||
|
}
|
||||||
|
defer fh.Close()
|
||||||
|
|
||||||
|
// Read samples
|
||||||
|
d := wav.NewDecoder(fh)
|
||||||
|
buf, err := d.FullPCMBuffer()
|
||||||
|
if err != nil {
|
||||||
|
return pb.TranscriptResult{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
data := buf.AsFloat32Buffer().Data
|
||||||
|
|
||||||
|
// Process samples
|
||||||
|
context, err := sd.whisper.NewContext()
|
||||||
|
if err != nil {
|
||||||
|
return pb.TranscriptResult{}, err
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
context.SetThreads(uint(opts.Threads))
|
||||||
|
|
||||||
|
if opts.Language != "" {
|
||||||
|
context.SetLanguage(opts.Language)
|
||||||
|
} else {
|
||||||
|
context.SetLanguage("auto")
|
||||||
|
}
|
||||||
|
|
||||||
|
if opts.Translate {
|
||||||
|
context.SetTranslate(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := context.Process(data, nil, nil); err != nil {
|
||||||
|
return pb.TranscriptResult{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
segments := []*pb.TranscriptSegment{}
|
||||||
|
text := ""
|
||||||
|
for {
|
||||||
|
s, err := context.NextSegment()
|
||||||
|
if err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
var tokens []int32
|
||||||
|
for _, t := range s.Tokens {
|
||||||
|
tokens = append(tokens, int32(t.Id))
|
||||||
|
}
|
||||||
|
|
||||||
|
segment := &pb.TranscriptSegment{Id: int32(s.Num), Text: s.Text, Start: int64(s.Start), End: int64(s.End), Tokens: tokens}
|
||||||
|
segments = append(segments, segment)
|
||||||
|
|
||||||
|
text += s.Text
|
||||||
|
}
|
||||||
|
|
||||||
|
return pb.TranscriptResult{
|
||||||
|
Segments: segments,
|
||||||
|
Text: text,
|
||||||
|
}, nil
|
||||||
|
|
||||||
|
}
|
||||||
@@ -2,4 +2,4 @@
|
|||||||
intel-extension-for-pytorch
|
intel-extension-for-pytorch
|
||||||
torch
|
torch
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
accelerate
|
accelerate
|
||||||
auto-gptq==0.7.1
|
auto-gptq==0.7.1
|
||||||
grpcio==1.65.4
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
transformers
|
transformers
|
||||||
4
backend/python/bark/requirements-cpu.txt
Normal file
4
backend/python/bark/requirements-cpu.txt
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
transformers
|
||||||
|
accelerate
|
||||||
|
torch
|
||||||
|
torchaudio
|
||||||
@@ -1,3 +1,5 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||||
torch
|
torch
|
||||||
torchaudio
|
torchaudio
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
@@ -1,2 +1,4 @@
|
|||||||
torch
|
torch
|
||||||
torchaudio
|
torchaudio
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
@@ -1,3 +1,5 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||||
torch
|
torch
|
||||||
torchaudio
|
torchaudio
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
@@ -3,4 +3,6 @@ intel-extension-for-pytorch
|
|||||||
torch
|
torch
|
||||||
torchaudio
|
torchaudio
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
@@ -1,6 +1,4 @@
|
|||||||
accelerate
|
|
||||||
bark==0.1.5
|
bark==0.1.5
|
||||||
grpcio==1.65.4
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
transformers
|
|
||||||
@@ -18,10 +18,23 @@
|
|||||||
# source $(dirname $0)/../common/libbackend.sh
|
# source $(dirname $0)/../common/libbackend.sh
|
||||||
#
|
#
|
||||||
function init() {
|
function init() {
|
||||||
|
# Name of the backend (directory name)
|
||||||
BACKEND_NAME=${PWD##*/}
|
BACKEND_NAME=${PWD##*/}
|
||||||
|
|
||||||
|
# Path where all backends files are
|
||||||
MY_DIR=$(realpath `dirname $0`)
|
MY_DIR=$(realpath `dirname $0`)
|
||||||
|
|
||||||
|
# Build type
|
||||||
BUILD_PROFILE=$(getBuildProfile)
|
BUILD_PROFILE=$(getBuildProfile)
|
||||||
|
|
||||||
|
# Environment directory
|
||||||
|
EDIR=${MY_DIR}
|
||||||
|
|
||||||
|
# Allow to specify a custom env dir for shared environments
|
||||||
|
if [ "x${ENV_DIR}" != "x" ]; then
|
||||||
|
EDIR=${ENV_DIR}
|
||||||
|
fi
|
||||||
|
|
||||||
# If a backend has defined a list of valid build profiles...
|
# If a backend has defined a list of valid build profiles...
|
||||||
if [ ! -z "${LIMIT_TARGETS}" ]; then
|
if [ ! -z "${LIMIT_TARGETS}" ]; then
|
||||||
isValidTarget=$(checkTargets ${LIMIT_TARGETS})
|
isValidTarget=$(checkTargets ${LIMIT_TARGETS})
|
||||||
@@ -74,13 +87,14 @@ function getBuildProfile() {
|
|||||||
# This function is idempotent, so you can call it as many times as you want and it will
|
# This function is idempotent, so you can call it as many times as you want and it will
|
||||||
# always result in an activated virtual environment
|
# always result in an activated virtual environment
|
||||||
function ensureVenv() {
|
function ensureVenv() {
|
||||||
if [ ! -d "${MY_DIR}/venv" ]; then
|
if [ ! -d "${EDIR}/venv" ]; then
|
||||||
uv venv ${MY_DIR}/venv
|
uv venv ${EDIR}/venv
|
||||||
echo "virtualenv created"
|
echo "virtualenv created"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "x${VIRTUAL_ENV}" != "x${MY_DIR}/venv" ]; then
|
# Source if we are not already in a Virtual env
|
||||||
source ${MY_DIR}/venv/bin/activate
|
if [ "x${VIRTUAL_ENV}" != "x${EDIR}/venv" ]; then
|
||||||
|
source ${EDIR}/venv/bin/activate
|
||||||
echo "virtualenv activated"
|
echo "virtualenv activated"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@@ -113,21 +127,25 @@ function installRequirements() {
|
|||||||
|
|
||||||
# These are the requirements files we will attempt to install, in order
|
# These are the requirements files we will attempt to install, in order
|
||||||
declare -a requirementFiles=(
|
declare -a requirementFiles=(
|
||||||
"${MY_DIR}/requirements-install.txt"
|
"${EDIR}/requirements-install.txt"
|
||||||
"${MY_DIR}/requirements.txt"
|
"${EDIR}/requirements.txt"
|
||||||
"${MY_DIR}/requirements-${BUILD_TYPE}.txt"
|
"${EDIR}/requirements-${BUILD_TYPE}.txt"
|
||||||
)
|
)
|
||||||
|
|
||||||
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
|
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
|
||||||
requirementFiles+=("${MY_DIR}/requirements-${BUILD_PROFILE}.txt")
|
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}.txt")
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements
|
# if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements
|
||||||
if [ "x${BUILD_TYPE}" == "x" ]; then
|
if [ "x${BUILD_TYPE}" == "x" ]; then
|
||||||
requirementFiles+=("${MY_DIR}/requirements-cpu.txt")
|
requirementFiles+=("${EDIR}/requirements-cpu.txt")
|
||||||
fi
|
fi
|
||||||
|
|
||||||
requirementFiles+=("${MY_DIR}/requirements-after.txt")
|
requirementFiles+=("${EDIR}/requirements-after.txt")
|
||||||
|
|
||||||
|
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
|
||||||
|
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}-after.txt")
|
||||||
|
fi
|
||||||
|
|
||||||
for reqFile in ${requirementFiles[@]}; do
|
for reqFile in ${requirementFiles[@]}; do
|
||||||
if [ -f ${reqFile} ]; then
|
if [ -f ${reqFile} ]; then
|
||||||
|
|||||||
@@ -1,2 +1,2 @@
|
|||||||
grpcio==1.65.4
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
3
backend/python/coqui/requirements-cpu.txt
Normal file
3
backend/python/coqui/requirements-cpu.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
transformers
|
||||||
|
accelerate
|
||||||
|
torch
|
||||||
@@ -1,3 +1,5 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||||
torch
|
torch
|
||||||
torchaudio
|
torchaudio
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
@@ -1,2 +1,4 @@
|
|||||||
torch
|
torch
|
||||||
torchaudio
|
torchaudio
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
@@ -1,3 +1,5 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||||
torch
|
torch
|
||||||
torchaudio
|
torchaudio
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
@@ -3,4 +3,6 @@ intel-extension-for-pytorch
|
|||||||
torch
|
torch
|
||||||
torchaudio
|
torchaudio
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
@@ -1,6 +1,4 @@
|
|||||||
accelerate
|
coqui-tts
|
||||||
TTS==0.22.0
|
grpcio==1.66.2
|
||||||
grpcio==1.65.4
|
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
transformers
|
|
||||||
@@ -18,13 +18,13 @@ import backend_pb2_grpc
|
|||||||
import grpc
|
import grpc
|
||||||
|
|
||||||
from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
|
from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
|
||||||
EulerAncestralDiscreteScheduler
|
EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
|
||||||
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
|
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
|
||||||
from diffusers.pipelines.stable_diffusion import safety_checker
|
from diffusers.pipelines.stable_diffusion import safety_checker
|
||||||
from diffusers.utils import load_image, export_to_video
|
from diffusers.utils import load_image, export_to_video
|
||||||
from compel import Compel, ReturnedEmbeddingsType
|
from compel import Compel, ReturnedEmbeddingsType
|
||||||
|
from optimum.quanto import freeze, qfloat8, quantize
|
||||||
from transformers import CLIPTextModel
|
from transformers import CLIPTextModel, T5EncoderModel
|
||||||
from safetensors.torch import load_file
|
from safetensors.torch import load_file
|
||||||
|
|
||||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||||
@@ -163,10 +163,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
modelFile = request.Model
|
modelFile = request.Model
|
||||||
|
|
||||||
self.cfg_scale = 7
|
self.cfg_scale = 7
|
||||||
|
self.PipelineType = request.PipelineType
|
||||||
|
|
||||||
if request.CFGScale != 0:
|
if request.CFGScale != 0:
|
||||||
self.cfg_scale = request.CFGScale
|
self.cfg_scale = request.CFGScale
|
||||||
|
|
||||||
clipmodel = "runwayml/stable-diffusion-v1-5"
|
clipmodel = "Lykon/dreamshaper-8"
|
||||||
if request.CLIPModel != "":
|
if request.CLIPModel != "":
|
||||||
clipmodel = request.CLIPModel
|
clipmodel = request.CLIPModel
|
||||||
clipsubfolder = "text_encoder"
|
clipsubfolder = "text_encoder"
|
||||||
@@ -244,6 +246,30 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
torch_dtype=torchType,
|
torch_dtype=torchType,
|
||||||
use_safetensors=True,
|
use_safetensors=True,
|
||||||
variant=variant)
|
variant=variant)
|
||||||
|
elif request.PipelineType == "FluxPipeline":
|
||||||
|
self.pipe = FluxPipeline.from_pretrained(
|
||||||
|
request.Model,
|
||||||
|
torch_dtype=torch.bfloat16)
|
||||||
|
if request.LowVRAM:
|
||||||
|
self.pipe.enable_model_cpu_offload()
|
||||||
|
elif request.PipelineType == "FluxTransformer2DModel":
|
||||||
|
dtype = torch.bfloat16
|
||||||
|
# specify from environment or default to "ChuckMcSneed/FLUX.1-dev"
|
||||||
|
bfl_repo = os.environ.get("BFL_REPO", "ChuckMcSneed/FLUX.1-dev")
|
||||||
|
|
||||||
|
transformer = FluxTransformer2DModel.from_single_file(modelFile, torch_dtype=dtype)
|
||||||
|
quantize(transformer, weights=qfloat8)
|
||||||
|
freeze(transformer)
|
||||||
|
text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype)
|
||||||
|
quantize(text_encoder_2, weights=qfloat8)
|
||||||
|
freeze(text_encoder_2)
|
||||||
|
|
||||||
|
self.pipe = FluxPipeline.from_pretrained(bfl_repo, transformer=None, text_encoder_2=None, torch_dtype=dtype)
|
||||||
|
self.pipe.transformer = transformer
|
||||||
|
self.pipe.text_encoder_2 = text_encoder_2
|
||||||
|
|
||||||
|
if request.LowVRAM:
|
||||||
|
self.pipe.enable_model_cpu_offload()
|
||||||
|
|
||||||
if CLIPSKIP and request.CLIPSkip != 0:
|
if CLIPSKIP and request.CLIPSkip != 0:
|
||||||
self.clip_skip = request.CLIPSkip
|
self.clip_skip = request.CLIPSkip
|
||||||
@@ -399,6 +425,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
request.seed
|
request.seed
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if self.PipelineType == "FluxPipeline":
|
||||||
|
kwargs["max_sequence_length"] = 256
|
||||||
|
|
||||||
|
if self.PipelineType == "FluxTransformer2DModel":
|
||||||
|
kwargs["output_type"] = "pil"
|
||||||
|
kwargs["generator"] = torch.Generator("cpu").manual_seed(0)
|
||||||
|
|
||||||
if self.img2vid:
|
if self.img2vid:
|
||||||
# Load the conditioning image
|
# Load the conditioning image
|
||||||
image = load_image(request.src)
|
image = load_image(request.src)
|
||||||
|
|||||||
9
backend/python/diffusers/requirements-cpu.txt
Normal file
9
backend/python/diffusers/requirements-cpu.txt
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
diffusers
|
||||||
|
opencv-python
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
|
compel
|
||||||
|
peft
|
||||||
|
sentencepiece
|
||||||
|
torch
|
||||||
|
optimum-quanto
|
||||||
@@ -1,2 +1,10 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||||
torch
|
torch
|
||||||
|
diffusers
|
||||||
|
opencv-python
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
|
compel
|
||||||
|
peft
|
||||||
|
sentencepiece
|
||||||
|
optimum-quanto
|
||||||
@@ -1 +1,9 @@
|
|||||||
torch
|
torch
|
||||||
|
diffusers
|
||||||
|
opencv-python
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
|
compel
|
||||||
|
peft
|
||||||
|
sentencepiece
|
||||||
|
optimum-quanto
|
||||||
@@ -1,3 +1,11 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||||
torch
|
torch==2.3.1+rocm6.0
|
||||||
torchvision
|
torchvision==0.18.1+rocm6.0
|
||||||
|
diffusers
|
||||||
|
opencv-python
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
|
compel
|
||||||
|
peft
|
||||||
|
sentencepiece
|
||||||
|
optimum-quanto
|
||||||
@@ -3,4 +3,12 @@ intel-extension-for-pytorch
|
|||||||
torch
|
torch
|
||||||
torchvision
|
torchvision
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
|
diffusers
|
||||||
|
opencv-python
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
|
compel
|
||||||
|
peft
|
||||||
|
sentencepiece
|
||||||
|
optimum-quanto
|
||||||
@@ -1,12 +1,5 @@
|
|||||||
setuptools
|
setuptools
|
||||||
accelerate
|
grpcio==1.66.2
|
||||||
compel
|
|
||||||
peft
|
|
||||||
diffusers
|
|
||||||
grpcio==1.65.4
|
|
||||||
opencv-python
|
|
||||||
pillow
|
pillow
|
||||||
protobuf
|
protobuf
|
||||||
sentencepiece
|
|
||||||
transformers
|
|
||||||
certifi
|
certifi
|
||||||
|
|||||||
@@ -53,7 +53,7 @@ class TestBackendServicer(unittest.TestCase):
|
|||||||
self.setUp()
|
self.setUp()
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
with grpc.insecure_channel("localhost:50051") as channel:
|
||||||
stub = backend_pb2_grpc.BackendStub(channel)
|
stub = backend_pb2_grpc.BackendStub(channel)
|
||||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="runwayml/stable-diffusion-v1-5"))
|
response = stub.LoadModel(backend_pb2.ModelOptions(Model="Lykon/dreamshaper-8"))
|
||||||
self.assertTrue(response.success)
|
self.assertTrue(response.success)
|
||||||
self.assertEqual(response.message, "Model loaded successfully")
|
self.assertEqual(response.message, "Model loaded successfully")
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
@@ -71,7 +71,7 @@ class TestBackendServicer(unittest.TestCase):
|
|||||||
self.setUp()
|
self.setUp()
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
with grpc.insecure_channel("localhost:50051") as channel:
|
||||||
stub = backend_pb2_grpc.BackendStub(channel)
|
stub = backend_pb2_grpc.BackendStub(channel)
|
||||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="runwayml/stable-diffusion-v1-5"))
|
response = stub.LoadModel(backend_pb2.ModelOptions(Model="Lykon/dreamshaper-8"))
|
||||||
print(response.message)
|
print(response.message)
|
||||||
self.assertTrue(response.success)
|
self.assertTrue(response.success)
|
||||||
image_req = backend_pb2.GenerateImageRequest(positive_prompt="cat", width=16,height=16, dst="test.jpg")
|
image_req = backend_pb2.GenerateImageRequest(positive_prompt="cat", width=16,height=16, dst="test.jpg")
|
||||||
@@ -81,4 +81,4 @@ class TestBackendServicer(unittest.TestCase):
|
|||||||
print(err)
|
print(err)
|
||||||
self.fail("Image gen service failed")
|
self.fail("Image gen service failed")
|
||||||
finally:
|
finally:
|
||||||
self.tearDown()
|
self.tearDown()
|
||||||
|
|||||||
1
backend/python/exllama/.gitignore
vendored
1
backend/python/exllama/.gitignore
vendored
@@ -1 +0,0 @@
|
|||||||
source
|
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
export CONDA_ENV_PATH = "exllama.yml"
|
|
||||||
|
|
||||||
.PHONY: exllama
|
|
||||||
exllama: protogen
|
|
||||||
bash install.sh ${CONDA_ENV_PATH}
|
|
||||||
|
|
||||||
.PHONY: run
|
|
||||||
run: protogen
|
|
||||||
@echo "Running exllama..."
|
|
||||||
bash run.sh
|
|
||||||
@echo "exllama run."
|
|
||||||
|
|
||||||
.PHONY: protogen
|
|
||||||
protogen: backend_pb2_grpc.py backend_pb2.py
|
|
||||||
|
|
||||||
.PHONY: protogen-clean
|
|
||||||
protogen-clean:
|
|
||||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
|
||||||
|
|
||||||
backend_pb2_grpc.py backend_pb2.py:
|
|
||||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
|
||||||
|
|
||||||
.PHONY: clean
|
|
||||||
clean: protogen-clean
|
|
||||||
$(RM) -r venv source __pycache__
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
# Creating a separate environment for the exllama project
|
|
||||||
|
|
||||||
```
|
|
||||||
make exllama
|
|
||||||
```
|
|
||||||
@@ -1,159 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
import grpc
|
|
||||||
from concurrent import futures
|
|
||||||
import time
|
|
||||||
import backend_pb2
|
|
||||||
import backend_pb2_grpc
|
|
||||||
import argparse
|
|
||||||
import signal
|
|
||||||
import sys
|
|
||||||
import os, glob
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
import torch
|
|
||||||
import torch.nn.functional as F
|
|
||||||
from torch import version as torch_version
|
|
||||||
|
|
||||||
from source.tokenizer import ExLlamaTokenizer
|
|
||||||
from source.generator import ExLlamaGenerator
|
|
||||||
from source.model import ExLlama, ExLlamaCache, ExLlamaConfig
|
|
||||||
|
|
||||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
|
||||||
|
|
||||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
|
||||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
|
||||||
|
|
||||||
# Implement the BackendServicer class with the service methods
|
|
||||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """gRPC servicer that serves text generation from an ExLlama model.

    ``LoadModel`` must succeed before ``Predict``/``PredictStream`` are used:
    it populates ``self.generator``, ``self.model``, ``self.tokenizer`` and
    ``self.cache``, which the other methods read.
    """

    def generate(self, prompt, max_new_tokens):
        """Generate a continuation of ``prompt``.

        Samples at most ``max_new_tokens`` tokens, stopping early when the
        tokenizer's EOS token is produced, and returns the decoded text of the
        newly generated tokens only. Returns ``''`` when ``max_new_tokens``
        is not positive.
        """
        self.generator.end_beam_search()

        # Tokenizing the input
        ids = self.generator.tokenizer.encode(prompt)

        self.generator.gen_begin_reuse(ids)
        initial_len = self.generator.sequence[0].shape[0]
        has_leading_space = False
        for i in range(max_new_tokens):
            token = self.generator.gen_single_token()
            # A first piece starting with the SentencePiece word marker means
            # the generated text begins with a space; remember it so it can be
            # re-added to the decoded text below.
            if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'):
                has_leading_space = True

            if token.item() == self.generator.tokenizer.eos_token_id:
                break

        if max_new_tokens <= 0:
            # No token was sampled, so there is nothing to decode.
            return ''

        # Decode once after sampling: only the final decoding is returned, so
        # re-decoding the whole tail after every token was wasted work.
        decoded_text = self.generator.tokenizer.decode(self.generator.sequence[0][initial_len:])
        if has_leading_space:
            decoded_text = ' ' + decoded_text
        return decoded_text

    def Health(self, request, context):
        """Liveness probe: always replies with ``OK``."""
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """Load the ExLlama model found in the directory ``request.ModelFile``.

        The directory is expected to contain ``tokenizer.model``,
        ``config.json`` and at least one ``*.safetensors`` file (the first
        match is used). ``request.ContextSize`` and ``request.RopeFreqScale``
        override the corresponding config values when non-zero.

        Returns a ``Result`` with ``success=True`` on success; any exception is
        reported as ``success=False`` with the error in ``message``.
        """
        try:
            # https://github.com/turboderp/exllama/blob/master/example_cfg.py
            model_directory = request.ModelFile

            # Locate files we need within that directory
            tokenizer_path = os.path.join(model_directory, "tokenizer.model")
            model_config_path = os.path.join(model_directory, "config.json")
            st_pattern = os.path.join(model_directory, "*.safetensors")
            model_path = glob.glob(st_pattern)[0]

            # Create config, model, tokenizer and generator

            config = ExLlamaConfig(model_config_path)  # create config from config.json
            config.model_path = model_path  # supply path to model weights file
            if (request.ContextSize):
                config.max_seq_len = request.ContextSize  # override max sequence length
                config.max_attention_size = request.ContextSize**2  # Should be set to context_size^2.
                # https://github.com/turboderp/exllama/issues/220#issuecomment-1720324163

            # Set Rope scaling.
            if (request.RopeFreqScale):
                # Alpha value for Rope scaling.
                # Higher value increases context but adds perplexity.
                # alpha_value and compress_pos_emb are mutually exclusive.
                # https://github.com/turboderp/exllama/issues/115
                config.alpha_value = request.RopeFreqScale
                config.calculate_rotary_embedding_base()

            model = ExLlama(config)  # create ExLlama instance and load the weights
            tokenizer = ExLlamaTokenizer(tokenizer_path)  # create tokenizer from tokenizer model file

            cache = ExLlamaCache(model, batch_size = 2)  # create cache for inference
            generator = ExLlamaGenerator(model, tokenizer, cache)  # create generator

            self.generator = generator
            self.model = model
            self.tokenizer = tokenizer
            self.cache = cache
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def Predict(self, request, context):
        """Run a single completion for ``request.Prompt``.

        Sampling settings come from the request; ``Penalty`` defaults to 1.15
        and ``Tokens`` to 512 when left at zero. The prompt text is stripped
        from the generated text if it appears in it.
        """
        penalty = 1.15
        if request.Penalty != 0.0:
            penalty = request.Penalty
        self.generator.settings.token_repetition_penalty_max = penalty
        self.generator.settings.temperature = request.Temperature
        self.generator.settings.top_k = request.TopK
        self.generator.settings.top_p = request.TopP

        tokens = 512
        if request.Tokens != 0:
            tokens = request.Tokens

        # Rebuild cache and generator if the cache was left with batch size 1.
        if self.cache.batch_size == 1:
            del self.cache
            self.cache = ExLlamaCache(self.model, batch_size=2)
            self.generator = ExLlamaGenerator(self.model, self.tokenizer, self.cache)

        t = self.generate(request.Prompt, tokens)

        # Remove prompt from response if present
        if request.Prompt in t:
            t = t.replace(request.Prompt, "")

        # NOTE(review): Health answers with backend_pb2.Reply while this
        # returns backend_pb2.Result - confirm against backend.proto which
        # message type Predict is declared to return.
        return backend_pb2.Result(message=bytes(t, encoding='utf-8'))

    def PredictStream(self, request, context):
        """Streaming variant of ``Predict``.

        Token-by-token streaming is not implemented yet: the full completion
        is produced by ``Predict`` and emitted as a single stream item. This
        is a generator so that a streaming handler gets the iterator it
        expects rather than a bare message.
        """
        yield self.Predict(request, context)
|
|
||||||
|
|
||||||
|
|
||||||
def serve(address):
    """Start the gRPC backend server on ``address`` and block until stopped.

    The server runs on a thread pool of ``MAX_WORKERS`` workers. SIGINT and
    SIGTERM stop the server immediately and exit the process with status 0.
    """
    executor = futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)
    grpc_server = grpc.server(executor)
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), grpc_server)
    grpc_server.add_insecure_port(address)
    grpc_server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)

    def _shutdown(signum, stack_frame):
        """Stop the server without a grace period and terminate the process."""
        print("Received termination signal. Shutting down...")
        grpc_server.stop(0)
        sys.exit(0)

    # Same handler for both termination signals.
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, _shutdown)

    # The server works on background threads; keep the main thread alive.
    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        grpc_server.stop(0)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Script entry point: parse the bind address and run the server.
    cli = argparse.ArgumentParser(description="Run the gRPC server.")
    cli.add_argument(
        "--addr",
        default="localhost:50051",
        help="The address to bind the server to.",
    )
    options = cli.parse_args()

    serve(options.addr)
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
#!/bin/bash
# Installs the exllama backend: python requirements first, then the upstream
# exllama sources, into which the backend's python files are copied.
set -e

# Valid build profiles for this backend; read by libbackend.sh when sourced.
LIMIT_TARGETS="cublas"

source "$(dirname "$0")/../common/libbackend.sh"

installRequirements

# Paths are quoted so an install location containing spaces keeps working.
git clone https://github.com/turboderp/exllama "${MY_DIR}/source"
# BUILD_ISOLATION_FLAG is intentionally left unquoted: it may be empty.
uv pip install ${BUILD_ISOLATION_FLAG} --requirement "${MY_DIR}/source/requirements.txt"

cp -v ./*py "${MY_DIR}/source/"
|
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
|
||||||
torch
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
torch
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
grpcio==1.65.0
|
|
||||||
protobuf
|
|
||||||
transformers
|
|
||||||
certifi
|
|
||||||
setuptools
|
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
#!/bin/bash
# Starts the exllama backend from the sources cloned by install.sh.

# Valid build profiles for this backend; read by libbackend.sh when sourced.
LIMIT_TARGETS="cublas"

# MY_DIR is only assigned inside libbackend.sh's init(), i.e. after this line
# runs, so it cannot be relied on here: unless the caller exported it, the
# path used to collapse to "/source/backend.py". Fall back to this script's
# own directory, which is the value init() computes for MY_DIR.
BACKEND_FILE="${MY_DIR:-$(realpath "$(dirname "$0")")}/source/backend.py"

source "$(dirname "$0")/../common/libbackend.sh"

# Quote "$@" so arguments containing spaces are forwarded intact.
startBackend "$@"
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
#!/bin/bash
# Runs the backend's unit tests through the shared libbackend.sh helper.
set -e

# Quoted so a checkout path containing spaces does not break the source call.
source "$(dirname "$0")/../common/libbackend.sh"

runUnittests
|
|
||||||
3
backend/python/exllama2/requirements-cpu.txt
Normal file
3
backend/python/exllama2/requirements-cpu.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
transformers
|
||||||
|
accelerate
|
||||||
|
torch
|
||||||
@@ -1,2 +1,4 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||||
torch
|
torch
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
@@ -1 +1,3 @@
|
|||||||
torch
|
torch
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
@@ -1,5 +1,4 @@
|
|||||||
accelerate
|
grpcio==1.66.2
|
||||||
grpcio==1.65.4
|
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
wheel
|
wheel
|
||||||
|
|||||||
@@ -1 +1,2 @@
|
|||||||
torch
|
torch
|
||||||
|
transformers
|
||||||
@@ -1,2 +1,3 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||||
torch
|
torch
|
||||||
|
transformers
|
||||||
@@ -1 +1,2 @@
|
|||||||
torch
|
torch
|
||||||
|
transformers
|
||||||
@@ -1,4 +1,3 @@
|
|||||||
grpcio==1.65.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
transformers
|
|
||||||
1
backend/python/openvoice/requirements-cpu.txt
Normal file
1
backend/python/openvoice/requirements-cpu.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
torch
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
intel-extension-for-pytorch
|
intel-extension-for-pytorch
|
||||||
torch
|
torch
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
grpcio==1.65.4
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
librosa==0.9.1
|
librosa==0.9.1
|
||||||
faster-whisper==1.0.3
|
faster-whisper==1.0.3
|
||||||
@@ -18,6 +18,6 @@ python-dotenv
|
|||||||
pypinyin==0.50.0
|
pypinyin==0.50.0
|
||||||
cn2an==0.5.22
|
cn2an==0.5.22
|
||||||
jieba==0.42.1
|
jieba==0.42.1
|
||||||
gradio==4.38.1
|
gradio==4.44.1
|
||||||
langid==1.1.6
|
langid==1.1.6
|
||||||
git+https://github.com/myshell-ai/MeloTTS.git
|
git+https://github.com/myshell-ai/MeloTTS.git
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.65.4
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
librosa
|
librosa
|
||||||
faster-whisper
|
faster-whisper
|
||||||
|
|||||||
@@ -15,5 +15,12 @@ installRequirements
|
|||||||
|
|
||||||
# https://github.com/descriptinc/audiotools/issues/101
|
# https://github.com/descriptinc/audiotools/issues/101
|
||||||
# incompatible protobuf versions.
|
# incompatible protobuf versions.
|
||||||
PYDIR=$(ls ${MY_DIR}/venv/lib)
|
PYDIR=python3.10
|
||||||
curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/builder.py
|
pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"
|
||||||
|
|
||||||
|
if [ ! -d ${pyenv} ]; then
|
||||||
|
echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py
|
||||||
|
|||||||
3
backend/python/parler-tts/requirements-after.txt
Normal file
3
backend/python/parler-tts/requirements-after.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17
|
||||||
|
llvmlite==0.43.0
|
||||||
|
numba==0.60.0
|
||||||
3
backend/python/parler-tts/requirements-cpu.txt
Normal file
3
backend/python/parler-tts/requirements-cpu.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
transformers
|
||||||
|
accelerate
|
||||||
|
torch
|
||||||
@@ -1,3 +1,5 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||||
torch
|
torch
|
||||||
torchaudio
|
torchaudio
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
@@ -1,2 +1,4 @@
|
|||||||
torch
|
torch
|
||||||
torchaudio
|
torchaudio
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
@@ -1,3 +1,5 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||||
torch
|
torch==2.3.0+rocm6.0
|
||||||
torchaudio
|
torchaudio==2.3.0+rocm6.0
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
|
|||||||
@@ -3,4 +3,6 @@ intel-extension-for-pytorch
|
|||||||
torch
|
torch
|
||||||
torchaudio
|
torchaudio
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
@@ -1,6 +1,4 @@
|
|||||||
accelerate
|
grpcio==1.66.2
|
||||||
grpcio==1.65.1
|
|
||||||
protobuf
|
protobuf
|
||||||
git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
|
|
||||||
certifi
|
certifi
|
||||||
transformers
|
llvmlite==0.43.0
|
||||||
@@ -1,31 +0,0 @@
|
|||||||
.PHONY: petals
|
|
||||||
petals: protogen
|
|
||||||
@echo "Creating virtual environment..."
|
|
||||||
bash install.sh "petals.yml"
|
|
||||||
@echo "Virtual environment created."
|
|
||||||
|
|
||||||
.PHONY: run
|
|
||||||
run: protogen
|
|
||||||
@echo "Running petals..."
|
|
||||||
bash run.sh
|
|
||||||
@echo "petals run."
|
|
||||||
|
|
||||||
.PHONY: test
|
|
||||||
test: protogen
|
|
||||||
@echo "Testing petals..."
|
|
||||||
bash test.sh
|
|
||||||
@echo "petals tested."
|
|
||||||
|
|
||||||
.PHONY: protogen
|
|
||||||
protogen: backend_pb2_grpc.py backend_pb2.py
|
|
||||||
|
|
||||||
.PHONY: protogen-clean
|
|
||||||
protogen-clean:
|
|
||||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
|
||||||
|
|
||||||
backend_pb2_grpc.py backend_pb2.py:
|
|
||||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
|
||||||
|
|
||||||
.PHONY: clean
|
|
||||||
clean: protogen-clean
|
|
||||||
rm -rf venv __pycache__
|
|
||||||
@@ -1,140 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
from concurrent import futures
|
|
||||||
import time
|
|
||||||
import argparse
|
|
||||||
import signal
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
|
|
||||||
import backend_pb2
|
|
||||||
import backend_pb2_grpc
|
|
||||||
|
|
||||||
import grpc
|
|
||||||
import torch
|
|
||||||
from transformers import AutoTokenizer
|
|
||||||
from petals import AutoDistributedModelForCausalLM
|
|
||||||
|
|
||||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
|
||||||
|
|
||||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
|
||||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
|
||||||
|
|
||||||
# Implement the BackendServicer class with the service methods
|
|
||||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|
||||||
"""
|
|
||||||
A gRPC servicer that implements the Backend service defined in backend.proto.
|
|
||||||
"""
|
|
||||||
def Health(self, request, context):
|
|
||||||
"""
|
|
||||||
Returns a health check message.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
request: The health check request.
|
|
||||||
context: The gRPC context.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
backend_pb2.Reply: The health check reply.
|
|
||||||
"""
|
|
||||||
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
|
||||||
|
|
||||||
def LoadModel(self, request, context):
|
|
||||||
"""
|
|
||||||
Loads a language model.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
request: The load model request.
|
|
||||||
context: The gRPC context.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
backend_pb2.Result: The load model result.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
self.tokenizer = AutoTokenizer.from_pretrained(request.Model, use_fast=False, add_bos_token=False)
|
|
||||||
self.model = AutoDistributedModelForCausalLM.from_pretrained(request.Model)
|
|
||||||
self.cuda = False
|
|
||||||
if request.CUDA:
|
|
||||||
self.model = self.model.cuda()
|
|
||||||
self.cuda = True
|
|
||||||
|
|
||||||
except Exception as err:
|
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
|
||||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
|
||||||
|
|
||||||
def Predict(self, request, context):
|
|
||||||
"""
|
|
||||||
Generates text based on the given prompt and sampling parameters.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
request: The predict request.
|
|
||||||
context: The gRPC context.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
backend_pb2.Result: The predict result.
|
|
||||||
"""
|
|
||||||
|
|
||||||
inputs = self.tokenizer(request.Prompt, return_tensors="pt")["input_ids"]
|
|
||||||
if self.cuda:
|
|
||||||
inputs = inputs.cuda()
|
|
||||||
|
|
||||||
if request.Tokens == 0:
|
|
||||||
# Max to max value if tokens are not specified
|
|
||||||
request.Tokens = 8192
|
|
||||||
|
|
||||||
# TODO: kwargs and map all parameters
|
|
||||||
outputs = self.model.generate(inputs, max_new_tokens=request.Tokens)
|
|
||||||
|
|
||||||
generated_text = self.tokenizer.decode(outputs[0])
|
|
||||||
# Remove prompt from response if present
|
|
||||||
if request.Prompt in generated_text:
|
|
||||||
generated_text = generated_text.replace(request.Prompt, "")
|
|
||||||
|
|
||||||
return backend_pb2.Result(message=bytes(generated_text, encoding='utf-8'))
|
|
||||||
|
|
||||||
def PredictStream(self, request, context):
|
|
||||||
"""
|
|
||||||
Generates text based on the given prompt and sampling parameters, and streams the results.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
request: The predict stream request.
|
|
||||||
context: The gRPC context.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
backend_pb2.Result: The predict stream result.
|
|
||||||
"""
|
|
||||||
# Implement PredictStream RPC
|
|
||||||
#for reply in some_data_generator():
|
|
||||||
# yield reply
|
|
||||||
# Not implemented yet
|
|
||||||
return self.Predict(request, context)
|
|
||||||
|
|
||||||
def serve(address):
|
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
|
||||||
server.add_insecure_port(address)
|
|
||||||
server.start()
|
|
||||||
print("Server started. Listening on: " + address, file=sys.stderr)
|
|
||||||
|
|
||||||
# Define the signal handler function
|
|
||||||
def signal_handler(sig, frame):
|
|
||||||
print("Received termination signal. Shutting down...")
|
|
||||||
server.stop(0)
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
||||||
# Set the signal handlers for SIGINT and SIGTERM
|
|
||||||
signal.signal(signal.SIGINT, signal_handler)
|
|
||||||
signal.signal(signal.SIGTERM, signal_handler)
|
|
||||||
|
|
||||||
try:
|
|
||||||
while True:
|
|
||||||
time.sleep(_ONE_DAY_IN_SECONDS)
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
server.stop(0)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
parser = argparse.ArgumentParser(description="Run the gRPC server.")
|
|
||||||
parser.add_argument(
|
|
||||||
"--addr", default="localhost:50051", help="The address to bind the server to."
|
|
||||||
)
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
serve(args.addr)
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -e
|
|
||||||
|
|
||||||
source $(dirname $0)/../common/libbackend.sh
|
|
||||||
|
|
||||||
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
|
|
||||||
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
|
|
||||||
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
|
|
||||||
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
|
|
||||||
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
|
||||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
|
||||||
fi
|
|
||||||
|
|
||||||
installRequirements
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user