mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-24 16:51:44 -04:00
Compare commits
291 Commits
propagate_
...
extra-l4t
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
27d7ada8dd | ||
|
|
464686aee6 | ||
|
|
bfa3d4ccff | ||
|
|
6a91288c8c | ||
|
|
96cb407ee0 | ||
|
|
5a19094d3a | ||
|
|
e3b943ffcb | ||
|
|
df30d6a482 | ||
|
|
c3c27b7e3d | ||
|
|
431716d4d6 | ||
|
|
d290fd159f | ||
|
|
051faaf771 | ||
|
|
41a2dfb0d9 | ||
|
|
ed0094c3d0 | ||
|
|
52fadeded1 | ||
|
|
a37fa8d9c4 | ||
|
|
03974a4dd4 | ||
|
|
1d6afbd65d | ||
|
|
d79f02ea09 | ||
|
|
ba2f426e3e | ||
|
|
732042e5c6 | ||
|
|
f1763aabf2 | ||
|
|
e0d90b173b | ||
|
|
ff07612bfa | ||
|
|
7badaf78a0 | ||
|
|
af41436f1b | ||
|
|
cd5489ce47 | ||
|
|
60ec2cf751 | ||
|
|
244f4b564f | ||
|
|
f1d6d65417 | ||
|
|
72e52c4f6a | ||
|
|
1656e1a88e | ||
|
|
7f62b418a4 | ||
|
|
1f4e66d638 | ||
|
|
a37b2c765c | ||
|
|
b4b67e00bd | ||
|
|
91e1ff5a95 | ||
|
|
d9204ea3b5 | ||
|
|
3d0fbcb4f7 | ||
|
|
03f3df9a82 | ||
|
|
fff35d5528 | ||
|
|
539e94db73 | ||
|
|
0f4f62cf3c | ||
|
|
e7cffd7afa | ||
|
|
26d790a2b6 | ||
|
|
5cf838c08d | ||
|
|
4db8f5cbce | ||
|
|
3b6b37a81b | ||
|
|
8f5aa2d9de | ||
|
|
a6bc8aa7c7 | ||
|
|
4ab107bc1a | ||
|
|
4c3710a531 | ||
|
|
901b06284a | ||
|
|
8eef5a2c5e | ||
|
|
e9cace137b | ||
|
|
9409c99738 | ||
|
|
4d44ebc2f2 | ||
|
|
9a1182fa01 | ||
|
|
66e9ef3f33 | ||
|
|
8282414583 | ||
|
|
d1d7ce83d4 | ||
|
|
5177837ab0 | ||
|
|
f9e368b7c4 | ||
|
|
eef80b9880 | ||
|
|
073eaec729 | ||
|
|
318225f631 | ||
|
|
89429a439b | ||
|
|
200fe358f0 | ||
|
|
e426ab7c23 | ||
|
|
715071b68d | ||
|
|
a05737c7e4 | ||
|
|
e8eb0b2c50 | ||
|
|
e15d29aba2 | ||
|
|
10675ac28e | ||
|
|
0ec25b8b07 | ||
|
|
e81ceff681 | ||
|
|
6831719e1e | ||
|
|
b264a91b3f | ||
|
|
1a08948e63 | ||
|
|
14a1e02f44 | ||
|
|
2f09aa1b85 | ||
|
|
a396040886 | ||
|
|
aeb1dca52e | ||
|
|
83a8d90c52 | ||
|
|
adebd557ce | ||
|
|
0c0e015b38 | ||
|
|
390bb3f58b | ||
|
|
30739d94a4 | ||
|
|
83e2dd5dff | ||
|
|
f496d0113b | ||
|
|
a752183fb5 | ||
|
|
296b97925f | ||
|
|
d0cc3047dc | ||
|
|
032a33de49 | ||
|
|
1e9bf19c8d | ||
|
|
4bd8434ae0 | ||
|
|
958f6eb722 | ||
|
|
96306a39a0 | ||
|
|
895cd7c76a | ||
|
|
cbdbe59f16 | ||
|
|
ee7904f170 | ||
|
|
a761e01944 | ||
|
|
96f8ec0402 | ||
|
|
8027fdf1c7 | ||
|
|
212c8e1a6d | ||
|
|
78533d7230 | ||
|
|
b5eeb5c5ab | ||
|
|
b147ad0596 | ||
|
|
7d0ac1ea3f | ||
|
|
d08d97bebf | ||
|
|
acb2eb23c8 | ||
|
|
de4aa9fb1d | ||
|
|
560ba6f25e | ||
|
|
8131ddd878 | ||
|
|
26c3deb673 | ||
|
|
6d20497d45 | ||
|
|
482c6b8be4 | ||
|
|
5bba5edf45 | ||
|
|
792b866727 | ||
|
|
f053f7bde2 | ||
|
|
d7dee3a5ec | ||
|
|
b8d74e52b1 | ||
|
|
62abe0d2c9 | ||
|
|
5414c294c4 | ||
|
|
1b3e89c89c | ||
|
|
69c6e5b192 | ||
|
|
0c02512f15 | ||
|
|
b0ead0bf12 | ||
|
|
ab5adf40af | ||
|
|
8d82afb595 | ||
|
|
aea71dd2c6 | ||
|
|
9fdb44323d | ||
|
|
6a299c04a7 | ||
|
|
9ce71fe427 | ||
|
|
e8de7b52da | ||
|
|
1780ccadbc | ||
|
|
f8cffd05e5 | ||
|
|
b898cd49b5 | ||
|
|
7cd33d10c9 | ||
|
|
cd480dbe5c | ||
|
|
cb8bf79ada | ||
|
|
b206eab80f | ||
|
|
80dc23fab9 | ||
|
|
844c0c422d | ||
|
|
07655c0c2e | ||
|
|
bebfd19b45 | ||
|
|
6e34430d99 | ||
|
|
0d08aaa29b | ||
|
|
66f9c06e7d | ||
|
|
775adf871f | ||
|
|
a0fc19a3d6 | ||
|
|
7bd18662a7 | ||
|
|
95b0739906 | ||
|
|
cad7e9a1cd | ||
|
|
4426efab05 | ||
|
|
6765b17acd | ||
|
|
ae1340d59b | ||
|
|
fc52f179fe | ||
|
|
4f43a9a162 | ||
|
|
20edd44463 | ||
|
|
1a4f9d8453 | ||
|
|
f2dd33b8f4 | ||
|
|
25e988868c | ||
|
|
ab344e4f47 | ||
|
|
fac7893dd6 | ||
|
|
9be338cfe4 | ||
|
|
b4d4f96919 | ||
|
|
8cc2d01caa | ||
|
|
bf37eebecb | ||
|
|
3f0850b58b | ||
|
|
2ffa89b8b9 | ||
|
|
d43adc0205 | ||
|
|
78b34505ab | ||
|
|
e55a1bed59 | ||
|
|
0d7550ad54 | ||
|
|
b5992255ac | ||
|
|
e845cc0401 | ||
|
|
a10033e8a4 | ||
|
|
6c6d840e6b | ||
|
|
a8b3b3d6f4 | ||
|
|
ec66f7e3b1 | ||
|
|
05841c2435 | ||
|
|
c553d73748 | ||
|
|
1006e8a2ed | ||
|
|
9bcfda171b | ||
|
|
baee4f7bd5 | ||
|
|
286dc32fe0 | ||
|
|
36e4c0fcf0 | ||
|
|
3c21c8789a | ||
|
|
d9facbcee9 | ||
|
|
930280ecac | ||
|
|
3415e6ae74 | ||
|
|
f1082f3c6d | ||
|
|
f345f7a795 | ||
|
|
1a2a7a57b3 | ||
|
|
ae80a2bd24 | ||
|
|
c30ecdd535 | ||
|
|
f16c7cef92 | ||
|
|
e1dd78bcea | ||
|
|
25acb0cbbc | ||
|
|
7674c80bb6 | ||
|
|
e044970a5b | ||
|
|
639526d207 | ||
|
|
998ff9fa22 | ||
|
|
7122c7472e | ||
|
|
671381267a | ||
|
|
d1762e098e | ||
|
|
270d33504b | ||
|
|
9b0983d027 | ||
|
|
afd0af987d | ||
|
|
58524d40c9 | ||
|
|
2a7222c6aa | ||
|
|
0093985e7c | ||
|
|
7f51e2dddf | ||
|
|
f3bbdef77d | ||
|
|
9cbf168dc0 | ||
|
|
9572f0577b | ||
|
|
1a14c7d45a | ||
|
|
5c29e0cd4d | ||
|
|
1a74af1492 | ||
|
|
8f6332ab23 | ||
|
|
816ae7a53a | ||
|
|
1d630e4185 | ||
|
|
bc8dd3ad14 | ||
|
|
b969053701 | ||
|
|
60bf7c9dd7 | ||
|
|
d65c10cee7 | ||
|
|
6c71698299 | ||
|
|
c7c275c7c8 | ||
|
|
d0adbee75d | ||
|
|
159a7f6df2 | ||
|
|
0eb2911aad | ||
|
|
cab9f88ca4 | ||
|
|
a3b675b09e | ||
|
|
6477913e8f | ||
|
|
138cd97ce7 | ||
|
|
4dd9ac39b0 | ||
|
|
23499ddc8a | ||
|
|
8864156300 | ||
|
|
478014ca18 | ||
|
|
d45477b003 | ||
|
|
396fb88e33 | ||
|
|
a429ec1b3f | ||
|
|
5b5fb9c22a | ||
|
|
801a87c3a6 | ||
|
|
badbd212f7 | ||
|
|
c4bbecc4d6 | ||
|
|
8a08e9ec67 | ||
|
|
61e486dbf5 | ||
|
|
f2f387e1dd | ||
|
|
3be9a08fc9 | ||
|
|
b325807c60 | ||
|
|
ae9855a39e | ||
|
|
9ac62b589f | ||
|
|
d12660a286 | ||
|
|
3d3bd2d10f | ||
|
|
b656d10556 | ||
|
|
8c67f38ef6 | ||
|
|
4623728cd7 | ||
|
|
5f804aa6e8 | ||
|
|
f52c6e3a31 | ||
|
|
0b4bb7a562 | ||
|
|
2bc4b56a79 | ||
|
|
fc920cc58a | ||
|
|
fdb560b8e5 | ||
|
|
708cba0c1b | ||
|
|
24abf568cb | ||
|
|
7ca0e2d925 | ||
|
|
037e8030bf | ||
|
|
472d11f884 | ||
|
|
b40d5d12b7 | ||
|
|
6938618e30 | ||
|
|
5d9c530eaa | ||
|
|
9429a53db7 | ||
|
|
1d6d301370 | ||
|
|
8f2be82667 | ||
|
|
cca911f3e5 | ||
|
|
e37bbbaacc | ||
|
|
59cbf38b4b | ||
|
|
432c31d904 | ||
|
|
af33483687 | ||
|
|
5051074845 | ||
|
|
fc4a714992 | ||
|
|
0429e00746 | ||
|
|
73f1f25b9a | ||
|
|
044570fa85 | ||
|
|
37527420de | ||
|
|
1854b8c612 | ||
|
|
b8824f2ad9 | ||
|
|
3ab83e91df | ||
|
|
f2cb261797 |
@@ -16,7 +16,7 @@ headers {
|
|||||||
|
|
||||||
body:json {
|
body:json {
|
||||||
{
|
{
|
||||||
"backend": "transformers-musicgen",
|
"backend": "transformers",
|
||||||
"model": "facebook/musicgen-small",
|
"model": "facebook/musicgen-small",
|
||||||
"input": "80s Synths playing Jazz"
|
"input": "80s Synths playing Jazz"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ services:
|
|||||||
args:
|
args:
|
||||||
- FFMPEG=true
|
- FFMPEG=true
|
||||||
- IMAGE_TYPE=extras
|
- IMAGE_TYPE=extras
|
||||||
- GO_TAGS=stablediffusion p2p tts
|
- GO_TAGS=p2p tts
|
||||||
env_file:
|
env_file:
|
||||||
- ../.env
|
- ../.env
|
||||||
ports:
|
ports:
|
||||||
|
|||||||
15
.env
15
.env
@@ -38,12 +38,12 @@
|
|||||||
## Uncomment and set to true to enable rebuilding from source
|
## Uncomment and set to true to enable rebuilding from source
|
||||||
# REBUILD=true
|
# REBUILD=true
|
||||||
|
|
||||||
## Enable go tags, available: stablediffusion, tts
|
## Enable go tags, available: p2p, tts
|
||||||
## stablediffusion: image generation with stablediffusion
|
## p2p: enable distributed inferencing
|
||||||
## tts: enables text-to-speech with go-piper
|
## tts: enables text-to-speech with go-piper
|
||||||
## (requires REBUILD=true)
|
## (requires REBUILD=true)
|
||||||
#
|
#
|
||||||
# GO_TAGS=stablediffusion
|
# GO_TAGS=p2p
|
||||||
|
|
||||||
## Path where to store generated images
|
## Path where to store generated images
|
||||||
# LOCALAI_IMAGE_PATH=/tmp/generated/images
|
# LOCALAI_IMAGE_PATH=/tmp/generated/images
|
||||||
@@ -82,6 +82,15 @@
|
|||||||
# Enable to allow p2p mode
|
# Enable to allow p2p mode
|
||||||
# LOCALAI_P2P=true
|
# LOCALAI_P2P=true
|
||||||
|
|
||||||
|
# Enable to use federated mode
|
||||||
|
# LOCALAI_FEDERATED=true
|
||||||
|
|
||||||
|
# Enable to start federation server
|
||||||
|
# FEDERATED_SERVER=true
|
||||||
|
|
||||||
|
# Define to use federation token
|
||||||
|
# TOKEN=""
|
||||||
|
|
||||||
### Watchdog settings
|
### Watchdog settings
|
||||||
###
|
###
|
||||||
# Enables watchdog to kill backends that are inactive for too much time
|
# Enables watchdog to kill backends that are inactive for too much time
|
||||||
|
|||||||
8
.github/dependabot.yml
vendored
8
.github/dependabot.yml
vendored
@@ -81,14 +81,6 @@ updates:
|
|||||||
directory: "/backend/python/transformers"
|
directory: "/backend/python/transformers"
|
||||||
schedule:
|
schedule:
|
||||||
interval: "weekly"
|
interval: "weekly"
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/transformers-musicgen"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
|
||||||
directory: "/backend/python/vall-e-x"
|
|
||||||
schedule:
|
|
||||||
interval: "weekly"
|
|
||||||
- package-ecosystem: "pip"
|
- package-ecosystem: "pip"
|
||||||
directory: "/backend/python/vllm"
|
directory: "/backend/python/vllm"
|
||||||
schedule:
|
schedule:
|
||||||
|
|||||||
4
.github/labeler.yml
vendored
4
.github/labeler.yml
vendored
@@ -5,6 +5,10 @@ dependencies:
|
|||||||
- any:
|
- any:
|
||||||
- changed-files:
|
- changed-files:
|
||||||
- any-glob-to-any-file: 'Makefile'
|
- any-glob-to-any-file: 'Makefile'
|
||||||
|
- changed-files:
|
||||||
|
- any-glob-to-any-file: '*.mod'
|
||||||
|
- changed-files:
|
||||||
|
- any-glob-to-any-file: '*.sum'
|
||||||
|
|
||||||
kind/documentation:
|
kind/documentation:
|
||||||
- any:
|
- any:
|
||||||
|
|||||||
2
.github/workflows/dependabot_auto.yml
vendored
2
.github/workflows/dependabot_auto.yml
vendored
@@ -14,7 +14,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Dependabot metadata
|
- name: Dependabot metadata
|
||||||
id: metadata
|
id: metadata
|
||||||
uses: dependabot/fetch-metadata@v2.2.0
|
uses: dependabot/fetch-metadata@v2.3.0
|
||||||
with:
|
with:
|
||||||
github-token: "${{ secrets.GITHUB_TOKEN }}"
|
github-token: "${{ secrets.GITHUB_TOKEN }}"
|
||||||
skip-commit-verification: true
|
skip-commit-verification: true
|
||||||
|
|||||||
47
.github/workflows/image.yml
vendored
47
.github/workflows/image.yml
vendored
@@ -280,6 +280,7 @@ jobs:
|
|||||||
makeflags: ${{ matrix.makeflags }}
|
makeflags: ${{ matrix.makeflags }}
|
||||||
latest-image: ${{ matrix.latest-image }}
|
latest-image: ${{ matrix.latest-image }}
|
||||||
latest-image-aio: ${{ matrix.latest-image-aio }}
|
latest-image-aio: ${{ matrix.latest-image-aio }}
|
||||||
|
skip-drivers: ${{ matrix.skip-drivers }}
|
||||||
secrets:
|
secrets:
|
||||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||||
@@ -301,6 +302,7 @@ jobs:
|
|||||||
latest-image: 'latest-cpu'
|
latest-image: 'latest-cpu'
|
||||||
latest-image-aio: 'latest-aio-cpu'
|
latest-image-aio: 'latest-aio-cpu'
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
|
skip-drivers: 'false'
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "11"
|
cuda-major-version: "11"
|
||||||
cuda-minor-version: "7"
|
cuda-minor-version: "7"
|
||||||
@@ -312,6 +314,7 @@ jobs:
|
|||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
|
skip-drivers: 'false'
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "12"
|
cuda-major-version: "12"
|
||||||
cuda-minor-version: "0"
|
cuda-minor-version: "0"
|
||||||
@@ -323,6 +326,7 @@ jobs:
|
|||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
|
skip-drivers: 'false'
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "11"
|
cuda-major-version: "11"
|
||||||
cuda-minor-version: "7"
|
cuda-minor-version: "7"
|
||||||
@@ -334,6 +338,7 @@ jobs:
|
|||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
|
skip-drivers: 'false'
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "12"
|
cuda-major-version: "12"
|
||||||
cuda-minor-version: "0"
|
cuda-minor-version: "0"
|
||||||
@@ -344,6 +349,7 @@ jobs:
|
|||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
- build-type: 'vulkan'
|
- build-type: 'vulkan'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
@@ -354,4 +360,45 @@ jobs:
|
|||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
|
skip-drivers: 'false'
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
|
gh-runner:
|
||||||
|
uses: ./.github/workflows/image_build.yml
|
||||||
|
with:
|
||||||
|
tag-latest: ${{ matrix.tag-latest }}
|
||||||
|
tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
|
ffmpeg: ${{ matrix.ffmpeg }}
|
||||||
|
image-type: ${{ matrix.image-type }}
|
||||||
|
build-type: ${{ matrix.build-type }}
|
||||||
|
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||||
|
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
|
platforms: ${{ matrix.platforms }}
|
||||||
|
runs-on: ${{ matrix.runs-on }}
|
||||||
|
aio: ${{ matrix.aio }}
|
||||||
|
base-image: ${{ matrix.base-image }}
|
||||||
|
grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||||
|
makeflags: ${{ matrix.makeflags }}
|
||||||
|
latest-image: ${{ matrix.latest-image }}
|
||||||
|
latest-image-aio: ${{ matrix.latest-image-aio }}
|
||||||
|
skip-drivers: ${{ matrix.skip-drivers }}
|
||||||
|
secrets:
|
||||||
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
|
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||||
|
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||||
|
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- build-type: 'cublas'
|
||||||
|
cuda-major-version: "12"
|
||||||
|
cuda-minor-version: "0"
|
||||||
|
platforms: 'linux/arm64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-nvidia-l4t-arm64-core'
|
||||||
|
latest-image: 'latest-nvidia-l4t-arm64-core'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
|
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||||
|
runs-on: 'ubuntu-24.04-arm'
|
||||||
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
|
skip-drivers: 'true'
|
||||||
6
.github/workflows/image_build.yml
vendored
6
.github/workflows/image_build.yml
vendored
@@ -49,6 +49,10 @@ on:
|
|||||||
description: 'FFMPEG'
|
description: 'FFMPEG'
|
||||||
default: ''
|
default: ''
|
||||||
type: string
|
type: string
|
||||||
|
skip-drivers:
|
||||||
|
description: 'Skip drivers by default'
|
||||||
|
default: 'false'
|
||||||
|
type: string
|
||||||
image-type:
|
image-type:
|
||||||
description: 'Image type'
|
description: 'Image type'
|
||||||
default: ''
|
default: ''
|
||||||
@@ -234,6 +238,7 @@ jobs:
|
|||||||
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
||||||
GRPC_VERSION=v1.65.0
|
GRPC_VERSION=v1.65.0
|
||||||
MAKEFLAGS=${{ inputs.makeflags }}
|
MAKEFLAGS=${{ inputs.makeflags }}
|
||||||
|
SKIP_DRIVERS=${{ inputs.skip-drivers }}
|
||||||
context: .
|
context: .
|
||||||
file: ./Dockerfile
|
file: ./Dockerfile
|
||||||
cache-from: type=gha
|
cache-from: type=gha
|
||||||
@@ -262,6 +267,7 @@ jobs:
|
|||||||
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
||||||
GRPC_VERSION=v1.65.0
|
GRPC_VERSION=v1.65.0
|
||||||
MAKEFLAGS=${{ inputs.makeflags }}
|
MAKEFLAGS=${{ inputs.makeflags }}
|
||||||
|
SKIP_DRIVERS=${{ inputs.skip-drivers }}
|
||||||
context: .
|
context: .
|
||||||
file: ./Dockerfile
|
file: ./Dockerfile
|
||||||
cache-from: type=gha
|
cache-from: type=gha
|
||||||
|
|||||||
4
.github/workflows/notify-models.yaml
vendored
4
.github/workflows/notify-models.yaml
vendored
@@ -18,7 +18,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
||||||
# Check the PR diff using the current branch and the base branch of the PR
|
# Check the PR diff using the current branch and the base branch of the PR
|
||||||
- uses: GrantBirki/git-diff-action@v2.7.0
|
- uses: GrantBirki/git-diff-action@v2.8.0
|
||||||
id: git-diff-action
|
id: git-diff-action
|
||||||
with:
|
with:
|
||||||
json_diff_file_output: diff.json
|
json_diff_file_output: diff.json
|
||||||
@@ -99,7 +99,7 @@ jobs:
|
|||||||
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
|
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
|
||||||
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
|
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
|
||||||
# Check the PR diff using the current branch and the base branch of the PR
|
# Check the PR diff using the current branch and the base branch of the PR
|
||||||
- uses: GrantBirki/git-diff-action@v2.7.0
|
- uses: GrantBirki/git-diff-action@v2.8.0
|
||||||
id: git-diff-action
|
id: git-diff-action
|
||||||
with:
|
with:
|
||||||
json_diff_file_output: diff.json
|
json_diff_file_output: diff.json
|
||||||
|
|||||||
35
.github/workflows/release.yaml
vendored
35
.github/workflows/release.yaml
vendored
@@ -237,40 +237,7 @@ jobs:
|
|||||||
detached: true
|
detached: true
|
||||||
connect-timeout-seconds: 180
|
connect-timeout-seconds: 180
|
||||||
limit-access-to-actor: true
|
limit-access-to-actor: true
|
||||||
build-stablediffusion:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- uses: actions/setup-go@v5
|
|
||||||
with:
|
|
||||||
go-version: '1.21.x'
|
|
||||||
cache: false
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
|
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
|
||||||
- name: Build stablediffusion
|
|
||||||
run: |
|
|
||||||
export PATH=$PATH:$GOPATH/bin
|
|
||||||
make backend-assets/grpc/stablediffusion
|
|
||||||
mkdir -p release && cp backend-assets/grpc/stablediffusion release
|
|
||||||
env:
|
|
||||||
GO_TAGS: stablediffusion
|
|
||||||
- uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
name: stablediffusion
|
|
||||||
path: release/
|
|
||||||
- name: Release
|
|
||||||
uses: softprops/action-gh-release@v2
|
|
||||||
if: startsWith(github.ref, 'refs/tags/')
|
|
||||||
with:
|
|
||||||
files: |
|
|
||||||
release/*
|
|
||||||
|
|
||||||
build-macOS-x86_64:
|
build-macOS-x86_64:
|
||||||
runs-on: macos-13
|
runs-on: macos-13
|
||||||
|
|||||||
2
.github/workflows/secscan.yaml
vendored
2
.github/workflows/secscan.yaml
vendored
@@ -18,7 +18,7 @@ jobs:
|
|||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
- name: Run Gosec Security Scanner
|
- name: Run Gosec Security Scanner
|
||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
uses: securego/gosec@v2.21.4
|
uses: securego/gosec@v2.22.0
|
||||||
with:
|
with:
|
||||||
# we let the report trigger content trigger a failure using the GitHub Security features.
|
# we let the report trigger content trigger a failure using the GitHub Security features.
|
||||||
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
||||||
|
|||||||
135
.github/workflows/test-extra.yml
vendored
135
.github/workflows/test-extra.yml
vendored
@@ -35,30 +35,6 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
make --jobs=5 --output-sync=target -C backend/python/transformers
|
make --jobs=5 --output-sync=target -C backend/python/transformers
|
||||||
make --jobs=5 --output-sync=target -C backend/python/transformers test
|
make --jobs=5 --output-sync=target -C backend/python/transformers test
|
||||||
|
|
||||||
tests-sentencetransformers:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install build-essential ffmpeg
|
|
||||||
# Install UV
|
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
|
||||||
sudo apt-get install -y libopencv-dev
|
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
|
||||||
|
|
||||||
- name: Test sentencetransformers
|
|
||||||
run: |
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
|
|
||||||
|
|
||||||
|
|
||||||
tests-rerankers:
|
tests-rerankers:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
@@ -102,78 +78,27 @@ jobs:
|
|||||||
make --jobs=5 --output-sync=target -C backend/python/diffusers
|
make --jobs=5 --output-sync=target -C backend/python/diffusers
|
||||||
make --jobs=5 --output-sync=target -C backend/python/diffusers test
|
make --jobs=5 --output-sync=target -C backend/python/diffusers test
|
||||||
|
|
||||||
tests-parler-tts:
|
# tests-transformers-musicgen:
|
||||||
runs-on: ubuntu-latest
|
# runs-on: ubuntu-latest
|
||||||
steps:
|
# steps:
|
||||||
- name: Clone
|
# - name: Clone
|
||||||
uses: actions/checkout@v4
|
# uses: actions/checkout@v4
|
||||||
with:
|
# with:
|
||||||
submodules: true
|
# submodules: true
|
||||||
- name: Dependencies
|
# - name: Dependencies
|
||||||
run: |
|
# run: |
|
||||||
sudo apt-get update
|
# sudo apt-get update
|
||||||
sudo apt-get install build-essential ffmpeg
|
# sudo apt-get install build-essential ffmpeg
|
||||||
# Install UV
|
# # Install UV
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
sudo apt-get install -y libopencv-dev
|
# sudo apt-get install -y libopencv-dev
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
# pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||||
|
|
||||||
- name: Test parler-tts
|
# - name: Test transformers-musicgen
|
||||||
run: |
|
# run: |
|
||||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts
|
# make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
|
||||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts test
|
# make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
|
||||||
- name: Setup tmate session if tests fail
|
|
||||||
if: ${{ failure() }}
|
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
|
||||||
with:
|
|
||||||
detached: true
|
|
||||||
connect-timeout-seconds: 180
|
|
||||||
limit-access-to-actor: true
|
|
||||||
|
|
||||||
tests-openvoice:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install build-essential ffmpeg
|
|
||||||
# Install UV
|
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
|
||||||
sudo apt-get install -y libopencv-dev
|
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
|
||||||
|
|
||||||
- name: Test openvoice
|
|
||||||
run: |
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/openvoice
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/openvoice test
|
|
||||||
|
|
||||||
tests-transformers-musicgen:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install build-essential ffmpeg
|
|
||||||
# Install UV
|
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
|
||||||
sudo apt-get install -y libopencv-dev
|
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
|
||||||
|
|
||||||
- name: Test transformers-musicgen
|
|
||||||
run: |
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
|
|
||||||
|
|
||||||
# tests-bark:
|
# tests-bark:
|
||||||
# runs-on: ubuntu-latest
|
# runs-on: ubuntu-latest
|
||||||
@@ -260,26 +185,6 @@ jobs:
|
|||||||
# run: |
|
# run: |
|
||||||
# make --jobs=5 --output-sync=target -C backend/python/vllm
|
# make --jobs=5 --output-sync=target -C backend/python/vllm
|
||||||
# make --jobs=5 --output-sync=target -C backend/python/vllm test
|
# make --jobs=5 --output-sync=target -C backend/python/vllm test
|
||||||
tests-vallex:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install build-essential ffmpeg
|
|
||||||
# Install UV
|
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
|
||||||
sudo apt-get install -y libopencv-dev
|
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
|
||||||
- name: Test vall-e-x
|
|
||||||
run: |
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/vall-e-x
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/vall-e-x test
|
|
||||||
|
|
||||||
tests-coqui:
|
tests-coqui:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|||||||
9
.github/workflows/test.yml
vendored
9
.github/workflows/test.yml
vendored
@@ -100,15 +100,12 @@ jobs:
|
|||||||
# The python3-grpc-tools package in 22.04 is too old
|
# The python3-grpc-tools package in 22.04 is too old
|
||||||
pip install --user grpcio-tools
|
pip install --user grpcio-tools
|
||||||
|
|
||||||
sudo rm -rfv /usr/bin/conda || true
|
make -C backend/python/transformers
|
||||||
PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
|
|
||||||
|
|
||||||
# Pre-build piper before we start tests in order to have shared libraries in place
|
# Pre-build piper before we start tests in order to have shared libraries in place
|
||||||
make sources/go-piper && \
|
make sources/go-piper && \
|
||||||
GO_TAGS="tts" make -C sources/go-piper piper.o && \
|
GO_TAGS="tts" make -C sources/go-piper piper.o && \
|
||||||
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
|
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/
|
||||||
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
|
|
||||||
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
|
||||||
env:
|
env:
|
||||||
CUDA_VERSION: 12-4
|
CUDA_VERSION: 12-4
|
||||||
- name: Cache grpc
|
- name: Cache grpc
|
||||||
@@ -130,7 +127,7 @@ jobs:
|
|||||||
cd grpc && cd cmake/build && sudo make --jobs 5 install
|
cd grpc && cd cmake/build && sudo make --jobs 5 install
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
|
PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
|
||||||
- name: Setup tmate session if tests fail
|
- name: Setup tmate session if tests fail
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
uses: mxschmitt/action-tmate@v3.19
|
||||||
|
|||||||
2
.vscode/launch.json
vendored
2
.vscode/launch.json
vendored
@@ -26,7 +26,7 @@
|
|||||||
"LOCALAI_P2P": "true",
|
"LOCALAI_P2P": "true",
|
||||||
"LOCALAI_FEDERATED": "true"
|
"LOCALAI_FEDERATED": "true"
|
||||||
},
|
},
|
||||||
"buildFlags": ["-tags", "stablediffusion p2p tts", "-v"],
|
"buildFlags": ["-tags", "p2p tts", "-v"],
|
||||||
"envFile": "${workspaceFolder}/.env",
|
"envFile": "${workspaceFolder}/.env",
|
||||||
"cwd": "${workspaceRoot}"
|
"cwd": "${workspaceRoot}"
|
||||||
}
|
}
|
||||||
|
|||||||
78
Dockerfile
78
Dockerfile
@@ -15,8 +15,7 @@ ARG TARGETARCH
|
|||||||
ARG TARGETVARIANT
|
ARG TARGETVARIANT
|
||||||
|
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
|
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
|
||||||
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
@@ -69,14 +68,10 @@ ENV PATH=/opt/rocm/bin:${PATH}
|
|||||||
# OpenBLAS requirements and stable diffusion
|
# OpenBLAS requirements and stable diffusion
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
libopenblas-dev \
|
libopenblas-dev && \
|
||||||
libopencv-dev && \
|
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Set up OpenCV
|
|
||||||
RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
|
||||||
|
|
||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
|
|
||||||
###################################
|
###################################
|
||||||
@@ -115,12 +110,13 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers
|
|||||||
ARG BUILD_TYPE
|
ARG BUILD_TYPE
|
||||||
ARG CUDA_MAJOR_VERSION=12
|
ARG CUDA_MAJOR_VERSION=12
|
||||||
ARG CUDA_MINOR_VERSION=0
|
ARG CUDA_MINOR_VERSION=0
|
||||||
|
ARG SKIP_DRIVERS=false
|
||||||
|
|
||||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||||
|
|
||||||
# Vulkan requirements
|
# Vulkan requirements
|
||||||
RUN <<EOT bash
|
RUN <<EOT bash
|
||||||
if [ "${BUILD_TYPE}" = "vulkan" ]; then
|
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
software-properties-common pciutils wget gpg-agent && \
|
software-properties-common pciutils wget gpg-agent && \
|
||||||
@@ -136,7 +132,7 @@ EOT
|
|||||||
|
|
||||||
# CuBLAS requirements
|
# CuBLAS requirements
|
||||||
RUN <<EOT bash
|
RUN <<EOT bash
|
||||||
if [ "${BUILD_TYPE}" = "cublas" ]; then
|
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
software-properties-common pciutils
|
software-properties-common pciutils
|
||||||
@@ -162,7 +158,7 @@ RUN <<EOT bash
|
|||||||
EOT
|
EOT
|
||||||
|
|
||||||
# If we are building with clblas support, we need the libraries for the builds
|
# If we are building with clblas support, we need the libraries for the builds
|
||||||
RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
|
RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
libclblast-dev && \
|
libclblast-dev && \
|
||||||
@@ -170,7 +166,7 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
|
|||||||
rm -rf /var/lib/apt/lists/* \
|
rm -rf /var/lib/apt/lists/* \
|
||||||
; fi
|
; fi
|
||||||
|
|
||||||
RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
hipblas-dev \
|
hipblas-dev \
|
||||||
@@ -250,7 +246,7 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
|
|||||||
|
|
||||||
FROM requirements-drivers AS builder-base
|
FROM requirements-drivers AS builder-base
|
||||||
|
|
||||||
ARG GO_TAGS="stablediffusion tts p2p"
|
ARG GO_TAGS="tts p2p"
|
||||||
ARG GRPC_BACKENDS
|
ARG GRPC_BACKENDS
|
||||||
ARG MAKEFLAGS
|
ARG MAKEFLAGS
|
||||||
ARG LD_FLAGS="-s -w"
|
ARG LD_FLAGS="-s -w"
|
||||||
@@ -284,35 +280,12 @@ RUN <<EOT bash
|
|||||||
fi
|
fi
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
|
|
||||||
###################################
|
|
||||||
###################################
|
|
||||||
|
|
||||||
# This first portion of builder holds the layers specifically used to build backend-assets/grpc/stablediffusion
|
|
||||||
# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
|
|
||||||
FROM builder-base AS builder-sd
|
|
||||||
|
|
||||||
# stablediffusion does not tolerate a newer version of abseil, copy only over enough elements to build it
|
|
||||||
COPY Makefile .
|
|
||||||
COPY go.mod .
|
|
||||||
COPY go.sum .
|
|
||||||
COPY backend/backend.proto ./backend/backend.proto
|
|
||||||
COPY backend/go/image/stablediffusion ./backend/go/image/stablediffusion
|
|
||||||
COPY pkg/grpc ./pkg/grpc
|
|
||||||
COPY pkg/stablediffusion ./pkg/stablediffusion
|
|
||||||
RUN git init
|
|
||||||
RUN make sources/go-stable-diffusion
|
|
||||||
RUN touch prepare-sources
|
|
||||||
|
|
||||||
# Actually build the backend
|
|
||||||
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make backend-assets/grpc/stablediffusion
|
|
||||||
|
|
||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
|
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
|
||||||
# Adjustments to the build process should likely be made here.
|
# Adjustments to the build process should likely be made here.
|
||||||
FROM builder-sd AS builder
|
FROM builder-base AS builder
|
||||||
|
|
||||||
# Install the pre-built GRPC
|
# Install the pre-built GRPC
|
||||||
COPY --from=grpc /opt/grpc /usr/local
|
COPY --from=grpc /opt/grpc /usr/local
|
||||||
@@ -330,7 +303,7 @@ RUN make prepare
|
|||||||
## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
|
## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
|
||||||
## (both will use CUDA or hipblas for the actual computation)
|
## (both will use CUDA or hipblas for the actual computation)
|
||||||
RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
||||||
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
|
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
|
||||||
else \
|
else \
|
||||||
make build; \
|
make build; \
|
||||||
fi
|
fi
|
||||||
@@ -352,8 +325,6 @@ ARG FFMPEG
|
|||||||
|
|
||||||
COPY --from=grpc /opt/grpc /usr/local
|
COPY --from=grpc /opt/grpc /usr/local
|
||||||
|
|
||||||
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion /build/backend-assets/grpc/stablediffusion
|
|
||||||
|
|
||||||
COPY .devcontainer-scripts /.devcontainer-scripts
|
COPY .devcontainer-scripts /.devcontainer-scripts
|
||||||
|
|
||||||
# Add FFmpeg
|
# Add FFmpeg
|
||||||
@@ -383,12 +354,14 @@ FROM requirements-drivers
|
|||||||
|
|
||||||
ARG FFMPEG
|
ARG FFMPEG
|
||||||
ARG BUILD_TYPE
|
ARG BUILD_TYPE
|
||||||
|
ARG BUILD_PLATFORM
|
||||||
ARG TARGETARCH
|
ARG TARGETARCH
|
||||||
ARG IMAGE_TYPE=extras
|
ARG IMAGE_TYPE=extras
|
||||||
ARG EXTRA_BACKENDS
|
ARG EXTRA_BACKENDS
|
||||||
ARG MAKEFLAGS
|
ARG MAKEFLAGS
|
||||||
|
|
||||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||||
|
ENV BUILD_PLATFORM=${BUILD_PLATFORM}
|
||||||
ENV REBUILD=false
|
ENV REBUILD=false
|
||||||
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
|
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
|
||||||
ENV MAKEFLAGS=${MAKEFLAGS}
|
ENV MAKEFLAGS=${MAKEFLAGS}
|
||||||
@@ -426,36 +399,28 @@ COPY --from=builder /build/local-ai ./
|
|||||||
# Copy shared libraries for piper
|
# Copy shared libraries for piper
|
||||||
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
|
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
|
||||||
|
|
||||||
# do not let stablediffusion rebuild (requires an older version of absl)
|
|
||||||
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
|
|
||||||
|
|
||||||
# Change the shell to bash so we can use [[ tests below
|
# Change the shell to bash so we can use [[ tests below
|
||||||
SHELL ["/bin/bash", "-c"]
|
SHELL ["/bin/bash", "-c"]
|
||||||
# We try to strike a balance between individual layer size (as that affects total push time) and total image size
|
# We try to strike a balance between individual layer size (as that affects total push time) and total image size
|
||||||
# Splitting the backends into more groups with fewer items results in a larger image, but a smaller size for the largest layer
|
# Splitting the backends into more groups with fewer items results in a larger image, but a smaller size for the largest layer
|
||||||
# Splitting the backends into fewer groups with more items results in a smaller image, but a larger size for the largest layer
|
# Splitting the backends into fewer groups with more items results in a smaller image, but a larger size for the largest layer
|
||||||
|
|
||||||
|
RUN if [[ ( "${IMAGE_TYPE}" == "extras ")]]; then \
|
||||||
|
apt-get -qq -y install espeak-ng \
|
||||||
|
; fi
|
||||||
|
|
||||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/coqui \
|
make -C backend/python/coqui \
|
||||||
; fi && \
|
; fi && \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "parler-tts" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
if [[ ( "${EXTRA_BACKENDS}" =~ "faster-whisper" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/parler-tts \
|
make -C backend/python/faster-whisper \
|
||||||
; fi && \
|
; fi && \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/diffusers \
|
make -C backend/python/diffusers \
|
||||||
; fi && \
|
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
|
||||||
make -C backend/python/transformers-musicgen \
|
|
||||||
; fi
|
; fi
|
||||||
|
|
||||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/vall-e-x \
|
make -C backend/python/kokoro \
|
||||||
; fi && \
|
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
|
||||||
make -C backend/python/openvoice \
|
|
||||||
; fi && \
|
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
|
||||||
make -C backend/python/sentencetransformers \
|
|
||||||
; fi && \
|
; fi && \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/exllama2 \
|
make -C backend/python/exllama2 \
|
||||||
@@ -475,9 +440,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE
|
|||||||
; fi && \
|
; fi && \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/rerankers \
|
make -C backend/python/rerankers \
|
||||||
; fi && \
|
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "mamba" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
|
||||||
make -C backend/python/mamba \
|
|
||||||
; fi
|
; fi
|
||||||
|
|
||||||
# Make sure the models directory exists
|
# Make sure the models directory exists
|
||||||
|
|||||||
166
Makefile
166
Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
|
|||||||
# llama.cpp versions
|
# llama.cpp versions
|
||||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||||
CPPLLAMA_VERSION?=dafae66cc242eb766797194d3c85c5e502625623
|
CPPLLAMA_VERSION?=5598f475be3e31430fbe17ebb85654ec90dc201e
|
||||||
|
|
||||||
# whisper.cpp version
|
# whisper.cpp version
|
||||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||||
@@ -18,21 +18,13 @@ WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
|
|||||||
PIPER_REPO?=https://github.com/mudler/go-piper
|
PIPER_REPO?=https://github.com/mudler/go-piper
|
||||||
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
|
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
|
||||||
|
|
||||||
# stablediffusion version
|
|
||||||
STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
|
|
||||||
STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
|
|
||||||
|
|
||||||
# tinydream version
|
|
||||||
TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
|
|
||||||
TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
|
|
||||||
|
|
||||||
# bark.cpp
|
# bark.cpp
|
||||||
BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
|
BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
|
||||||
BARKCPP_VERSION?=v1.0.0
|
BARKCPP_VERSION?=v1.0.0
|
||||||
|
|
||||||
# stablediffusion.cpp (ggml)
|
# stablediffusion.cpp (ggml)
|
||||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||||
STABLEDIFFUSION_GGML_VERSION?=9578fdcc4632dc3de5565f28e2fb16b7c18f8d48
|
STABLEDIFFUSION_GGML_VERSION?=5eb15ef4d022bef4a391de4f5f6556e81fbb5024
|
||||||
|
|
||||||
ONNX_VERSION?=1.20.0
|
ONNX_VERSION?=1.20.0
|
||||||
ONNX_ARCH?=x64
|
ONNX_ARCH?=x64
|
||||||
@@ -183,16 +175,6 @@ ifeq ($(STATIC),true)
|
|||||||
LD_FLAGS+=-linkmode external -extldflags -static
|
LD_FLAGS+=-linkmode external -extldflags -static
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
|
|
||||||
# OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
|
|
||||||
OPTIONAL_GRPC+=backend-assets/grpc/stablediffusion
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(findstring tinydream,$(GO_TAGS)),tinydream)
|
|
||||||
# OPTIONAL_TARGETS+=go-tiny-dream/libtinydream.a
|
|
||||||
OPTIONAL_GRPC+=backend-assets/grpc/tinydream
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
||||||
# OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
|
# OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
|
||||||
# OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
|
# OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
|
||||||
@@ -204,6 +186,7 @@ endif
|
|||||||
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
|
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
||||||
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
||||||
@@ -282,19 +265,6 @@ sources/go-piper:
|
|||||||
sources/go-piper/libpiper_binding.a: sources/go-piper
|
sources/go-piper/libpiper_binding.a: sources/go-piper
|
||||||
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
|
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
|
||||||
|
|
||||||
## stable diffusion (onnx)
|
|
||||||
sources/go-stable-diffusion:
|
|
||||||
mkdir -p sources/go-stable-diffusion
|
|
||||||
cd sources/go-stable-diffusion && \
|
|
||||||
git init && \
|
|
||||||
git remote add origin $(STABLEDIFFUSION_REPO) && \
|
|
||||||
git fetch origin && \
|
|
||||||
git checkout $(STABLEDIFFUSION_VERSION) && \
|
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
|
||||||
|
|
||||||
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
|
|
||||||
CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
|
|
||||||
|
|
||||||
## stablediffusion (ggml)
|
## stablediffusion (ggml)
|
||||||
sources/stablediffusion-ggml.cpp:
|
sources/stablediffusion-ggml.cpp:
|
||||||
git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
|
git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
|
||||||
@@ -302,14 +272,8 @@ sources/stablediffusion-ggml.cpp:
|
|||||||
git checkout $(STABLEDIFFUSION_GGML_VERSION) && \
|
git checkout $(STABLEDIFFUSION_GGML_VERSION) && \
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
git submodule update --init --recursive --depth 1 --single-branch
|
||||||
|
|
||||||
sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a: sources/stablediffusion-ggml.cpp
|
backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp
|
||||||
cd sources/stablediffusion-ggml.cpp && \
|
$(MAKE) -C backend/go/image/stablediffusion-ggml build/libstable-diffusion.a
|
||||||
mkdir -p build && \
|
|
||||||
cd build && \
|
|
||||||
cmake $(CMAKE_ARGS) .. && \
|
|
||||||
cmake --build . --config Release
|
|
||||||
|
|
||||||
backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a
|
|
||||||
$(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a
|
$(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a
|
||||||
|
|
||||||
backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc
|
backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc
|
||||||
@@ -333,19 +297,6 @@ else
|
|||||||
mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
|
mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
## tiny-dream
|
|
||||||
sources/go-tiny-dream:
|
|
||||||
mkdir -p sources/go-tiny-dream
|
|
||||||
cd sources/go-tiny-dream && \
|
|
||||||
git init && \
|
|
||||||
git remote add origin $(TINYDREAM_REPO) && \
|
|
||||||
git fetch origin && \
|
|
||||||
git checkout $(TINYDREAM_VERSION) && \
|
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
|
||||||
|
|
||||||
sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
|
|
||||||
$(MAKE) -C sources/go-tiny-dream libtinydream.a
|
|
||||||
|
|
||||||
## whisper
|
## whisper
|
||||||
sources/whisper.cpp:
|
sources/whisper.cpp:
|
||||||
mkdir -p sources/whisper.cpp
|
mkdir -p sources/whisper.cpp
|
||||||
@@ -359,22 +310,18 @@ sources/whisper.cpp:
|
|||||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
||||||
|
|
||||||
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
|
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
|
||||||
|
|
||||||
replace:
|
replace:
|
||||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
||||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
||||||
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
|
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
|
||||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
||||||
|
|
||||||
dropreplace:
|
dropreplace:
|
||||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
||||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
||||||
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
|
|
||||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
||||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
|
|
||||||
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
|
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
|
||||||
|
|
||||||
prepare-sources: get-sources replace
|
prepare-sources: get-sources replace
|
||||||
@@ -385,9 +332,7 @@ rebuild: ## Rebuilds the project
|
|||||||
$(GOCMD) clean -cache
|
$(GOCMD) clean -cache
|
||||||
$(MAKE) -C sources/go-llama.cpp clean
|
$(MAKE) -C sources/go-llama.cpp clean
|
||||||
$(MAKE) -C sources/whisper.cpp clean
|
$(MAKE) -C sources/whisper.cpp clean
|
||||||
$(MAKE) -C sources/go-stable-diffusion clean
|
|
||||||
$(MAKE) -C sources/go-piper clean
|
$(MAKE) -C sources/go-piper clean
|
||||||
$(MAKE) -C sources/go-tiny-dream clean
|
|
||||||
$(MAKE) build
|
$(MAKE) build
|
||||||
|
|
||||||
prepare: prepare-sources $(OPTIONAL_TARGETS)
|
prepare: prepare-sources $(OPTIONAL_TARGETS)
|
||||||
@@ -501,9 +446,9 @@ prepare-test: grpcs
|
|||||||
|
|
||||||
test: prepare test-models/testmodel.ggml grpcs
|
test: prepare test-models/testmodel.ggml grpcs
|
||||||
@echo 'Running tests'
|
@echo 'Running tests'
|
||||||
export GO_TAGS="tts stablediffusion debug"
|
export GO_TAGS="tts debug"
|
||||||
$(MAKE) prepare-test
|
$(MAKE) prepare-test
|
||||||
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||||
$(MAKE) test-llama
|
$(MAKE) test-llama
|
||||||
$(MAKE) test-llama-gguf
|
$(MAKE) test-llama-gguf
|
||||||
@@ -589,10 +534,10 @@ protogen-go-clean:
|
|||||||
$(RM) bin/*
|
$(RM) bin/*
|
||||||
|
|
||||||
.PHONY: protogen-python
|
.PHONY: protogen-python
|
||||||
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
|
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
|
||||||
|
|
||||||
.PHONY: protogen-python-clean
|
.PHONY: protogen-python-clean
|
||||||
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
|
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean
|
||||||
|
|
||||||
.PHONY: autogptq-protogen
|
.PHONY: autogptq-protogen
|
||||||
autogptq-protogen:
|
autogptq-protogen:
|
||||||
@@ -626,6 +571,14 @@ diffusers-protogen:
|
|||||||
diffusers-protogen-clean:
|
diffusers-protogen-clean:
|
||||||
$(MAKE) -C backend/python/diffusers protogen-clean
|
$(MAKE) -C backend/python/diffusers protogen-clean
|
||||||
|
|
||||||
|
.PHONY: faster-whisper-protogen
|
||||||
|
faster-whisper-protogen:
|
||||||
|
$(MAKE) -C backend/python/faster-whisper protogen
|
||||||
|
|
||||||
|
.PHONY: faster-whisper-protogen-clean
|
||||||
|
faster-whisper-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/faster-whisper protogen-clean
|
||||||
|
|
||||||
.PHONY: exllama2-protogen
|
.PHONY: exllama2-protogen
|
||||||
exllama2-protogen:
|
exllama2-protogen:
|
||||||
$(MAKE) -C backend/python/exllama2 protogen
|
$(MAKE) -C backend/python/exllama2 protogen
|
||||||
@@ -634,14 +587,6 @@ exllama2-protogen:
|
|||||||
exllama2-protogen-clean:
|
exllama2-protogen-clean:
|
||||||
$(MAKE) -C backend/python/exllama2 protogen-clean
|
$(MAKE) -C backend/python/exllama2 protogen-clean
|
||||||
|
|
||||||
.PHONY: mamba-protogen
|
|
||||||
mamba-protogen:
|
|
||||||
$(MAKE) -C backend/python/mamba protogen
|
|
||||||
|
|
||||||
.PHONY: mamba-protogen-clean
|
|
||||||
mamba-protogen-clean:
|
|
||||||
$(MAKE) -C backend/python/mamba protogen-clean
|
|
||||||
|
|
||||||
.PHONY: rerankers-protogen
|
.PHONY: rerankers-protogen
|
||||||
rerankers-protogen:
|
rerankers-protogen:
|
||||||
$(MAKE) -C backend/python/rerankers protogen
|
$(MAKE) -C backend/python/rerankers protogen
|
||||||
@@ -650,14 +595,6 @@ rerankers-protogen:
|
|||||||
rerankers-protogen-clean:
|
rerankers-protogen-clean:
|
||||||
$(MAKE) -C backend/python/rerankers protogen-clean
|
$(MAKE) -C backend/python/rerankers protogen-clean
|
||||||
|
|
||||||
.PHONY: sentencetransformers-protogen
|
|
||||||
sentencetransformers-protogen:
|
|
||||||
$(MAKE) -C backend/python/sentencetransformers protogen
|
|
||||||
|
|
||||||
.PHONY: sentencetransformers-protogen-clean
|
|
||||||
sentencetransformers-protogen-clean:
|
|
||||||
$(MAKE) -C backend/python/sentencetransformers protogen-clean
|
|
||||||
|
|
||||||
.PHONY: transformers-protogen
|
.PHONY: transformers-protogen
|
||||||
transformers-protogen:
|
transformers-protogen:
|
||||||
$(MAKE) -C backend/python/transformers protogen
|
$(MAKE) -C backend/python/transformers protogen
|
||||||
@@ -666,37 +603,13 @@ transformers-protogen:
|
|||||||
transformers-protogen-clean:
|
transformers-protogen-clean:
|
||||||
$(MAKE) -C backend/python/transformers protogen-clean
|
$(MAKE) -C backend/python/transformers protogen-clean
|
||||||
|
|
||||||
.PHONY: parler-tts-protogen
|
.PHONY: kokoro-protogen
|
||||||
parler-tts-protogen:
|
kokoro-protogen:
|
||||||
$(MAKE) -C backend/python/parler-tts protogen
|
$(MAKE) -C backend/python/kokoro protogen
|
||||||
|
|
||||||
.PHONY: parler-tts-protogen-clean
|
.PHONY: kokoro-protogen-clean
|
||||||
parler-tts-protogen-clean:
|
kokoro-protogen-clean:
|
||||||
$(MAKE) -C backend/python/parler-tts protogen-clean
|
$(MAKE) -C backend/python/kokoro protogen-clean
|
||||||
|
|
||||||
.PHONY: transformers-musicgen-protogen
|
|
||||||
transformers-musicgen-protogen:
|
|
||||||
$(MAKE) -C backend/python/transformers-musicgen protogen
|
|
||||||
|
|
||||||
.PHONY: transformers-musicgen-protogen-clean
|
|
||||||
transformers-musicgen-protogen-clean:
|
|
||||||
$(MAKE) -C backend/python/transformers-musicgen protogen-clean
|
|
||||||
|
|
||||||
.PHONY: vall-e-x-protogen
|
|
||||||
vall-e-x-protogen:
|
|
||||||
$(MAKE) -C backend/python/vall-e-x protogen
|
|
||||||
|
|
||||||
.PHONY: vall-e-x-protogen-clean
|
|
||||||
vall-e-x-protogen-clean:
|
|
||||||
$(MAKE) -C backend/python/vall-e-x protogen-clean
|
|
||||||
|
|
||||||
.PHONY: openvoice-protogen
|
|
||||||
openvoice-protogen:
|
|
||||||
$(MAKE) -C backend/python/openvoice protogen
|
|
||||||
|
|
||||||
.PHONY: openvoice-protogen-clean
|
|
||||||
openvoice-protogen-clean:
|
|
||||||
$(MAKE) -C backend/python/openvoice protogen-clean
|
|
||||||
|
|
||||||
.PHONY: vllm-protogen
|
.PHONY: vllm-protogen
|
||||||
vllm-protogen:
|
vllm-protogen:
|
||||||
@@ -713,15 +626,11 @@ prepare-extra-conda-environments: protogen-python
|
|||||||
$(MAKE) -C backend/python/bark
|
$(MAKE) -C backend/python/bark
|
||||||
$(MAKE) -C backend/python/coqui
|
$(MAKE) -C backend/python/coqui
|
||||||
$(MAKE) -C backend/python/diffusers
|
$(MAKE) -C backend/python/diffusers
|
||||||
|
$(MAKE) -C backend/python/faster-whisper
|
||||||
$(MAKE) -C backend/python/vllm
|
$(MAKE) -C backend/python/vllm
|
||||||
$(MAKE) -C backend/python/mamba
|
|
||||||
$(MAKE) -C backend/python/sentencetransformers
|
|
||||||
$(MAKE) -C backend/python/rerankers
|
$(MAKE) -C backend/python/rerankers
|
||||||
$(MAKE) -C backend/python/transformers
|
$(MAKE) -C backend/python/transformers
|
||||||
$(MAKE) -C backend/python/transformers-musicgen
|
$(MAKE) -C backend/python/kokoro
|
||||||
$(MAKE) -C backend/python/parler-tts
|
|
||||||
$(MAKE) -C backend/python/vall-e-x
|
|
||||||
$(MAKE) -C backend/python/openvoice
|
|
||||||
$(MAKE) -C backend/python/exllama2
|
$(MAKE) -C backend/python/exllama2
|
||||||
|
|
||||||
prepare-test-extra: protogen-python
|
prepare-test-extra: protogen-python
|
||||||
@@ -791,6 +700,13 @@ backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.
|
|||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
|
||||||
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
|
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
|
||||||
|
|
||||||
|
backend-assets/grpc/llama-cpp-avx512: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
|
cp -rf backend/cpp/llama backend/cpp/llama-avx512
|
||||||
|
$(MAKE) -C backend/cpp/llama-avx512 purge
|
||||||
|
$(info ${GREEN}I llama-cpp build info:avx512${RESET})
|
||||||
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx512" build-llama-cpp-grpc-server
|
||||||
|
cp -rfv backend/cpp/llama-avx512/grpc-server backend-assets/grpc/llama-cpp-avx512
|
||||||
|
|
||||||
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
|
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
cp -rf backend/cpp/llama backend/cpp/llama-avx
|
cp -rf backend/cpp/llama backend/cpp/llama-avx
|
||||||
$(MAKE) -C backend/cpp/llama-avx purge
|
$(MAKE) -C backend/cpp/llama-avx purge
|
||||||
@@ -865,13 +781,6 @@ ifneq ($(UPX),)
|
|||||||
$(UPX) backend-assets/grpc/piper
|
$(UPX) backend-assets/grpc/piper
|
||||||
endif
|
endif
|
||||||
|
|
||||||
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
|
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
|
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
|
|
||||||
ifneq ($(UPX),)
|
|
||||||
$(UPX) backend-assets/grpc/stablediffusion
|
|
||||||
endif
|
|
||||||
|
|
||||||
backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
|
backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
|
||||||
@@ -879,13 +788,6 @@ ifneq ($(UPX),)
|
|||||||
$(UPX) backend-assets/grpc/silero-vad
|
$(UPX) backend-assets/grpc/silero-vad
|
||||||
endif
|
endif
|
||||||
|
|
||||||
backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
|
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
|
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
|
|
||||||
ifneq ($(UPX),)
|
|
||||||
$(UPX) backend-assets/grpc/tinydream
|
|
||||||
endif
|
|
||||||
|
|
||||||
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
|
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
|
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
|
||||||
@@ -959,7 +861,7 @@ swagger:
|
|||||||
|
|
||||||
.PHONY: gen-assets
|
.PHONY: gen-assets
|
||||||
gen-assets:
|
gen-assets:
|
||||||
$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets
|
$(GOCMD) run core/dependencies_manager/manager.go webui_static.yaml core/http/static/assets
|
||||||
|
|
||||||
## Documentation
|
## Documentation
|
||||||
docs/layouts/_default:
|
docs/layouts/_default:
|
||||||
|
|||||||
18
README.md
18
README.md
@@ -39,7 +39,7 @@
|
|||||||
</p>
|
</p>
|
||||||
|
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<a href="https://trendshift.io/repositories/1484" target="_blank"><img src="https://trendshift.io/api/badge/repositories/1484" alt="go-skynet%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
<a href="https://trendshift.io/repositories/5539" target="_blank"><img src="https://trendshift.io/api/badge/repositories/5539" alt="mudler%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
||||||
@@ -92,19 +92,15 @@ local-ai run oci://localai/phi-2:latest
|
|||||||
|
|
||||||
## 📰 Latest project news
|
## 📰 Latest project news
|
||||||
|
|
||||||
|
- Jan 2025: LocalAI model release: https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.3, SANA support in diffusers: https://github.com/mudler/LocalAI/pull/4603
|
||||||
- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 )
|
- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 )
|
||||||
- Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 )
|
- Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 )
|
||||||
- Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204
|
- Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204
|
||||||
- Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples)
|
- Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples)
|
||||||
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
|
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
|
||||||
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
|
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723. P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
|
||||||
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
|
|
||||||
- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628
|
|
||||||
- May 2024: 🔥🔥 Decentralized P2P llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
|
- May 2024: 🔥🔥 Decentralized P2P llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
|
||||||
- May 2024: 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
|
|
||||||
- May 2024: 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
|
|
||||||
- May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
|
- May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
|
||||||
- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
|
|
||||||
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
|
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
|
||||||
|
|
||||||
Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||||
@@ -113,12 +109,10 @@ Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3A
|
|||||||
|
|
||||||
- Multimodal with vLLM and Video understanding: https://github.com/mudler/LocalAI/pull/3729
|
- Multimodal with vLLM and Video understanding: https://github.com/mudler/LocalAI/pull/3729
|
||||||
- Realtime API https://github.com/mudler/LocalAI/issues/3714
|
- Realtime API https://github.com/mudler/LocalAI/issues/3714
|
||||||
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
|
|
||||||
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
|
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
|
||||||
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
||||||
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
|
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
|
||||||
- Assistant API: https://github.com/mudler/LocalAI/issues/1273
|
- Assistant API: https://github.com/mudler/LocalAI/issues/1273
|
||||||
- Moderation endpoint: https://github.com/mudler/LocalAI/issues/999
|
|
||||||
- Vulkan: https://github.com/mudler/LocalAI/issues/1647
|
- Vulkan: https://github.com/mudler/LocalAI/issues/1647
|
||||||
- Anthropic API: https://github.com/mudler/LocalAI/issues/1808
|
- Anthropic API: https://github.com/mudler/LocalAI/issues/1808
|
||||||
|
|
||||||
@@ -126,10 +120,10 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
|
|||||||
|
|
||||||
## 🚀 [Features](https://localai.io/features/)
|
## 🚀 [Features](https://localai.io/features/)
|
||||||
|
|
||||||
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
|
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
|
||||||
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
|
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
|
||||||
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
|
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
|
||||||
- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
|
- 🎨 [Image generation](https://localai.io/features/image-generation)
|
||||||
- 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/)
|
- 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/)
|
||||||
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
|
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
|
||||||
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
|
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
|
||||||
@@ -137,6 +131,7 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
|
|||||||
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
|
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
|
||||||
- 📈 [Reranker API](https://localai.io/features/reranker/)
|
- 📈 [Reranker API](https://localai.io/features/reranker/)
|
||||||
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
|
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
|
||||||
|
- 🔊 Voice activity detection (Silero-VAD support)
|
||||||
- 🌍 Integrated WebUI!
|
- 🌍 Integrated WebUI!
|
||||||
|
|
||||||
## 💻 Usage
|
## 💻 Usage
|
||||||
@@ -159,6 +154,7 @@ Model galleries
|
|||||||
Other:
|
Other:
|
||||||
- Helm chart https://github.com/go-skynet/helm-charts
|
- Helm chart https://github.com/go-skynet/helm-charts
|
||||||
- VSCode extension https://github.com/badgooooor/localai-vscode-plugin
|
- VSCode extension https://github.com/badgooooor/localai-vscode-plugin
|
||||||
|
- Langchain: https://python.langchain.com/docs/integrations/providers/localai/
|
||||||
- Terminal utility https://github.com/djcopley/ShellOracle
|
- Terminal utility https://github.com/djcopley/ShellOracle
|
||||||
- Local Smart assistant https://github.com/mudler/LocalAGI
|
- Local Smart assistant https://github.com/mudler/LocalAGI
|
||||||
- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation / https://github.com/valentinfrlch/ha-gpt4vision
|
- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation / https://github.com/valentinfrlch/ha-gpt4vision
|
||||||
|
|||||||
@@ -1,56 +1,17 @@
|
|||||||
name: stablediffusion
|
name: stablediffusion
|
||||||
backend: stablediffusion
|
backend: stablediffusion-ggml
|
||||||
|
cfg_scale: 4.5
|
||||||
|
|
||||||
|
options:
|
||||||
|
- sampler:euler
|
||||||
parameters:
|
parameters:
|
||||||
model: stablediffusion_assets
|
model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf
|
||||||
|
step: 25
|
||||||
license: "BSD-3"
|
|
||||||
urls:
|
|
||||||
- https://github.com/EdVince/Stable-Diffusion-NCNN
|
|
||||||
- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE
|
|
||||||
|
|
||||||
description: |
|
|
||||||
Stable Diffusion in NCNN with c++, supported txt2img and img2img
|
|
||||||
|
|
||||||
download_files:
|
download_files:
|
||||||
- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
|
- filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
|
||||||
sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
|
sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f"
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
|
uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
|
||||||
- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
|
|
||||||
sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
|
|
||||||
- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
|
|
||||||
sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
|
|
||||||
- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
|
|
||||||
sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
|
|
||||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin"
|
|
||||||
- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin"
|
|
||||||
sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
|
|
||||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin"
|
|
||||||
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
|
|
||||||
sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
|
|
||||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin"
|
|
||||||
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
|
|
||||||
sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
|
|
||||||
- filename: "stablediffusion_assets/log_sigmas.bin"
|
|
||||||
sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
|
|
||||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
|
|
||||||
- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
|
|
||||||
sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
|
|
||||||
- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
|
|
||||||
sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
|
|
||||||
- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
|
|
||||||
sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
|
|
||||||
- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
|
|
||||||
sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
|
|
||||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
|
|
||||||
- filename: "stablediffusion_assets/vocab.txt"
|
|
||||||
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
|
|
||||||
|
|
||||||
usage: |
|
usage: |
|
||||||
curl http://localhost:8080/v1/images/generations \
|
curl http://localhost:8080/v1/images/generations \
|
||||||
|
|||||||
@@ -159,6 +159,13 @@ message Reply {
|
|||||||
bytes message = 1;
|
bytes message = 1;
|
||||||
int32 tokens = 2;
|
int32 tokens = 2;
|
||||||
int32 prompt_tokens = 3;
|
int32 prompt_tokens = 3;
|
||||||
|
double timing_prompt_processing = 4;
|
||||||
|
double timing_token_generation = 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
message GrammarTrigger {
|
||||||
|
string word = 1;
|
||||||
|
bool at_start = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
message ModelOptions {
|
message ModelOptions {
|
||||||
@@ -245,6 +252,8 @@ message ModelOptions {
|
|||||||
|
|
||||||
string CacheTypeKey = 63;
|
string CacheTypeKey = 63;
|
||||||
string CacheTypeValue = 64;
|
string CacheTypeValue = 64;
|
||||||
|
|
||||||
|
repeated GrammarTrigger GrammarTriggers = 65;
|
||||||
}
|
}
|
||||||
|
|
||||||
message Result {
|
message Result {
|
||||||
@@ -348,4 +357,4 @@ message StatusResponse {
|
|||||||
message Message {
|
message Message {
|
||||||
string role = 1;
|
string role = 1;
|
||||||
string content = 2;
|
string content = 2;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -134,6 +134,32 @@ static std::string tokens_to_output_formatted_string(const llama_context *ctx, c
|
|||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Adds an RPC server
|
||||||
|
// https://github.com/ggerganov/llama.cpp/compare/4dbc8b9cb71876e005724f4e8f73a3544646bcf5..3edfa7d3753c29e44b964c0ff424d2ea8d5fdee6
|
||||||
|
static void add_rpc_devices(std::string servers) {
|
||||||
|
auto rpc_servers = string_split<std::string>(servers, ',');
|
||||||
|
if (rpc_servers.empty()) {
|
||||||
|
throw std::invalid_argument("no RPC servers specified");
|
||||||
|
}
|
||||||
|
ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name("RPC");
|
||||||
|
if (!rpc_reg) {
|
||||||
|
throw std::invalid_argument("failed to find RPC backend");
|
||||||
|
}
|
||||||
|
typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t)(const char * endpoint);
|
||||||
|
ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device");
|
||||||
|
if (!ggml_backend_rpc_add_device_fn) {
|
||||||
|
throw std::invalid_argument("failed to find RPC device add function");
|
||||||
|
}
|
||||||
|
for (const auto & server : rpc_servers) {
|
||||||
|
ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str());
|
||||||
|
if (dev) {
|
||||||
|
ggml_backend_device_register(dev);
|
||||||
|
} else {
|
||||||
|
throw std::invalid_argument("failed to register RPC device");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// convert a vector of completion_token_output to json
|
// convert a vector of completion_token_output to json
|
||||||
static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> &probs)
|
static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> &probs)
|
||||||
{
|
{
|
||||||
@@ -428,6 +454,7 @@ struct llama_server_context
|
|||||||
{
|
{
|
||||||
llama_model *model = nullptr;
|
llama_model *model = nullptr;
|
||||||
llama_context *ctx = nullptr;
|
llama_context *ctx = nullptr;
|
||||||
|
const llama_vocab * vocab = nullptr;
|
||||||
|
|
||||||
clip_ctx *clp_ctx = nullptr;
|
clip_ctx *clp_ctx = nullptr;
|
||||||
|
|
||||||
@@ -439,6 +466,10 @@ struct llama_server_context
|
|||||||
bool clean_kv_cache = true;
|
bool clean_kv_cache = true;
|
||||||
bool all_slots_are_idle = false;
|
bool all_slots_are_idle = false;
|
||||||
bool add_bos_token = true;
|
bool add_bos_token = true;
|
||||||
|
bool has_eos_token = true;
|
||||||
|
|
||||||
|
bool grammar_lazy = false;
|
||||||
|
std::vector<common_grammar_trigger> grammar_trigger_words;
|
||||||
|
|
||||||
int32_t n_ctx; // total context for all clients / slots
|
int32_t n_ctx; // total context for all clients / slots
|
||||||
|
|
||||||
@@ -492,8 +523,8 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
|
|
||||||
common_init_result common_init = common_init_from_params(params);
|
common_init_result common_init = common_init_from_params(params);
|
||||||
model = common_init.model;
|
model = common_init.model.release();
|
||||||
ctx = common_init.context;
|
ctx = common_init.context.release();
|
||||||
if (model == nullptr)
|
if (model == nullptr)
|
||||||
{
|
{
|
||||||
LOG_ERR("unable to load model: %s", params.model.c_str());
|
LOG_ERR("unable to load model: %s", params.model.c_str());
|
||||||
@@ -502,7 +533,7 @@ struct llama_server_context
|
|||||||
|
|
||||||
if (multimodal) {
|
if (multimodal) {
|
||||||
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
|
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
|
||||||
const int n_embd_llm = llama_n_embd(model);
|
const int n_embd_llm = llama_model_n_embd(model);
|
||||||
if (n_embd_clip != n_embd_llm) {
|
if (n_embd_clip != n_embd_llm) {
|
||||||
LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
|
LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
|
||||||
llama_free(ctx);
|
llama_free(ctx);
|
||||||
@@ -511,23 +542,15 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vocab = llama_model_get_vocab(model);
|
||||||
n_ctx = llama_n_ctx(ctx);
|
n_ctx = llama_n_ctx(ctx);
|
||||||
|
|
||||||
add_bos_token = llama_add_bos_token(model);
|
add_bos_token = llama_vocab_get_add_bos(vocab);
|
||||||
|
has_eos_token = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void validate_model_chat_template(server_params & sparams) {
|
|
||||||
llama_chat_message chat[] = {{"user", "test"}};
|
|
||||||
std::vector<char> buf(1);
|
|
||||||
int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
|
|
||||||
if (res < 0) {
|
|
||||||
LOG_ERR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", __func__);
|
|
||||||
sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
llama_client_slot* get_active_slot() {
|
llama_client_slot* get_active_slot() {
|
||||||
for (llama_client_slot& slot : slots) {
|
for (llama_client_slot& slot : slots) {
|
||||||
// Check if the slot is currently processing
|
// Check if the slot is currently processing
|
||||||
@@ -681,12 +704,13 @@ struct llama_server_context
|
|||||||
slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
|
slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
|
||||||
slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
|
slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
|
||||||
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
|
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
|
||||||
slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
|
|
||||||
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
|
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
|
||||||
slot->sparams.seed = json_value(data, "seed", default_sparams.seed);
|
slot->sparams.seed = json_value(data, "seed", default_sparams.seed);
|
||||||
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
||||||
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
||||||
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
||||||
|
slot->sparams.grammar_trigger_words = grammar_trigger_words;
|
||||||
|
slot->sparams.grammar_lazy = grammar_lazy;
|
||||||
|
|
||||||
if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) {
|
if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) {
|
||||||
// Might be better to reject the request with a 400 ?
|
// Might be better to reject the request with a 400 ?
|
||||||
@@ -726,8 +750,8 @@ struct llama_server_context
|
|||||||
slot->prompt = "";
|
slot->prompt = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (json_value(data, "ignore_eos", false)) {
|
if (json_value(data, "ignore_eos", false) && has_eos_token) {
|
||||||
slot->sparams.logit_bias.push_back({llama_token_eos(model), -INFINITY});
|
slot->sparams.logit_bias.push_back({llama_vocab_eos(vocab), -INFINITY});
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
slot->sparams.penalty_prompt_tokens.clear();
|
slot->sparams.penalty_prompt_tokens.clear();
|
||||||
@@ -766,13 +790,13 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
slot->sparams.logit_bias.clear();
|
slot->sparams.logit_bias.clear();
|
||||||
|
|
||||||
const auto &logit_bias = data.find("logit_bias");
|
const auto &logit_bias = data.find("logit_bias");
|
||||||
if (logit_bias != data.end() && logit_bias->is_array())
|
if (logit_bias != data.end() && logit_bias->is_array())
|
||||||
{
|
{
|
||||||
const int n_vocab = llama_n_vocab(model);
|
const llama_vocab * vocab = llama_model_get_vocab(model);
|
||||||
|
const int n_vocab = llama_vocab_n_tokens(vocab);
|
||||||
for (const auto &el : *logit_bias)
|
for (const auto &el : *logit_bias)
|
||||||
{
|
{
|
||||||
if (el.is_array() && el.size() == 2)
|
if (el.is_array() && el.size() == 2)
|
||||||
@@ -801,7 +825,7 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
else if (el[0].is_string())
|
else if (el[0].is_string())
|
||||||
{
|
{
|
||||||
auto toks = common_tokenize(model, el[0].get<std::string>(), false);
|
auto toks = common_tokenize(vocab, el[0].get<std::string>(), false);
|
||||||
for (auto tok : toks)
|
for (auto tok : toks)
|
||||||
{
|
{
|
||||||
slot->sparams.logit_bias.push_back({tok, bias});
|
slot->sparams.logit_bias.push_back({tok, bias});
|
||||||
@@ -1131,7 +1155,7 @@ struct llama_server_context
|
|||||||
slot.has_next_token = false;
|
slot.has_next_token = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result.tok == llama_token_eos(model))
|
if (result.tok == llama_vocab_eos(vocab) || llama_vocab_is_eog(vocab, result.tok))
|
||||||
{
|
{
|
||||||
slot.stopped_eos = true;
|
slot.stopped_eos = true;
|
||||||
slot.has_next_token = false;
|
slot.has_next_token = false;
|
||||||
@@ -1213,13 +1237,12 @@ struct llama_server_context
|
|||||||
{"mirostat", slot.sparams.mirostat},
|
{"mirostat", slot.sparams.mirostat},
|
||||||
{"mirostat_tau", slot.sparams.mirostat_tau},
|
{"mirostat_tau", slot.sparams.mirostat_tau},
|
||||||
{"mirostat_eta", slot.sparams.mirostat_eta},
|
{"mirostat_eta", slot.sparams.mirostat_eta},
|
||||||
{"penalize_nl", slot.sparams.penalize_nl},
|
|
||||||
{"stop", slot.params.antiprompt},
|
{"stop", slot.params.antiprompt},
|
||||||
{"n_predict", slot.params.n_predict},
|
{"n_predict", slot.params.n_predict},
|
||||||
{"n_keep", params.n_keep},
|
{"n_keep", params.n_keep},
|
||||||
{"ignore_eos", slot.sparams.ignore_eos},
|
{"ignore_eos", slot.sparams.ignore_eos},
|
||||||
{"stream", slot.params.stream},
|
{"stream", slot.params.stream},
|
||||||
// {"logit_bias", slot.sparams.logit_bias},
|
// {"logit_bias", slot.sparams.logit_bias},
|
||||||
{"n_probs", slot.sparams.n_probs},
|
{"n_probs", slot.sparams.n_probs},
|
||||||
{"min_keep", slot.sparams.min_keep},
|
{"min_keep", slot.sparams.min_keep},
|
||||||
{"grammar", slot.sparams.grammar},
|
{"grammar", slot.sparams.grammar},
|
||||||
@@ -1327,7 +1350,7 @@ struct llama_server_context
|
|||||||
res.error = false;
|
res.error = false;
|
||||||
res.stop = true;
|
res.stop = true;
|
||||||
|
|
||||||
const int n_embd = llama_n_embd(model);
|
const int n_embd = llama_model_n_embd(model);
|
||||||
if (!params.embedding)
|
if (!params.embedding)
|
||||||
{
|
{
|
||||||
LOG_WARNING("embedding disabled", {
|
LOG_WARNING("embedding disabled", {
|
||||||
@@ -1426,7 +1449,7 @@ struct llama_server_context
|
|||||||
n_eval = n_batch;
|
n_eval = n_batch;
|
||||||
}
|
}
|
||||||
|
|
||||||
const int n_embd = llama_n_embd(model);
|
const int n_embd = llama_model_n_embd(model);
|
||||||
float * embd = img.image_embedding + i * n_embd;
|
float * embd = img.image_embedding + i * n_embd;
|
||||||
llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0);
|
llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0);
|
||||||
if (llama_decode(ctx, llava_batch.batch))
|
if (llama_decode(ctx, llava_batch.batch))
|
||||||
@@ -1707,11 +1730,11 @@ struct llama_server_context
|
|||||||
suffix_tokens.erase(suffix_tokens.begin());
|
suffix_tokens.erase(suffix_tokens.begin());
|
||||||
}
|
}
|
||||||
|
|
||||||
prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(model));
|
prefix_tokens.insert(prefix_tokens.begin(), llama_vocab_fim_pre(vocab));
|
||||||
prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(model)); // always add BOS
|
prefix_tokens.insert(prefix_tokens.begin(), llama_vocab_bos(vocab)); // always add BOS
|
||||||
prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(model));
|
prefix_tokens.insert(prefix_tokens.end(), llama_vocab_fim_suf(vocab));
|
||||||
prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end());
|
prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end());
|
||||||
prefix_tokens.push_back(llama_token_middle(model));
|
prefix_tokens.push_back(llama_vocab_fim_mid(vocab));
|
||||||
prompt_tokens = prefix_tokens;
|
prompt_tokens = prefix_tokens;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -2112,7 +2135,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
|||||||
// slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
|
// slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
|
||||||
// slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
|
// slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
|
||||||
// slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
|
// slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
|
||||||
// slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
|
|
||||||
// slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
|
// slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
|
||||||
// slot->params.seed = json_value(data, "seed", default_params.seed);
|
// slot->params.seed = json_value(data, "seed", default_params.seed);
|
||||||
// slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
// slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
||||||
@@ -2135,7 +2157,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
|||||||
data["mirostat"] = predict->mirostat();
|
data["mirostat"] = predict->mirostat();
|
||||||
data["mirostat_tau"] = predict->mirostattau();
|
data["mirostat_tau"] = predict->mirostattau();
|
||||||
data["mirostat_eta"] = predict->mirostateta();
|
data["mirostat_eta"] = predict->mirostateta();
|
||||||
data["penalize_nl"] = predict->penalizenl();
|
|
||||||
data["n_keep"] = predict->nkeep();
|
data["n_keep"] = predict->nkeep();
|
||||||
data["seed"] = predict->seed();
|
data["seed"] = predict->seed();
|
||||||
data["grammar"] = predict->grammar();
|
data["grammar"] = predict->grammar();
|
||||||
@@ -2181,7 +2202,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
|||||||
// llama.params.sparams.mirostat = predict->mirostat();
|
// llama.params.sparams.mirostat = predict->mirostat();
|
||||||
// llama.params.sparams.mirostat_tau = predict->mirostattau();
|
// llama.params.sparams.mirostat_tau = predict->mirostattau();
|
||||||
// llama.params.sparams.mirostat_eta = predict->mirostateta();
|
// llama.params.sparams.mirostat_eta = predict->mirostateta();
|
||||||
// llama.params.sparams.penalize_nl = predict->penalizenl();
|
|
||||||
// llama.params.n_keep = predict->nkeep();
|
// llama.params.n_keep = predict->nkeep();
|
||||||
// llama.params.seed = predict->seed();
|
// llama.params.seed = predict->seed();
|
||||||
// llama.params.sparams.grammar = predict->grammar();
|
// llama.params.sparams.grammar = predict->grammar();
|
||||||
@@ -2228,6 +2248,35 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
|||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
const std::vector<ggml_type> kv_cache_types = {
|
||||||
|
GGML_TYPE_F32,
|
||||||
|
GGML_TYPE_F16,
|
||||||
|
GGML_TYPE_BF16,
|
||||||
|
GGML_TYPE_Q8_0,
|
||||||
|
GGML_TYPE_Q4_0,
|
||||||
|
GGML_TYPE_Q4_1,
|
||||||
|
GGML_TYPE_IQ4_NL,
|
||||||
|
GGML_TYPE_Q5_0,
|
||||||
|
GGML_TYPE_Q5_1,
|
||||||
|
};
|
||||||
|
|
||||||
|
static ggml_type kv_cache_type_from_str(const std::string & s) {
|
||||||
|
for (const auto & type : kv_cache_types) {
|
||||||
|
if (ggml_type_name(type) == s) {
|
||||||
|
return type;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw std::runtime_error("Unsupported cache type: " + s);
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::string get_all_kv_cache_types() {
|
||||||
|
std::ostringstream msg;
|
||||||
|
for (const auto & type : kv_cache_types) {
|
||||||
|
msg << ggml_type_name(type) << (&type == &kv_cache_types.back() ? "" : ", ");
|
||||||
|
}
|
||||||
|
return msg.str();
|
||||||
|
}
|
||||||
|
|
||||||
static void params_parse(const backend::ModelOptions* request,
|
static void params_parse(const backend::ModelOptions* request,
|
||||||
common_params & params) {
|
common_params & params) {
|
||||||
|
|
||||||
@@ -2242,10 +2291,10 @@ static void params_parse(const backend::ModelOptions* request,
|
|||||||
// params.model_alias ??
|
// params.model_alias ??
|
||||||
params.model_alias = request->modelfile();
|
params.model_alias = request->modelfile();
|
||||||
if (!request->cachetypekey().empty()) {
|
if (!request->cachetypekey().empty()) {
|
||||||
params.cache_type_k = request->cachetypekey();
|
params.cache_type_k = kv_cache_type_from_str(request->cachetypekey());
|
||||||
}
|
}
|
||||||
if (!request->cachetypevalue().empty()) {
|
if (!request->cachetypevalue().empty()) {
|
||||||
params.cache_type_v = request->cachetypevalue();
|
params.cache_type_v = kv_cache_type_from_str(request->cachetypevalue());
|
||||||
}
|
}
|
||||||
params.n_ctx = request->contextsize();
|
params.n_ctx = request->contextsize();
|
||||||
//params.memory_f16 = request->f16memory();
|
//params.memory_f16 = request->f16memory();
|
||||||
@@ -2264,7 +2313,7 @@ static void params_parse(const backend::ModelOptions* request,
|
|||||||
|
|
||||||
const char *llama_grpc_servers = std::getenv("LLAMACPP_GRPC_SERVERS");
|
const char *llama_grpc_servers = std::getenv("LLAMACPP_GRPC_SERVERS");
|
||||||
if (llama_grpc_servers != NULL) {
|
if (llama_grpc_servers != NULL) {
|
||||||
params.rpc_servers = std::string(llama_grpc_servers);
|
add_rpc_devices(std::string(llama_grpc_servers));
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Add yarn
|
// TODO: Add yarn
|
||||||
@@ -2330,6 +2379,21 @@ static void params_parse(const backend::ModelOptions* request,
|
|||||||
if ( request->ropefreqscale() != 0.0f ) {
|
if ( request->ropefreqscale() != 0.0f ) {
|
||||||
params.rope_freq_scale = request->ropefreqscale();
|
params.rope_freq_scale = request->ropefreqscale();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (request->grammartriggers_size() > 0) {
|
||||||
|
LOG_INFO("configuring grammar triggers", {});
|
||||||
|
llama.grammar_lazy = true;
|
||||||
|
for (int i = 0; i < request->grammartriggers_size(); i++) {
|
||||||
|
common_grammar_trigger trigger;
|
||||||
|
trigger.word = request->grammartriggers(i).word();
|
||||||
|
trigger.at_start = request->grammartriggers(i).at_start();
|
||||||
|
llama.grammar_trigger_words.push_back(trigger);
|
||||||
|
LOG_INFO("grammar trigger", {
|
||||||
|
{ "word", trigger.word },
|
||||||
|
{ "at_start", trigger.at_start }
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -2390,6 +2454,13 @@ public:
|
|||||||
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
||||||
reply.set_prompt_tokens(tokens_evaluated);
|
reply.set_prompt_tokens(tokens_evaluated);
|
||||||
|
|
||||||
|
if (result.result_json.contains("timings")) {
|
||||||
|
double timing_prompt_processing = result.result_json.at("timings").value("prompt_ms", 0.0);
|
||||||
|
reply.set_timing_prompt_processing(timing_prompt_processing);
|
||||||
|
double timing_token_generation = result.result_json.at("timings").value("predicted_ms", 0.0);
|
||||||
|
reply.set_timing_token_generation(timing_token_generation);
|
||||||
|
}
|
||||||
|
|
||||||
// Log Request Correlation Id
|
// Log Request Correlation Id
|
||||||
LOG_VERBOSE("correlation:", {
|
LOG_VERBOSE("correlation:", {
|
||||||
{ "id", data["correlation_id"] }
|
{ "id", data["correlation_id"] }
|
||||||
@@ -2430,6 +2501,13 @@ public:
|
|||||||
reply->set_prompt_tokens(tokens_evaluated);
|
reply->set_prompt_tokens(tokens_evaluated);
|
||||||
reply->set_tokens(tokens_predicted);
|
reply->set_tokens(tokens_predicted);
|
||||||
reply->set_message(completion_text);
|
reply->set_message(completion_text);
|
||||||
|
|
||||||
|
if (result.result_json.contains("timings")) {
|
||||||
|
double timing_prompt_processing = result.result_json.at("timings").value("prompt_ms", 0.0);
|
||||||
|
reply->set_timing_prompt_processing(timing_prompt_processing);
|
||||||
|
double timing_token_generation = result.result_json.at("timings").value("predicted_ms", 0.0);
|
||||||
|
reply->set_timing_token_generation(timing_token_generation);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -2464,6 +2542,18 @@ public:
|
|||||||
return grpc::Status::OK;
|
return grpc::Status::OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
grpc::Status TokenizeString(ServerContext* context, const backend::PredictOptions* request, backend::TokenizationResponse* response){
|
||||||
|
json data = parse_options(false, request, llama);
|
||||||
|
|
||||||
|
std::vector<llama_token> tokens = llama.tokenize(data["prompt"],false);
|
||||||
|
|
||||||
|
for (int i=0 ; i< tokens.size(); i++){
|
||||||
|
response->add_tokens(tokens[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return grpc::Status::OK;
|
||||||
|
}
|
||||||
|
|
||||||
grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
|
grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
|
||||||
llama_client_slot* active_slot = llama.get_active_slot();
|
llama_client_slot* active_slot = llama.get_active_slot();
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,13 @@
|
|||||||
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
|
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
|
||||||
index 342042ff..224db9b5 100644
|
index 3cd0d2fa..6c5e811a 100644
|
||||||
--- a/examples/llava/clip.cpp
|
--- a/examples/llava/clip.cpp
|
||||||
+++ b/examples/llava/clip.cpp
|
+++ b/examples/llava/clip.cpp
|
||||||
@@ -2419,7 +2419,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
@@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
||||||
struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
|
struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
|
||||||
int* patches_data = (int*)malloc(ggml_nbytes(patches));
|
int* patches_data = (int*)malloc(ggml_nbytes(patches));
|
||||||
for (int i = 0; i < num_patches; i++) {
|
for (int i = 0; i < num_patches; i++) {
|
||||||
- patches_data[i] = i + 1;
|
- patches_data[i] = i + 1;
|
||||||
+ patches_data[i] = i;
|
+ patches_data[i] = i;
|
||||||
}
|
}
|
||||||
ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
|
ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
|
||||||
free(patches_data);
|
free(patches_data);
|
||||||
@@ -2,20 +2,95 @@ INCLUDE_PATH := $(abspath ./)
|
|||||||
LIBRARY_PATH := $(abspath ./)
|
LIBRARY_PATH := $(abspath ./)
|
||||||
|
|
||||||
AR?=ar
|
AR?=ar
|
||||||
|
CMAKE_ARGS?=
|
||||||
BUILD_TYPE?=
|
BUILD_TYPE?=
|
||||||
|
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
||||||
# keep standard at C11 and C++11
|
# keep standard at C11 and C++11
|
||||||
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC
|
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC
|
||||||
|
|
||||||
|
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
||||||
|
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
||||||
|
|
||||||
|
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
|
||||||
|
ifeq ($(BUILD_TYPE),cublas)
|
||||||
|
CMAKE_ARGS+=-DGGML_CUDA=ON
|
||||||
|
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||||
|
# to CMAKE_ARGS automatically
|
||||||
|
else ifeq ($(BUILD_TYPE),openblas)
|
||||||
|
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||||
|
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||||
|
else ifeq ($(BUILD_TYPE),clblas)
|
||||||
|
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||||
|
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
||||||
|
else ifeq ($(BUILD_TYPE),hipblas)
|
||||||
|
CMAKE_ARGS+=-DGGML_HIP=ON
|
||||||
|
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
|
||||||
|
# But if it's OSX without metal, disable it here
|
||||||
|
else ifeq ($(OS),Darwin)
|
||||||
|
ifneq ($(BUILD_TYPE),metal)
|
||||||
|
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||||
|
else
|
||||||
|
CMAKE_ARGS+=-DGGML_METAL=ON
|
||||||
|
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
||||||
|
TARGET+=--target ggml-metal
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
# ifeq ($(BUILD_TYPE),sycl_f16)
|
||||||
|
# CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON -DSD_SYCL=ON -DGGML_SYCL_F16=ON
|
||||||
|
# endif
|
||||||
|
|
||||||
|
# ifeq ($(BUILD_TYPE),sycl_f32)
|
||||||
|
# CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DSD_SYCL=ON
|
||||||
|
# endif
|
||||||
|
|
||||||
# warnings
|
# warnings
|
||||||
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
|
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
|
||||||
|
|
||||||
|
# Find all .a archives in ARCHIVE_DIR
|
||||||
|
# (ggml can have different backends cpu, cuda, etc., each backend generates a .a archive)
|
||||||
|
GGML_ARCHIVE_DIR := build/ggml/src/
|
||||||
|
ALL_ARCHIVES := $(shell find $(GGML_ARCHIVE_DIR) -type f -name '*.a')
|
||||||
|
|
||||||
|
# Name of the single merged library
|
||||||
|
COMBINED_LIB := libggmlall.a
|
||||||
|
|
||||||
|
# Rule to merge all the .a files into one
|
||||||
|
$(COMBINED_LIB): $(ALL_ARCHIVES)
|
||||||
|
@echo "Merging all .a into $(COMBINED_LIB)"
|
||||||
|
rm -f $@
|
||||||
|
mkdir -p merge-tmp
|
||||||
|
for a in $(ALL_ARCHIVES); do \
|
||||||
|
( cd merge-tmp && ar x ../$$a ); \
|
||||||
|
done
|
||||||
|
( cd merge-tmp && ar rcs ../$@ *.o )
|
||||||
|
# Ensure we have a proper index
|
||||||
|
ranlib $@
|
||||||
|
# Clean up
|
||||||
|
rm -rf merge-tmp
|
||||||
|
|
||||||
|
build/libstable-diffusion.a:
|
||||||
|
@echo "Building SD with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
|
||||||
|
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||||
|
+bash -c "source $(ONEAPI_VARS); \
|
||||||
|
mkdir -p build && \
|
||||||
|
cd build && \
|
||||||
|
cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \
|
||||||
|
cmake --build . --config Release"
|
||||||
|
else
|
||||||
|
mkdir -p build && \
|
||||||
|
cd build && \
|
||||||
|
cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \
|
||||||
|
cmake --build . --config Release
|
||||||
|
endif
|
||||||
|
$(MAKE) $(COMBINED_LIB)
|
||||||
|
|
||||||
gosd.o:
|
gosd.o:
|
||||||
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
|
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
|
||||||
|
|
||||||
libsd.a: gosd.o
|
libsd.a: gosd.o
|
||||||
cp $(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a ./libsd.a
|
cp $(INCLUDE_PATH)/build/libstable-diffusion.a ./libsd.a
|
||||||
$(AR) rcs libsd.a gosd.o
|
$(AR) rcs libsd.a gosd.o
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f gosd.o libsd.a
|
rm -rf gosd.o libsd.a build $(COMBINED_LIB)
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
// #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include
|
// #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include
|
||||||
// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src/ggml-cpu -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src -lsd -lstdc++ -lm -lggml -lggml-base -lggml-cpu -lgomp
|
// #cgo LDFLAGS: -L${SRCDIR}/ -lsd -lstdc++ -lm -lggmlall -lgomp
|
||||||
// #include <gosd.h>
|
// #include <gosd.h>
|
||||||
// #include <stdlib.h>
|
// #include <stdlib.h>
|
||||||
import "C"
|
import "C"
|
||||||
|
|||||||
@@ -1,21 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
|
||||||
|
|
||||||
import (
|
|
||||||
"flag"
|
|
||||||
|
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
flag.Parse()
|
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, &Image{}); err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,33 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// This is a wrapper to statisfy the GRPC service interface
|
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
|
||||||
import (
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
"github.com/mudler/LocalAI/pkg/stablediffusion"
|
|
||||||
)
|
|
||||||
|
|
||||||
type Image struct {
|
|
||||||
base.SingleThread
|
|
||||||
stablediffusion *stablediffusion.StableDiffusion
|
|
||||||
}
|
|
||||||
|
|
||||||
func (image *Image) Load(opts *pb.ModelOptions) error {
|
|
||||||
var err error
|
|
||||||
// Note: the Model here is a path to a directory containing the model files
|
|
||||||
image.stablediffusion, err = stablediffusion.New(opts.ModelFile)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
|
|
||||||
return image.stablediffusion.GenerateImage(
|
|
||||||
int(opts.Height),
|
|
||||||
int(opts.Width),
|
|
||||||
int(opts.Mode),
|
|
||||||
int(opts.Step),
|
|
||||||
int(opts.Seed),
|
|
||||||
opts.PositivePrompt,
|
|
||||||
opts.NegativePrompt,
|
|
||||||
opts.Dst)
|
|
||||||
}
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
|
||||||
|
|
||||||
import (
|
|
||||||
"flag"
|
|
||||||
|
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
flag.Parse()
|
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, &Image{}); err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,32 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// This is a wrapper to statisfy the GRPC service interface
|
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
|
||||||
import (
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
"github.com/mudler/LocalAI/pkg/tinydream"
|
|
||||||
)
|
|
||||||
|
|
||||||
type Image struct {
|
|
||||||
base.SingleThread
|
|
||||||
tinydream *tinydream.TinyDream
|
|
||||||
}
|
|
||||||
|
|
||||||
func (image *Image) Load(opts *pb.ModelOptions) error {
|
|
||||||
var err error
|
|
||||||
// Note: the Model here is a path to a directory containing the model files
|
|
||||||
image.tinydream, err = tinydream.New(opts.ModelFile)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
|
|
||||||
return image.tinydream.GenerateImage(
|
|
||||||
int(opts.Height),
|
|
||||||
int(opts.Width),
|
|
||||||
int(opts.Step),
|
|
||||||
int(opts.Seed),
|
|
||||||
opts.PositivePrompt,
|
|
||||||
opts.NegativePrompt,
|
|
||||||
opts.Dst)
|
|
||||||
}
|
|
||||||
@@ -311,12 +311,16 @@ func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error)
|
|||||||
}
|
}
|
||||||
|
|
||||||
func isNormalized(k []float32) bool {
|
func isNormalized(k []float32) bool {
|
||||||
var sum float32
|
var sum float64
|
||||||
|
|
||||||
for _, v := range k {
|
for _, v := range k {
|
||||||
sum += v
|
v64 := float64(v)
|
||||||
|
sum += v64*v64
|
||||||
}
|
}
|
||||||
|
|
||||||
return sum == 1.0
|
s := math.Sqrt(sum)
|
||||||
|
|
||||||
|
return s >= 0.99 && s <= 1.01
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: This we could replace with handwritten SIMD code
|
// TODO: This we could replace with handwritten SIMD code
|
||||||
@@ -328,7 +332,7 @@ func normalizedCosineSimilarity(k1, k2 []float32) float32 {
|
|||||||
dot += k1[i] * k2[i]
|
dot += k1[i] * k2[i]
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(dot >= -1 && dot <= 1, fmt.Sprintf("dot = %f", dot))
|
assert(dot >= -1.01 && dot <= 1.01, fmt.Sprintf("dot = %f", dot))
|
||||||
|
|
||||||
// 2.0 * (1.0 - dot) would be the Euclidean distance
|
// 2.0 * (1.0 - dot) would be the Euclidean distance
|
||||||
return dot
|
return dot
|
||||||
@@ -418,7 +422,7 @@ func cosineSimilarity(k1, k2 []float32, mag1 float64) float32 {
|
|||||||
|
|
||||||
sim := float32(dot / (mag1 * math.Sqrt(mag2)))
|
sim := float32(dot / (mag1 * math.Sqrt(mag2)))
|
||||||
|
|
||||||
assert(sim >= -1 && sim <= 1, fmt.Sprintf("sim = %f", sim))
|
assert(sim >= -1.01 && sim <= 1.01, fmt.Sprintf("sim = %f", sim))
|
||||||
|
|
||||||
return sim
|
return sim
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||||
intel-extension-for-pytorch
|
intel-extension-for-pytorch==2.3.110+xpu
|
||||||
torch
|
torch==2.3.1+cxx11.abi
|
||||||
|
oneccl_bind_pt==2.3.100+xpu
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools
|
||||||
2
backend/python/autogptq/requirements-l4t.txt
Normal file
2
backend/python/autogptq/requirements-l4t.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
|
||||||
|
torch
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
accelerate
|
accelerate
|
||||||
auto-gptq==0.7.1
|
auto-gptq==0.7.1
|
||||||
grpcio==1.68.1
|
grpcio==1.70.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
transformers
|
transformers
|
||||||
@@ -1,8 +1,9 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||||
intel-extension-for-pytorch
|
intel-extension-for-pytorch==2.3.110+xpu
|
||||||
torch
|
torch==2.3.1+cxx11.abi
|
||||||
torchaudio
|
torchaudio==2.3.1+cxx11.abi
|
||||||
|
oneccl_bind_pt==2.3.100+xpu
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools
|
||||||
transformers
|
transformers
|
||||||
accelerate
|
accelerate
|
||||||
5
backend/python/bark/requirements-l4t.txt
Normal file
5
backend/python/bark/requirements-l4t.txt
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
|
||||||
|
torch
|
||||||
|
torchaudio
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
bark==0.1.5
|
bark==0.1.5
|
||||||
grpcio==1.68.1
|
grpcio==1.70.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -17,6 +17,9 @@
|
|||||||
# LIMIT_TARGETS="cublas12"
|
# LIMIT_TARGETS="cublas12"
|
||||||
# source $(dirname $0)/../common/libbackend.sh
|
# source $(dirname $0)/../common/libbackend.sh
|
||||||
#
|
#
|
||||||
|
|
||||||
|
PYTHON_VERSION="3.10"
|
||||||
|
|
||||||
function init() {
|
function init() {
|
||||||
# Name of the backend (directory name)
|
# Name of the backend (directory name)
|
||||||
BACKEND_NAME=${PWD##*/}
|
BACKEND_NAME=${PWD##*/}
|
||||||
@@ -88,7 +91,7 @@ function getBuildProfile() {
|
|||||||
# always result in an activated virtual environment
|
# always result in an activated virtual environment
|
||||||
function ensureVenv() {
|
function ensureVenv() {
|
||||||
if [ ! -d "${EDIR}/venv" ]; then
|
if [ ! -d "${EDIR}/venv" ]; then
|
||||||
uv venv ${EDIR}/venv
|
uv venv --python ${PYTHON_VERSION} ${EDIR}/venv
|
||||||
echo "virtualenv created"
|
echo "virtualenv created"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@@ -129,11 +132,16 @@ function installRequirements() {
|
|||||||
declare -a requirementFiles=(
|
declare -a requirementFiles=(
|
||||||
"${EDIR}/requirements-install.txt"
|
"${EDIR}/requirements-install.txt"
|
||||||
"${EDIR}/requirements.txt"
|
"${EDIR}/requirements.txt"
|
||||||
"${EDIR}/requirements-${BUILD_TYPE}.txt"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
|
if [ -n "${BUILD_PLATFORM}" ]; then
|
||||||
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}.txt")
|
requirementFiles+=("${EDIR}/requirements-${BUILD_PLATFORM}.txt")
|
||||||
|
else
|
||||||
|
requirementFiles+=("${EDIR}/requirements-${BUILD_TYPE}.txt")
|
||||||
|
|
||||||
|
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
|
||||||
|
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}.txt")
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements
|
# if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements
|
||||||
@@ -143,8 +151,14 @@ function installRequirements() {
|
|||||||
|
|
||||||
requirementFiles+=("${EDIR}/requirements-after.txt")
|
requirementFiles+=("${EDIR}/requirements-after.txt")
|
||||||
|
|
||||||
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
|
if [ -n "${BUILD_PLATFORM}" ]; then
|
||||||
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}-after.txt")
|
requirementFiles+=("${EDIR}/requirements-${BUILD_PLATFORM}-after.txt")
|
||||||
|
else
|
||||||
|
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
|
||||||
|
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}-after.txt")
|
||||||
|
else
|
||||||
|
requirementFiles+=("${EDIR}/requirements-${BUILD_TYPE}-after.txt")
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
for reqFile in ${requirementFiles[@]}; do
|
for reqFile in ${requirementFiles[@]}; do
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||||
intel-extension-for-pytorch
|
intel-extension-for-pytorch==2.3.110+xpu
|
||||||
torch
|
torch==2.3.1+cxx11.abi
|
||||||
|
oneccl_bind_pt==2.3.100+xpu
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
@@ -1,3 +1,3 @@
|
|||||||
grpcio==1.68.1
|
grpcio==1.70.0
|
||||||
protobuf
|
protobuf
|
||||||
grpcio-tools
|
grpcio-tools
|
||||||
@@ -1,9 +1,10 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||||
intel-extension-for-pytorch
|
intel-extension-for-pytorch==2.3.110+xpu
|
||||||
torch
|
torch==2.3.1+cxx11.abi
|
||||||
torchaudio
|
torchaudio==2.3.1+cxx11.abi
|
||||||
|
oneccl_bind_pt==2.3.100+xpu
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools
|
||||||
transformers
|
transformers
|
||||||
accelerate
|
accelerate
|
||||||
coqui-tts
|
coqui-tts
|
||||||
6
backend/python/coqui/requirements-l4t.txt
Normal file
6
backend/python/coqui/requirements-l4t.txt
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
|
||||||
|
torch
|
||||||
|
torchaudio
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
|
coqui-tts
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.68.1
|
grpcio==1.70.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
packaging==24.1
|
packaging==24.1
|
||||||
@@ -17,7 +17,7 @@ import backend_pb2_grpc
|
|||||||
|
|
||||||
import grpc
|
import grpc
|
||||||
|
|
||||||
from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
|
from diffusers import SanaPipeline, StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
|
||||||
EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
|
EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
|
||||||
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
|
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
|
||||||
from diffusers.pipelines.stable_diffusion import safety_checker
|
from diffusers.pipelines.stable_diffusion import safety_checker
|
||||||
@@ -275,6 +275,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
|
|
||||||
if request.LowVRAM:
|
if request.LowVRAM:
|
||||||
self.pipe.enable_model_cpu_offload()
|
self.pipe.enable_model_cpu_offload()
|
||||||
|
elif request.PipelineType == "SanaPipeline":
|
||||||
|
self.pipe = SanaPipeline.from_pretrained(
|
||||||
|
request.Model,
|
||||||
|
variant="bf16",
|
||||||
|
torch_dtype=torch.bfloat16)
|
||||||
|
self.pipe.vae.to(torch.bfloat16)
|
||||||
|
self.pipe.text_encoder.to(torch.bfloat16)
|
||||||
|
|
||||||
if CLIPSKIP and request.CLIPSkip != 0:
|
if CLIPSKIP and request.CLIPSkip != 0:
|
||||||
self.clip_skip = request.CLIPSkip
|
self.clip_skip = request.CLIPSkip
|
||||||
|
|||||||
@@ -1,9 +1,10 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||||
intel-extension-for-pytorch
|
intel-extension-for-pytorch==2.3.110+xpu
|
||||||
torch
|
torch==2.3.1+cxx11.abi
|
||||||
torchvision
|
torchvision==0.18.1+cxx11.abi
|
||||||
|
oneccl_bind_pt==2.3.100+xpu
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools
|
||||||
diffusers
|
diffusers
|
||||||
opencv-python
|
opencv-python
|
||||||
transformers
|
transformers
|
||||||
|
|||||||
10
backend/python/diffusers/requirements-l4t.txt
Normal file
10
backend/python/diffusers/requirements-l4t.txt
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
|
||||||
|
torch
|
||||||
|
diffusers
|
||||||
|
opencv-python
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
|
compel
|
||||||
|
peft
|
||||||
|
sentencepiece
|
||||||
|
optimum-quanto
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
setuptools
|
setuptools
|
||||||
grpcio==1.68.1
|
grpcio==1.70.0
|
||||||
pillow
|
pillow
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
|
|||||||
4
backend/python/exllama2/requirements-l4t.txt
Normal file
4
backend/python/exllama2/requirements-l4t.txt
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
|
||||||
|
torch
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.68.1
|
grpcio==1.70.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
wheel
|
wheel
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
.DEFAULT_GOAL := install
|
.DEFAULT_GOAL := install
|
||||||
|
|
||||||
.PHONY: install
|
.PHONY: install
|
||||||
install: protogen
|
install:
|
||||||
bash install.sh
|
bash install.sh
|
||||||
|
$(MAKE) protogen
|
||||||
|
|
||||||
.PHONY: protogen
|
.PHONY: protogen
|
||||||
protogen: backend_pb2_grpc.py backend_pb2.py
|
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||||
@@ -12,14 +13,8 @@ protogen-clean:
|
|||||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
backend_pb2_grpc.py backend_pb2.py:
|
backend_pb2_grpc.py backend_pb2.py:
|
||||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
bash protogen.sh
|
||||||
|
|
||||||
.PHONY: clean
|
.PHONY: clean
|
||||||
clean: protogen-clean
|
clean: protogen-clean
|
||||||
rm -rf venv __pycache__
|
rm -rf venv __pycache__
|
||||||
|
|
||||||
.PHONY: test
|
|
||||||
test: protogen
|
|
||||||
@echo "Testing openvoice..."
|
|
||||||
bash test.sh
|
|
||||||
@echo "openvoice tested."
|
|
||||||
@@ -1,85 +1,65 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
Extra gRPC server for HuggingFace SentenceTransformer models.
|
This is an extra gRPC server of LocalAI for Bark TTS
|
||||||
"""
|
"""
|
||||||
from concurrent import futures
|
from concurrent import futures
|
||||||
|
import time
|
||||||
import argparse
|
import argparse
|
||||||
import signal
|
import signal
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import time
|
|
||||||
import backend_pb2
|
import backend_pb2
|
||||||
import backend_pb2_grpc
|
import backend_pb2_grpc
|
||||||
|
|
||||||
|
from faster_whisper import WhisperModel
|
||||||
|
|
||||||
import grpc
|
import grpc
|
||||||
|
|
||||||
from sentence_transformers import SentenceTransformer
|
|
||||||
|
|
||||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||||
|
|
||||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
||||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||||
|
COQUI_LANGUAGE = os.environ.get('COQUI_LANGUAGE', None)
|
||||||
|
|
||||||
# Implement the BackendServicer class with the service methods
|
# Implement the BackendServicer class with the service methods
|
||||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||||
"""
|
"""
|
||||||
A gRPC servicer for the backend service.
|
BackendServicer is the class that implements the gRPC service
|
||||||
|
|
||||||
This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
|
|
||||||
"""
|
"""
|
||||||
def Health(self, request, context):
|
def Health(self, request, context):
|
||||||
"""
|
|
||||||
A gRPC method that returns the health status of the backend service.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
request: A HealthRequest object that contains the request parameters.
|
|
||||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A Reply object that contains the health status of the backend service.
|
|
||||||
"""
|
|
||||||
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
||||||
|
|
||||||
def LoadModel(self, request, context):
|
def LoadModel(self, request, context):
|
||||||
"""
|
device = "cpu"
|
||||||
A gRPC method that loads a model into memory.
|
# Get device
|
||||||
|
# device = "cuda" if request.CUDA else "cpu"
|
||||||
|
if request.CUDA:
|
||||||
|
device = "cuda"
|
||||||
|
|
||||||
Args:
|
|
||||||
request: A LoadModelRequest object that contains the request parameters.
|
|
||||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A Result object that contains the result of the LoadModel operation.
|
|
||||||
"""
|
|
||||||
model_name = request.Model
|
|
||||||
try:
|
try:
|
||||||
self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
|
print("Preparing models, please wait", file=sys.stderr)
|
||||||
|
self.model = WhisperModel(request.Model, device=device, compute_type="float16")
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||||
|
|
||||||
# Implement your logic here for the LoadModel service
|
# Implement your logic here for the LoadModel service
|
||||||
# Replace this with your desired response
|
# Replace this with your desired response
|
||||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
||||||
|
|
||||||
def Embedding(self, request, context):
|
def AudioTranscription(self, request, context):
|
||||||
"""
|
resultSegments = []
|
||||||
A gRPC method that calculates embeddings for a given sentence.
|
text = ""
|
||||||
|
try:
|
||||||
Args:
|
segments, info = self.model.transcribe(request.dst, beam_size=5, condition_on_previous_text=False)
|
||||||
request: An EmbeddingRequest object that contains the request parameters.
|
id = 0
|
||||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
for segment in segments:
|
||||||
|
print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
|
||||||
Returns:
|
resultSegments.append(backend_pb2.TranscriptSegment(id=id, start=segment.start, end=segment.end, text=segment.text))
|
||||||
An EmbeddingResult object that contains the calculated embeddings.
|
text += segment.text
|
||||||
"""
|
id += 1
|
||||||
# Implement your logic here for the Embedding service
|
except Exception as err:
|
||||||
# Replace this with your desired response
|
print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr)
|
||||||
print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr)
|
|
||||||
sentence_embeddings = self.model.encode(request.Embeddings)
|
|
||||||
return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings)
|
|
||||||
|
|
||||||
|
return backend_pb2.TranscriptResult(segments=resultSegments, text=text)
|
||||||
|
|
||||||
def serve(address):
|
def serve(address):
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||||
0
backend/python/parler-tts/protogen.sh → backend/python/faster-whisper/protogen.sh
Executable file → Normal file
0
backend/python/parler-tts/protogen.sh → backend/python/faster-whisper/protogen.sh
Executable file → Normal file
8
backend/python/faster-whisper/requirements-cpu.txt
Normal file
8
backend/python/faster-whisper/requirements-cpu.txt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
faster-whisper
|
||||||
|
opencv-python
|
||||||
|
accelerate
|
||||||
|
compel
|
||||||
|
peft
|
||||||
|
sentencepiece
|
||||||
|
torch==2.4.1
|
||||||
|
optimum-quanto
|
||||||
@@ -1,5 +1,9 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||||
torch==2.4.1+cu118
|
torch==2.4.1+cu118
|
||||||
|
faster-whisper
|
||||||
|
opencv-python
|
||||||
accelerate
|
accelerate
|
||||||
sentence-transformers==3.3.1
|
compel
|
||||||
transformers
|
peft
|
||||||
|
sentencepiece
|
||||||
|
optimum-quanto
|
||||||
8
backend/python/faster-whisper/requirements-cublas12.txt
Normal file
8
backend/python/faster-whisper/requirements-cublas12.txt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
torch==2.4.1
|
||||||
|
faster-whisper
|
||||||
|
opencv-python
|
||||||
|
accelerate
|
||||||
|
compel
|
||||||
|
peft
|
||||||
|
sentencepiece
|
||||||
|
optimum-quanto
|
||||||
@@ -1,4 +1,3 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||||
transformers
|
torch
|
||||||
accelerate
|
faster-whisper
|
||||||
torch==2.4.1+rocm6.0
|
|
||||||
6
backend/python/faster-whisper/requirements-intel.txt
Normal file
6
backend/python/faster-whisper/requirements-intel.txt
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||||
|
intel-extension-for-pytorch==2.3.110+xpu
|
||||||
|
torch==2.3.1+cxx11.abi
|
||||||
|
oneccl_bind_pt==2.3.100+xpu
|
||||||
|
optimum[openvino]
|
||||||
|
faster-whisper
|
||||||
9
backend/python/faster-whisper/requirements-l4t.txt
Normal file
9
backend/python/faster-whisper/requirements-l4t.txt
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
|
||||||
|
torch
|
||||||
|
faster-whisper
|
||||||
|
opencv-python
|
||||||
|
accelerate
|
||||||
|
compel
|
||||||
|
peft
|
||||||
|
sentencepiece
|
||||||
|
optimum-quanto
|
||||||
3
backend/python/faster-whisper/requirements.txt
Normal file
3
backend/python/faster-whisper/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
grpcio==1.70.0
|
||||||
|
protobuf
|
||||||
|
grpcio-tools
|
||||||
20
backend/python/kokoro/Makefile
Normal file
20
backend/python/kokoro/Makefile
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
# Build helpers for this Python backend.
# Running plain `make` performs the `install` target.
.DEFAULT_GOAL := install

# Install the backend via install.sh, then generate the gRPC stubs.
# protogen runs as a sub-make after install.sh has finished.
.PHONY: install
install:
	bash install.sh
	$(MAKE) protogen

# Generate the Python gRPC stubs if they are missing.
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py

# Remove the generated gRPC stubs.
.PHONY: protogen-clean
protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py

# Both stub files are produced by protogen.sh.
backend_pb2_grpc.py backend_pb2.py:
	bash protogen.sh

# Remove generated stubs, the virtualenv and Python bytecode caches.
.PHONY: clean
clean: protogen-clean
	rm -rf venv __pycache__
|
||||||
64
backend/python/parler-tts/backend.py → backend/python/kokoro/backend.py
Normal file → Executable file
64
backend/python/parler-tts/backend.py → backend/python/kokoro/backend.py
Normal file → Executable file
@@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
Extra gRPC server for MusicgenForConditionalGeneration models.
|
Extra gRPC server for Kokoro models.
|
||||||
"""
|
"""
|
||||||
from concurrent import futures
|
from concurrent import futures
|
||||||
|
|
||||||
@@ -8,20 +8,17 @@ import argparse
|
|||||||
import signal
|
import signal
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import time
|
import time
|
||||||
import backend_pb2
|
import backend_pb2
|
||||||
import backend_pb2_grpc
|
import backend_pb2_grpc
|
||||||
|
import soundfile as sf
|
||||||
import grpc
|
import grpc
|
||||||
|
|
||||||
from scipy.io.wavfile import write as write_wav
|
from models import build_model
|
||||||
|
from kokoro import generate
|
||||||
from parler_tts import ParlerTTSForConditionalGeneration
|
|
||||||
from transformers import AutoTokenizer
|
|
||||||
import soundfile as sf
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
SAMPLE_RATE = 22050
|
||||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||||
|
|
||||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
||||||
@@ -59,10 +56,31 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
A Result object that contains the result of the LoadModel operation.
|
A Result object that contains the result of the LoadModel operation.
|
||||||
"""
|
"""
|
||||||
model_name = request.Model
|
model_name = request.Model
|
||||||
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
|
||||||
try:
|
try:
|
||||||
self.model = ParlerTTSForConditionalGeneration.from_pretrained(model_name).to(device)
|
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
||||||
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
self.MODEL = build_model(request.ModelFile, device)
|
||||||
|
options = request.Options
|
||||||
|
# Find the voice from the options, options are a list of strings in this form optname:optvalue:
|
||||||
|
VOICE_NAME = None
|
||||||
|
for opt in options:
|
||||||
|
if opt.startswith("voice:"):
|
||||||
|
VOICE_NAME = opt.split(":")[1]
|
||||||
|
break
|
||||||
|
if VOICE_NAME is None:
|
||||||
|
return backend_pb2.Result(success=False, message=f"No voice specified in options")
|
||||||
|
MODELPATH = request.ModelPath
|
||||||
|
# If voice name contains a plus, split it and load the two models and combine them
|
||||||
|
if "+" in VOICE_NAME:
|
||||||
|
voice1, voice2 = VOICE_NAME.split("+")
|
||||||
|
voice1 = torch.load(f'{MODELPATH}/{voice1}.pt', weights_only=True).to(device)
|
||||||
|
voice2 = torch.load(f'{MODELPATH}/{voice2}.pt', weights_only=True).to(device)
|
||||||
|
self.VOICEPACK = torch.mean(torch.stack([voice1, voice2]), dim=0)
|
||||||
|
else:
|
||||||
|
self.VOICEPACK = torch.load(f'{MODELPATH}/{VOICE_NAME}.pt', weights_only=True).to(device)
|
||||||
|
|
||||||
|
self.VOICE_NAME = VOICE_NAME
|
||||||
|
|
||||||
|
print(f'Loaded voice: {VOICE_NAME}')
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||||
|
|
||||||
@@ -70,38 +88,26 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
|
|
||||||
def TTS(self, request, context):
|
def TTS(self, request, context):
|
||||||
model_name = request.model
|
model_name = request.model
|
||||||
voice = request.voice
|
|
||||||
if voice == "":
|
|
||||||
voice = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
|
|
||||||
if model_name == "":
|
if model_name == "":
|
||||||
return backend_pb2.Result(success=False, message="request.model is required")
|
return backend_pb2.Result(success=False, message="request.model is required")
|
||||||
try:
|
try:
|
||||||
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
audio, out_ps = generate(self.MODEL, request.text, self.VOICEPACK, lang=self.VOICE_NAME)
|
||||||
input_ids = self.tokenizer(voice, return_tensors="pt").input_ids.to(device)
|
print(out_ps)
|
||||||
prompt_input_ids = self.tokenizer(request.text, return_tensors="pt").input_ids.to(device)
|
sf.write(request.dst, audio, SAMPLE_RATE)
|
||||||
|
|
||||||
generation = self.model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
|
|
||||||
audio_arr = generation.cpu().numpy().squeeze()
|
|
||||||
print("[parler-tts] TTS generated!", file=sys.stderr)
|
|
||||||
sf.write(request.dst, audio_arr, self.model.config.sampling_rate)
|
|
||||||
print("[parler-tts] TTS saved to", request.dst, file=sys.stderr)
|
|
||||||
print("[parler-tts] TTS for", file=sys.stderr)
|
|
||||||
print(request, file=sys.stderr)
|
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||||
return backend_pb2.Result(success=True)
|
return backend_pb2.Result(success=True)
|
||||||
|
|
||||||
|
|
||||||
def serve(address):
|
def serve(address):
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||||
server.add_insecure_port(address)
|
server.add_insecure_port(address)
|
||||||
server.start()
|
server.start()
|
||||||
print("[parler-tts] Server started. Listening on: " + address, file=sys.stderr)
|
print("[Kokoro] Server started. Listening on: " + address, file=sys.stderr)
|
||||||
|
|
||||||
# Define the signal handler function
|
# Define the signal handler function
|
||||||
def signal_handler(sig, frame):
|
def signal_handler(sig, frame):
|
||||||
print("[parler-tts] Received termination signal. Shutting down...")
|
print("[Kokoro] Received termination signal. Shutting down...")
|
||||||
server.stop(0)
|
server.stop(0)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
@@ -121,5 +127,5 @@ if __name__ == "__main__":
|
|||||||
"--addr", default="localhost:50051", help="The address to bind the server to."
|
"--addr", default="localhost:50051", help="The address to bind the server to."
|
||||||
)
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
print(f"[parler-tts] startup: {args}", file=sys.stderr)
|
print(f"[Kokoro] startup: {args}", file=sys.stderr)
|
||||||
serve(args.addr)
|
serve(args.addr)
|
||||||
524
backend/python/kokoro/istftnet.py
Normal file
524
backend/python/kokoro/istftnet.py
Normal file
@@ -0,0 +1,524 @@
|
|||||||
|
# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/istftnet.py
|
||||||
|
# https://github.com/yl4579/StyleTTS2/blob/main/Modules/istftnet.py
|
||||||
|
from scipy.signal import get_window
|
||||||
|
from torch.nn import Conv1d, ConvTranspose1d
|
||||||
|
from torch.nn.utils import weight_norm, remove_weight_norm
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
# https://github.com/yl4579/StyleTTS2/blob/main/Modules/utils.py
|
||||||
|
def init_weights(m, mean=0.0, std=0.01):
    """Re-initialise the weights of convolution modules in place.

    Intended for use with ``Module.apply``: modules whose class name contains
    ``"Conv"`` get their ``weight`` redrawn from a normal distribution; every
    other module is left untouched.

    Args:
        m: the module to inspect.
        mean: mean of the normal distribution.
        std: standard deviation of the normal distribution.
    """
    if "Conv" in m.__class__.__name__:
        m.weight.data.normal_(mean, std)
|
||||||
|
|
||||||
|
def get_padding(kernel_size, dilation=1):
    """Return ``dilation * (kernel_size - 1) // 2`` as a plain ``int``.

    The callers in this file pass the result as the ``padding`` argument of
    stride-1 ``Conv1d`` layers.

    Args:
        kernel_size: convolution kernel size (expected >= 1).
        dilation: convolution dilation (expected >= 1).

    Returns:
        The padding amount as a Python ``int``.
    """
    # Integer floor division avoids the float round trip of int(x / 2).
    return int((kernel_size * dilation - dilation) // 2)
|
||||||
|
|
||||||
|
LRELU_SLOPE = 0.1
|
||||||
|
|
||||||
|
class AdaIN1d(nn.Module):
    """Adaptive instance normalisation for 1-D feature maps.

    The input is instance-normalised without learned affine parameters, then
    scaled and shifted per channel by values predicted from a style vector.
    """

    def __init__(self, style_dim, num_features):
        """
        Args:
            style_dim: size of the style vector fed to the linear layer.
            num_features: number of channels of the normalised input.
        """
        super().__init__()
        self.norm = nn.InstanceNorm1d(num_features, affine=False)
        # One linear layer predicts both the scale and the shift, hence 2x.
        self.fc = nn.Linear(style_dim, num_features*2)

    def forward(self, x, s):
        """Apply style-conditioned normalisation.

        Args:
            x: input features; normalised by ``InstanceNorm1d(num_features)``.
            s: style tensor; the ``view`` below requires ``fc(s)`` to be 2-D,
                i.e. ``s`` of shape ``(batch, style_dim)``.

        Returns:
            ``(1 + gamma) * norm(x) + beta`` with ``gamma`` and ``beta``
            broadcast along the last axis.
        """
        h = self.fc(s)
        # Add a trailing singleton axis so gamma/beta broadcast over the last dim.
        h = h.view(h.size(0), h.size(1), 1)
        gamma, beta = torch.chunk(h, chunks=2, dim=1)
        return (1 + gamma) * self.norm(x) + beta
|
||||||
|
|
||||||
|
class AdaINResBlock1(torch.nn.Module):
    """Style-conditioned residual block made of three stacked stages.

    Each stage applies AdaIN -> Snake activation -> convolution twice
    (first with a configurable dilation, then with dilation 1) and adds the
    result back to its input.
    """

    def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5), style_dim=64):
        """
        Args:
            channels: number of input and output channels of every convolution.
            kernel_size: kernel size shared by all convolutions.
            dilation: dilations of the first convolution of each stage; the
                first three entries are used.
            style_dim: size of the style vector consumed by the AdaIN layers.
        """
        super(AdaINResBlock1, self).__init__()

        def _conv(d):
            # Weight-normalised conv with padding from get_padding(kernel_size, d).
            return weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=d,
                                      padding=get_padding(kernel_size, d)))

        # Exactly three stages are built; modules are created in the same
        # order as a hand-written list so parameter names stay stable.
        self.convs1 = nn.ModuleList([_conv(dilation[i]) for i in range(3)])
        self.convs1.apply(init_weights)

        self.convs2 = nn.ModuleList([_conv(1) for _ in range(3)])
        self.convs2.apply(init_weights)

        self.adain1 = nn.ModuleList([AdaIN1d(style_dim, channels) for _ in range(3)])
        self.adain2 = nn.ModuleList([AdaIN1d(style_dim, channels) for _ in range(3)])

        # Per-channel Snake activation parameters, one per stage.
        self.alpha1 = nn.ParameterList([nn.Parameter(torch.ones(1, channels, 1)) for _ in range(len(self.convs1))])
        self.alpha2 = nn.ParameterList([nn.Parameter(torch.ones(1, channels, 1)) for _ in range(len(self.convs2))])

    def forward(self, x, s):
        """Run the three residual stages.

        Args:
            x: input features passed through the AdaIN layers and convolutions.
            s: style tensor forwarded to every AdaIN layer.

        Returns:
            The sum of the input and the stage outputs, accumulated stage by stage.
        """
        for c1, c2, n1, n2, a1, a2 in zip(self.convs1, self.convs2, self.adain1, self.adain2, self.alpha1, self.alpha2):
            xt = n1(x, s)
            xt = xt + (1 / a1) * (torch.sin(a1 * xt) ** 2)  # Snake1D
            xt = c1(xt)
            xt = n2(xt, s)
            xt = xt + (1 / a2) * (torch.sin(a2 * xt) ** 2)  # Snake1D
            xt = c2(xt)
            x = xt + x
        return x

    def remove_weight_norm(self):
        """Strip weight normalisation from every convolution of the block."""
        # Inside the method body the bare name resolves to the module-level
        # function imported from torch.nn.utils, not to this method.
        for l in self.convs1:
            remove_weight_norm(l)
        for l in self.convs2:
            remove_weight_norm(l)
|
||||||
|
|
||||||
|
class TorchSTFT(torch.nn.Module):
    """Thin wrapper around ``torch.stft`` / ``torch.istft`` with a fixed window."""

    def __init__(self, filter_length=800, hop_length=200, win_length=800, window='hann'):
        """
        Args:
            filter_length: FFT size passed to ``torch.stft`` / ``torch.istft``.
            hop_length: hop between successive frames.
            win_length: window length.
            window: window name understood by ``scipy.signal.get_window``.
        """
        super().__init__()
        self.filter_length = filter_length
        self.hop_length = hop_length
        self.win_length = win_length
        # Plain attribute (not a buffer); moved to the input's device on each call.
        self.window = torch.from_numpy(get_window(window, win_length, fftbins=True).astype(np.float32))

    def transform(self, input_data):
        """Return the ``(magnitude, phase)`` of the STFT of ``input_data``."""
        forward_transform = torch.stft(
            input_data,
            self.filter_length, self.hop_length, self.win_length, window=self.window.to(input_data.device),
            return_complex=True)

        return torch.abs(forward_transform), torch.angle(forward_transform)

    def inverse(self, magnitude, phase):
        """Rebuild a waveform from magnitude and phase spectrograms.

        Returns:
            The ``torch.istft`` output with an extra axis inserted before the last one.
        """
        inverse_transform = torch.istft(
            magnitude * torch.exp(phase * 1j),
            self.filter_length, self.hop_length, self.win_length, window=self.window.to(magnitude.device))

        return inverse_transform.unsqueeze(-2)  # unsqueeze to stay consistent with conv_transpose1d implementation

    def forward(self, input_data):
        """Analyse then resynthesise ``input_data``.

        The intermediate spectrograms are kept on ``self.magnitude`` and
        ``self.phase``.
        """
        self.magnitude, self.phase = self.transform(input_data)
        reconstruction = self.inverse(self.magnitude, self.phase)
        return reconstruction
|
||||||
|
|
||||||
|
class SineGen(torch.nn.Module):
    """ Definition of sine generator
    SineGen(samp_rate, upsample_scale, harmonic_num = 0,
            sine_amp = 0.1, noise_std = 0.003,
            voiced_threshold = 0,
            flag_for_pulse=False)
    samp_rate: sampling rate in Hz
    upsample_scale: factor used to down- and up-sample the phase track
    harmonic_num: number of harmonic overtones (default 0)
    sine_amp: amplitude of sine waveform (default 0.1)
    noise_std: std of Gaussian noise (default 0.003)
    voiced_threshold: F0 threshold for U/V classification (default 0)
    flag_for_pulse: this SineGen is used inside PulseGen (default False)
    Note: when flag_for_pulse is True, the first time step of a voiced
        segment is always sin(np.pi) or cos(0)
    """

    def __init__(self, samp_rate, upsample_scale, harmonic_num=0,
                 sine_amp=0.1, noise_std=0.003,
                 voiced_threshold=0,
                 flag_for_pulse=False):
        super(SineGen, self).__init__()
        self.sine_amp = sine_amp
        self.noise_std = noise_std
        self.harmonic_num = harmonic_num
        # Fundamental plus harmonic_num overtones.
        self.dim = self.harmonic_num + 1
        self.sampling_rate = samp_rate
        self.voiced_threshold = voiced_threshold
        self.flag_for_pulse = flag_for_pulse
        self.upsample_scale = upsample_scale

    def _f02uv(self, f0):
        """Return a float32 mask: 1 where f0 exceeds the voiced threshold, else 0."""
        uv = (f0 > self.voiced_threshold).type(torch.float32)
        return uv

    def _f02sine(self, f0_values):
        """ f0_values: (batchsize, length, dim)
            where dim indicates fundamental tone and overtones
        """
        # convert to F0 in rad. The integer part n can be ignored
        # because 2 * np.pi * n doesn't affect phase
        rad_values = (f0_values / self.sampling_rate) % 1

        # initial phase noise (no noise for fundamental component)
        rand_ini = torch.rand(f0_values.shape[0], f0_values.shape[2],
                              device=f0_values.device)
        rand_ini[:, 0] = 0
        rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini

        # instantaneous phase sine[t] = sin(2*pi \sum_i=1 ^{t} rad)
        if not self.flag_for_pulse:
            # Normal case. The per-step wrap-around correction (cumsum_shift
            # built with padDiff) is disabled; the phase increments are instead
            # accumulated at a rate reduced by upsample_scale and the
            # accumulated phase is interpolated back up.
            rad_values = torch.nn.functional.interpolate(rad_values.transpose(1, 2),
                                                         scale_factor=1/self.upsample_scale,
                                                         mode="linear").transpose(1, 2)

            phase = torch.cumsum(rad_values, dim=1) * 2 * np.pi
            phase = torch.nn.functional.interpolate(phase.transpose(1, 2) * self.upsample_scale,
                                                    scale_factor=self.upsample_scale, mode="linear").transpose(1, 2)
            sines = torch.sin(phase)

        else:
            # If necessary, make sure that the first time step of every
            # voiced segments is sin(pi) or cos(0)
            # This is used for pulse-train generation

            # identify the last time step in unvoiced segments
            uv = self._f02uv(f0_values)
            uv_1 = torch.roll(uv, shifts=-1, dims=1)
            uv_1[:, -1, :] = 1
            u_loc = (uv < 1) * (uv_1 > 0)

            # get the instantaneous phase
            tmp_cumsum = torch.cumsum(rad_values, dim=1)
            # different batch needs to be processed differently
            for idx in range(f0_values.shape[0]):
                temp_sum = tmp_cumsum[idx, u_loc[idx, :, 0], :]
                temp_sum[1:, :] = temp_sum[1:, :] - temp_sum[0:-1, :]
                # stores the accumulation of i.phase within
                # each voiced segments
                tmp_cumsum[idx, :, :] = 0
                tmp_cumsum[idx, u_loc[idx, :, 0], :] = temp_sum

            # rad_values - tmp_cumsum: remove the accumulation of i.phase
            # within the previous voiced segment.
            i_phase = torch.cumsum(rad_values - tmp_cumsum, dim=1)

            # get the sines
            sines = torch.cos(i_phase * 2 * np.pi)
        return sines

    def forward(self, f0):
        """ sine_tensor, uv, noise = forward(f0)
        input F0: tensor(batchsize=1, length, dim=1)
                  f0 for unvoiced steps should be 0
        output sine_tensor: tensor(batchsize=1, length, dim)
        output uv: tensor(batchsize=1, length, 1)
        output noise: tensor with the same shape as sine_tensor
        """
        # Multipliers 1..harmonic_num+1 turn F0 into the fundamental plus its
        # overtones; built directly on f0's device as float32.
        harmonics = torch.arange(1, self.harmonic_num + 2, dtype=torch.float32,
                                 device=f0.device).view(1, 1, -1)
        fn = torch.multiply(f0, harmonics)

        # generate sine waveforms
        sine_waves = self._f02sine(fn) * self.sine_amp

        # generate uv signal
        uv = self._f02uv(f0)

        # noise: for unvoiced should be similar to sine_amp
        #        std = self.sine_amp/3 -> max value ~ self.sine_amp
        #        for voiced regions is self.noise_std
        noise_amp = uv * self.noise_std + (1 - uv) * self.sine_amp / 3
        noise = noise_amp * torch.randn_like(sine_waves)

        # first: set the unvoiced part to 0 by uv
        # then: additive noise
        sine_waves = sine_waves * uv + noise
        return sine_waves, uv, noise
|
||||||
|
|
||||||
|
|
||||||
|
class SourceModuleHnNSF(torch.nn.Module):
    """ SourceModule for hn-nsf
    SourceModule(sampling_rate, upsample_scale, harmonic_num=0, sine_amp=0.1,
                 add_noise_std=0.003, voiced_threshod=0)
    sampling_rate: sampling_rate in Hz
    upsample_scale: forwarded to the internal SineGen
    harmonic_num: number of harmonic above F0 (default: 0)
    sine_amp: amplitude of sine source signal (default: 0.1)
    add_noise_std: std of additive Gaussian noise (default: 0.003)
        note that amplitude of noise in unvoiced is decided
        by sine_amp
    voiced_threshod: threshold to set U/V given F0 (default: 0)
        (the keyword keeps its historical spelling because callers pass it by name)
    Sine_source, noise_source, uv = SourceModuleHnNSF(F0_sampled)
    F0_sampled (batchsize, length, 1)
    Sine_source (batchsize, length, 1)
    noise_source (batchsize, length 1)
    uv (batchsize, length, 1)
    """

    def __init__(self, sampling_rate, upsample_scale, harmonic_num=0, sine_amp=0.1,
                 add_noise_std=0.003, voiced_threshod=0):
        super(SourceModuleHnNSF, self).__init__()

        self.sine_amp = sine_amp
        self.noise_std = add_noise_std

        # to produce sine waveforms
        self.l_sin_gen = SineGen(sampling_rate, upsample_scale, harmonic_num,
                                 sine_amp, add_noise_std, voiced_threshod)

        # to merge source harmonics into a single excitation
        self.l_linear = torch.nn.Linear(harmonic_num + 1, 1)
        self.l_tanh = torch.nn.Tanh()

    def forward(self, x):
        """
        Sine_source, noise_source, uv = SourceModuleHnNSF(F0_sampled)
        F0_sampled (batchsize, length, 1)
        Sine_source (batchsize, length, 1)
        noise_source (batchsize, length 1)
        """
        # source for harmonic branch; the sine generator itself runs without
        # gradient tracking, only the linear merge below is differentiable
        with torch.no_grad():
            sine_wavs, uv, _ = self.l_sin_gen(x)
        sine_merge = self.l_tanh(self.l_linear(sine_wavs))

        # source for noise branch, in the same shape as uv
        noise = torch.randn_like(uv) * self.sine_amp / 3
        return sine_merge, noise, uv
|
||||||
|
def padDiff(x):
    """Forward difference along the second-to-last axis.

    Args:
        x: tensor with at least two dimensions.

    Returns:
        ``x[..., 1:, :] - x[..., :-1, :]``; the differenced axis is one
        element shorter than in ``x``.
    """
    # Direct slicing gives the same values as shifting with negative padding,
    # subtracting and trimming the last step.
    return x[..., 1:, :] - x[..., :-1, :]
|
||||||
|
|
||||||
|
|
||||||
|
class Generator(torch.nn.Module):
    """Upsampling generator that predicts STFT magnitude/phase and inverts them.

    A harmonic source derived from F0 is transformed with ``TorchSTFT`` and
    injected at every upsampling stage through ``noise_convs``/``noise_res``.
    The final convolution emits ``gen_istft_n_fft + 2`` channels that are split
    into a magnitude half (``exp``) and a phase half (``sin``).
    """

    def __init__(self, style_dim, resblock_kernel_sizes, upsample_rates, upsample_initial_channel,
                 resblock_dilation_sizes, upsample_kernel_sizes, gen_istft_n_fft, gen_istft_hop_size):
        super(Generator, self).__init__()

        self.num_kernels = len(resblock_kernel_sizes)
        self.num_upsamples = len(upsample_rates)
        resblock = AdaINResBlock1

        self.m_source = SourceModuleHnNSF(
            sampling_rate=24000,
            upsample_scale=np.prod(upsample_rates) * gen_istft_hop_size,
            harmonic_num=8, voiced_threshod=10)
        self.f0_upsamp = torch.nn.Upsample(scale_factor=np.prod(upsample_rates) * gen_istft_hop_size)
        self.noise_convs = nn.ModuleList()
        self.noise_res = nn.ModuleList()

        # Each transposed conv halves the channel count and upsamples by ``u``.
        self.ups = nn.ModuleList()
        for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
            self.ups.append(weight_norm(
                ConvTranspose1d(upsample_initial_channel//(2**i), upsample_initial_channel//(2**(i+1)),
                                k, u, padding=(k-u)//2)))

        self.resblocks = nn.ModuleList()
        for i in range(len(self.ups)):
            ch = upsample_initial_channel//(2**(i+1))
            for k, d in zip(resblock_kernel_sizes, resblock_dilation_sizes):
                self.resblocks.append(resblock(ch, k, d, style_dim))

            c_cur = upsample_initial_channel // (2 ** (i + 1))

            if i + 1 < len(upsample_rates):
                # Strided conv so the source matches the resolution of stage i.
                stride_f0 = np.prod(upsample_rates[i + 1:])
                self.noise_convs.append(Conv1d(
                    gen_istft_n_fft + 2, c_cur, kernel_size=stride_f0 * 2, stride=stride_f0, padding=(stride_f0+1) // 2))
                self.noise_res.append(resblock(c_cur, 7, [1,3,5], style_dim))
            else:
                self.noise_convs.append(Conv1d(gen_istft_n_fft + 2, c_cur, kernel_size=1))
                self.noise_res.append(resblock(c_cur, 11, [1,3,5], style_dim))

        self.post_n_fft = gen_istft_n_fft
        # ``ch`` is the channel count of the last upsampling stage.
        self.conv_post = weight_norm(Conv1d(ch, self.post_n_fft + 2, 7, 1, padding=3))
        self.ups.apply(init_weights)
        self.conv_post.apply(init_weights)
        self.reflection_pad = torch.nn.ReflectionPad1d((1, 0))
        self.stft = TorchSTFT(filter_length=gen_istft_n_fft, hop_length=gen_istft_hop_size, win_length=gen_istft_n_fft)

    def forward(self, x, s, f0):
        """Synthesize a waveform from features ``x``, style ``s`` and an F0 curve."""
        # NOTE(review): indentation was reconstructed; the whole source
        # preparation is assumed to run under ``no_grad`` -- confirm.
        with torch.no_grad():
            f0 = self.f0_upsamp(f0[:, None]).transpose(1, 2)  # bs,n,t

            har_source, noi_source, uv = self.m_source(f0)
            har_source = har_source.transpose(1, 2).squeeze(1)
            har_spec, har_phase = self.stft.transform(har_source)
            har = torch.cat([har_spec, har_phase], dim=1)

        for i in range(self.num_upsamples):
            x = F.leaky_relu(x, LRELU_SLOPE)
            x_source = self.noise_convs[i](har)
            x_source = self.noise_res[i](x_source, s)

            x = self.ups[i](x)
            if i == self.num_upsamples - 1:
                x = self.reflection_pad(x)

            x = x + x_source
            # Average the outputs of the ``num_kernels`` residual blocks of stage i.
            xs = None
            for j in range(self.num_kernels):
                if xs is None:
                    xs = self.resblocks[i*self.num_kernels+j](x, s)
                else:
                    xs += self.resblocks[i*self.num_kernels+j](x, s)
            x = xs / self.num_kernels
        x = F.leaky_relu(x)
        x = self.conv_post(x)
        spec = torch.exp(x[:,:self.post_n_fft // 2 + 1, :])
        phase = torch.sin(x[:, self.post_n_fft // 2 + 1:, :])
        return self.stft.inverse(spec, phase)

    def fw_phase(self, x, s):
        """Run the upsampling stack without the harmonic source; return ``(spec, phase)``."""
        for i in range(self.num_upsamples):
            x = F.leaky_relu(x, LRELU_SLOPE)
            x = self.ups[i](x)
            xs = None
            for j in range(self.num_kernels):
                if xs is None:
                    xs = self.resblocks[i*self.num_kernels+j](x, s)
                else:
                    xs += self.resblocks[i*self.num_kernels+j](x, s)
            x = xs / self.num_kernels
        x = F.leaky_relu(x)
        x = self.reflection_pad(x)
        x = self.conv_post(x)
        spec = torch.exp(x[:,:self.post_n_fft // 2 + 1, :])
        phase = torch.sin(x[:, self.post_n_fft // 2 + 1:, :])
        return spec, phase

    def remove_weight_norm(self):
        """Strip weight normalization from the upsamplers, resblocks and output conv."""
        print('Removing weight norm...')
        for l in self.ups:
            remove_weight_norm(l)
        for l in self.resblocks:
            l.remove_weight_norm()
        # ``__init__`` never creates ``conv_pre``; touching it unconditionally
        # raised AttributeError. Only handle it if something attached one.
        conv_pre = getattr(self, 'conv_pre', None)
        if conv_pre is not None:
            remove_weight_norm(conv_pre)
        remove_weight_norm(self.conv_post)
|
||||||
|
|
||||||
|
|
||||||
|
class AdainResBlk1d(nn.Module):
    """Residual 1-D conv block with two AdaIN layers and optional 2x upsampling.

    The residual path is norm -> activation -> pool -> conv, twice (the pool
    only in the first half). The shortcut path upsamples and, when the channel
    counts differ, applies a learned 1x1 convolution. The sum is divided by
    sqrt(2).
    """

    def __init__(self, dim_in, dim_out, style_dim=64, actv=nn.LeakyReLU(0.2),
                 upsample='none', dropout_p=0.0):
        super().__init__()
        self.actv = actv
        self.upsample_type = upsample
        self.upsample = UpSample1d(upsample)
        self.learned_sc = dim_in != dim_out
        self._build_weights(dim_in, dim_out, style_dim)
        self.dropout = nn.Dropout(dropout_p)

        # Any value other than the string 'none' selects the learned
        # depthwise transposed conv that doubles the length.
        needs_pool = upsample != 'none'
        if needs_pool:
            self.pool = weight_norm(nn.ConvTranspose1d(dim_in, dim_in, kernel_size=3, stride=2, groups=dim_in, padding=1, output_padding=1))
        else:
            self.pool = nn.Identity()

    def _build_weights(self, dim_in, dim_out, style_dim):
        """Create the two convolutions, the two AdaIN layers and the optional 1x1 shortcut."""
        self.conv1 = weight_norm(nn.Conv1d(dim_in, dim_out, 3, 1, 1))
        self.conv2 = weight_norm(nn.Conv1d(dim_out, dim_out, 3, 1, 1))
        self.norm1 = AdaIN1d(style_dim, dim_in)
        self.norm2 = AdaIN1d(style_dim, dim_out)
        if self.learned_sc:
            self.conv1x1 = weight_norm(nn.Conv1d(dim_in, dim_out, 1, 1, 0, bias=False))

    def _shortcut(self, x):
        """Skip path: upsample, then project channels if they differ."""
        skip = self.upsample(x)
        return self.conv1x1(skip) if self.learned_sc else skip

    def _residual(self, x, s):
        """Main path conditioned on the style vector ``s``."""
        h = self.pool(self.actv(self.norm1(x, s)))
        h = self.conv1(self.dropout(h))
        h = self.actv(self.norm2(h, s))
        return self.conv2(self.dropout(h))

    def forward(self, x, s):
        residual = self._residual(x, s)
        shortcut = self._shortcut(x)
        return (residual + shortcut) / np.sqrt(2)
|
||||||
|
|
||||||
|
class UpSample1d(nn.Module):
    """Identity when ``layer_type`` is ``'none'``, otherwise 2x nearest-neighbour upsampling."""

    def __init__(self, layer_type):
        super().__init__()
        self.layer_type = layer_type

    def forward(self, x):
        if self.layer_type != 'none':
            return F.interpolate(x, scale_factor=2, mode='nearest')
        return x
|
||||||
|
|
||||||
|
class Decoder(nn.Module):
    """AdaIN encoder/decoder stack that feeds the iSTFT ``Generator``.

    F0 and N curves are each reduced by a stride-2 convolution, concatenated
    with the ``asr`` features and encoded. Every decode block up to and
    including the upsampling one receives the skip inputs again.
    """

    def __init__(self, dim_in=512, F0_channel=512, style_dim=64, dim_out=80,
                 resblock_kernel_sizes = [3,7,11],
                 upsample_rates = [10, 6],
                 upsample_initial_channel=512,
                 resblock_dilation_sizes=[[1,3,5], [1,3,5], [1,3,5]],
                 upsample_kernel_sizes=[20, 12],
                 gen_istft_n_fft=20, gen_istft_hop_size=5):
        super().__init__()

        self.decode = nn.ModuleList()

        self.encode = AdainResBlk1d(dim_in + 2, 1024, style_dim)

        # Three same-width blocks, then one that upsamples and narrows to 512.
        skip_width = 1024 + 2 + 64
        for _ in range(3):
            self.decode.append(AdainResBlk1d(skip_width, 1024, style_dim))
        self.decode.append(AdainResBlk1d(skip_width, 512, style_dim, upsample=True))

        self.F0_conv = weight_norm(nn.Conv1d(1, 1, kernel_size=3, stride=2, groups=1, padding=1))

        self.N_conv = weight_norm(nn.Conv1d(1, 1, kernel_size=3, stride=2, groups=1, padding=1))

        self.asr_res = nn.Sequential(
            weight_norm(nn.Conv1d(512, 64, kernel_size=1)),
        )

        self.generator = Generator(style_dim, resblock_kernel_sizes, upsample_rates,
                                   upsample_initial_channel, resblock_dilation_sizes,
                                   upsample_kernel_sizes, gen_istft_n_fft, gen_istft_hop_size)

    def forward(self, asr, F0_curve, N, s):
        F0 = self.F0_conv(F0_curve.unsqueeze(1))
        N = self.N_conv(N.unsqueeze(1))

        hidden = self.encode(torch.cat([asr, F0, N], axis=1), s)

        asr_res = self.asr_res(asr)

        # Skip inputs are concatenated until an upsampling block has run.
        concat_skip = True
        for block in self.decode:
            if concat_skip:
                hidden = torch.cat([hidden, asr_res, F0, N], axis=1)
            hidden = block(hidden, s)
            concat_skip = concat_skip and block.upsample_type == "none"

        return self.generator(hidden, s, F0_curve)
|
||||||
166
backend/python/kokoro/kokoro.py
Normal file
166
backend/python/kokoro/kokoro.py
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/kokoro.py
|
||||||
|
import phonemizer
|
||||||
|
import re
|
||||||
|
import torch
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
def split_num(num):
    """Regex callback that spells clock times and four-digit years the way they are read.

    ``num`` is a match object. Decimals are returned unchanged. ``H:MM`` becomes
    "H o'clock", "H oh M" or "H M". A year (optionally ending in ``s``) is split
    into two halves unless it is below 1100 or its last three digits are below 10.
    """
    token = num.group()
    if '.' in token:
        return token
    if ':' in token:
        hours, minutes = (int(part) for part in token.split(':'))
        if minutes == 0:
            return f"{hours} o'clock"
        if minutes < 10:
            return f'{hours} oh {minutes}'
        return f'{hours} {minutes}'

    year = int(token[:4])
    if year < 1100 or year % 1000 < 10:
        return token

    century, tail = token[:2], int(token[2:4])
    plural = 's' if token.endswith('s') else ''
    if 100 <= year % 1000 <= 999:
        if tail == 0:
            return f'{century} hundred{plural}'
        if tail < 10:
            return f'{century} oh {tail}{plural}'
    return f'{century} {tail}{plural}'
|
||||||
|
|
||||||
|
def flip_money(m):
    """Regex callback that moves the currency word after the amount.

    ``m`` is a match whose text starts with ``$`` (dollars) or any other symbol
    (treated as pounds). Amounts ending in a letter (e.g. a magnitude word) are
    always pluralized; decimal amounts are split into bills and coins.
    """
    amount = m.group()
    is_dollar = amount[0] == '$'
    digits = amount[1:]
    bill = 'dollar' if is_dollar else 'pound'

    if amount[-1].isalpha():
        return f'{digits} {bill}s'
    if '.' not in amount:
        plural = '' if digits == '1' else 's'
        return f'{digits} {bill}{plural}'

    whole, frac = digits.split('.')
    plural = '' if whole == '1' else 's'
    # A single fractional digit means tenths: ".5" -> 50.
    minor = int(frac.ljust(2, '0'))
    if is_dollar:
        coins = f"cent{'' if minor == 1 else 's'}"
    else:
        coins = 'penny' if minor == 1 else 'pence'
    return f'{whole} {bill}{plural} and {minor} {coins}'
|
||||||
|
|
||||||
|
def point_num(num):
    """Regex callback: read a decimal as "<whole> point <digit> <digit> ..."."""
    whole, frac = num.group().split('.')
    spelled_digits = ' '.join(frac)
    return f'{whole} point {spelled_digits}'
|
||||||
|
|
||||||
|
def normalize_text(text):
    """Rewrite raw text into the spoken-style form handed to the phonemizer.

    The steps run in a fixed order because later patterns depend on earlier
    results: quote and bracket normalization, CJK punctuation mapping,
    whitespace cleanup, title and "etc." expansion, then number, money and
    decimal verbalization, and finally initialism and possessive fixes.

    Returns the normalized text with surrounding whitespace stripped.
    """
    # Curly single quotes -> ASCII apostrophe.
    text = text.replace(chr(8216), "'").replace(chr(8217), "'")
    # Guillemets and curly double quotes -> ASCII double quote.
    text = text.replace('«', chr(8220)).replace('»', chr(8221))
    text = text.replace(chr(8220), '"').replace(chr(8221), '"')
    # Parentheses are represented with guillemets from here on.
    text = text.replace('(', '«').replace(')', '»')
    # Map CJK / fullwidth punctuation to ASCII followed by a space. The source
    # characters are escapes on purpose: if they are folded to ASCII, every
    # ',', '.', ':' gains a trailing space and the decimal, time and
    # thousands-separator patterns below stop matching (e.g. "1,000").
    for a, b in zip('\u3001\u3002\uff01\uff0c\uff1a\uff1b\uff1f', ',.!,:;?'):
        text = text.replace(a, b+' ')
    # Any whitespace other than space/newline becomes a space; collapse runs.
    text = re.sub(r'[^\S \n]', ' ', text)
    text = re.sub(r' +', ' ', text)
    text = re.sub(r'(?<=\n) +(?=\n)', '', text)
    # Titles and abbreviations.
    text = re.sub(r'\bD[Rr]\.(?= [A-Z])', 'Doctor', text)
    text = re.sub(r'\b(?:Mr\.|MR\.(?= [A-Z]))', 'Mister', text)
    text = re.sub(r'\b(?:Ms\.|MS\.(?= [A-Z]))', 'Miss', text)
    text = re.sub(r'\b(?:Mrs\.|MRS\.(?= [A-Z]))', 'Mrs', text)
    text = re.sub(r'\betc\.(?! [A-Z])', 'etc', text)
    text = re.sub(r'(?i)\b(y)eah?\b', r"\1e'a", text)
    # Years and clock times (decimals are matched here only to be skipped).
    text = re.sub(r'\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)', split_num, text)
    # Drop thousands separators between digits.
    text = re.sub(r'(?<=\d),(?=\d)', '', text)
    text = re.sub(r'(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b', flip_money, text)
    text = re.sub(r'\d*\.\d+', point_num, text)
    # Digit ranges: "3-5" -> "3 to 5".
    text = re.sub(r'(?<=\d)-(?=\d)', ' to ', text)
    text = re.sub(r'(?<=\d)S', ' S', text)
    text = re.sub(r"(?<=[BCDFGHJ-NP-TV-Z])'?s\b", "'S", text)
    text = re.sub(r"(?<=X')S\b", 's', text)
    # Dotted initialisms: replace the dots with hyphens.
    text = re.sub(r'(?:[A-Za-z]\.){2,} [a-z]', lambda m: m.group().replace('.', '-'), text)
    text = re.sub(r'(?i)(?<=[A-Z])\.(?=[A-Z])', '-', text)
    return text.strip()
|
||||||
|
|
||||||
|
def get_vocab():
    """Build the symbol -> integer id table used for tokenization.

    Ids follow the order pad, punctuation, ASCII letters, IPA symbols. A symbol
    that occurs more than once keeps the id of its last occurrence.
    """
    pad = "$"
    punctuation = ';:,.!?¡¿—…"«»“” '
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"
    ordered = [pad, *punctuation, *letters, *letters_ipa]
    return {symbol: index for index, symbol in enumerate(ordered)}

VOCAB = get_vocab()

def tokenize(ps):
    """Map each character of ``ps`` to its vocabulary id, dropping unknown characters."""
    ids = []
    for symbol in ps:
        index = VOCAB.get(symbol)
        if index is not None:
            ids.append(index)
    return ids
|
||||||
|
|
||||||
|
# One espeak backend per language code: 'a' -> en-us, 'b' -> en-gb.
phonemizers = {
    lang_code: phonemizer.backend.EspeakBackend(language=espeak_language, preserve_punctuation=True, with_stress=True)
    for lang_code, espeak_language in (('a', 'en-us'), ('b', 'en-gb'))
}
|
||||||
|
def phonemize(text, lang, norm=True):
    """Convert ``text`` to a phoneme string restricted to the model vocabulary.

    ``lang`` selects an entry of ``phonemizers``. When ``norm`` is true the
    text is first passed through ``normalize_text``. After phonemization a few
    fixed substitutions are applied and every character missing from ``VOCAB``
    is removed.
    """
    if norm:
        text = normalize_text(text)
    results = phonemizers[lang].phonemize([text])
    ps = results[0] if results else ''

    literal_fixes = (
        # https://en.wiktionary.org/wiki/kokoro#English
        ('kəkˈoːɹoʊ', 'kˈoʊkəɹoʊ'),
        ('kəkˈɔːɹəʊ', 'kˈəʊkəɹəʊ'),
        ('ʲ', 'j'),
        ('r', 'ɹ'),
        ('x', 'k'),
        ('ɬ', 'l'),
    )
    for old, new in literal_fixes:
        ps = ps.replace(old, new)

    ps = re.sub(r'(?<=[a-zɹː])(?=hˈʌndɹɪd)', ' ', ps)
    ps = re.sub(r' z(?=[;:,.!?¡¿—…"«»“” ]|$)', 'z', ps)
    if lang == 'a':
        ps = re.sub(r'(?<=nˈaɪn)ti(?!ː)', 'di', ps)

    kept = ''.join(ch for ch in ps if ch in VOCAB)
    return kept.strip()
|
||||||
|
|
||||||
|
def length_to_mask(lengths):
    """Return a boolean mask of shape (len(lengths), max(lengths)); True marks padding.

    Position ``j`` of row ``i`` is True when ``j + 1 > lengths[i]``.
    """
    positions = torch.arange(lengths.max())
    grid = positions.unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)
    return torch.gt(grid + 1, lengths.unsqueeze(1))
|
||||||
|
|
||||||
|
@torch.no_grad()
def forward(model, tokens, ref_s, speed):
    """Run the full synthesis pipeline for one utterance and return a NumPy array.

    ``tokens`` is wrapped with id 0 on both sides. ``ref_s[:, 128:]`` conditions
    the prosody predictor and ``ref_s[:, :128]`` conditions the decoder.
    Predicted durations are divided by ``speed``, rounded and clamped to at
    least one frame, then expanded into a 0/1 token-to-frame alignment matrix.
    """
    device = ref_s.device
    tokens = torch.LongTensor([[0, *tokens, 0]]).to(device)
    input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)
    text_mask = length_to_mask(input_lengths).to(device)

    # Duration prediction.
    bert_out = model.bert(tokens, attention_mask=(~text_mask).int())
    d_en = model.bert_encoder(bert_out).transpose(-1, -2)
    prosody_style = ref_s[:, 128:]
    d = model.predictor.text_encoder(d_en, prosody_style, input_lengths, text_mask)
    lstm_out, _ = model.predictor.lstm(d)
    duration = torch.sigmoid(model.predictor.duration_proj(lstm_out)).sum(axis=-1) / speed
    pred_dur = torch.round(duration).clamp(min=1).long()

    # Row i is 1 over the frames assigned to token i.
    pred_aln_trg = torch.zeros(input_lengths, pred_dur.sum().item())
    frame_start = 0
    for i in range(pred_aln_trg.size(0)):
        n_frames = pred_dur[0,i].item()
        pred_aln_trg[i, frame_start:frame_start + n_frames] = 1
        frame_start += n_frames
    alignment = pred_aln_trg.unsqueeze(0).to(device)

    en = d.transpose(-1, -2) @ alignment
    F0_pred, N_pred = model.predictor.F0Ntrain(en, prosody_style)
    t_en = model.text_encoder(tokens, input_lengths, text_mask)
    asr = t_en @ alignment
    audio = model.decoder(asr, F0_pred, N_pred, ref_s[:, :128])
    return audio.squeeze().cpu().numpy()
|
||||||
|
|
||||||
|
def generate(model, text, voicepack, lang='a', speed=1, ps=None):
    """Synthesize at most 510 tokens of ``text`` (or of pre-made phonemes ``ps``).

    Returns ``None`` when nothing tokenizes, otherwise ``(audio, phonemes)``
    where ``phonemes`` is rebuilt from the token ids that were actually used.
    The style vector is ``voicepack[len(tokens)]``.
    """
    ps = ps or phonemize(text, lang)
    tokens = tokenize(ps)
    if not tokens:
        return None
    if len(tokens) > 510:
        tokens = tokens[:510]
        print('Truncated to 510 tokens')

    ref_s = voicepack[len(tokens)]
    out = forward(model, tokens, ref_s, speed)

    # Invert the vocabulary once instead of scanning it for every token.
    id_to_symbol = {index: symbol for symbol, index in VOCAB.items()}
    ps = ''.join(id_to_symbol[token] for token in tokens)
    return out, ps
|
||||||
|
|
||||||
|
def generate_full(model, text, voicepack, lang='a', speed=1, ps=None):
    """Synthesize the whole input by processing it in chunks of up to 510 tokens.

    Each chunk uses the style vector ``voicepack[len(chunk)]``; the chunk
    outputs are concatenated. Returns ``None`` when nothing tokenizes,
    otherwise ``(audio, phonemes)``.
    """
    ps = ps or phonemize(text, lang)
    tokens = tokenize(ps)
    if not tokens:
        return None

    chunk_size = 510
    outs = []
    for start in range(0, len(tokens), chunk_size):
        chunk = tokens[start:start + chunk_size]
        ref_s = voicepack[len(chunk)]
        outs.append(forward(model, chunk, ref_s, speed))
    outs = np.concatenate(outs)

    id_to_symbol = {index: symbol for symbol, index in VOCAB.items()}
    ps = ''.join(id_to_symbol[token] for token in tokens)
    return outs, ps
|
||||||
373
backend/python/kokoro/models.py
Normal file
373
backend/python/kokoro/models.py
Normal file
@@ -0,0 +1,373 @@
|
|||||||
|
# https://github.com/yl4579/StyleTTS2/blob/main/models.py
|
||||||
|
# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/models.py
|
||||||
|
from istftnet import AdaIN1d, Decoder
|
||||||
|
from munch import Munch
|
||||||
|
from pathlib import Path
|
||||||
|
from plbert import load_plbert
|
||||||
|
from torch.nn.utils import weight_norm, spectral_norm
|
||||||
|
import json
|
||||||
|
import numpy as np
|
||||||
|
import os
|
||||||
|
import os.path as osp
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
class LinearNorm(torch.nn.Module):
    """``torch.nn.Linear`` whose weight is Xavier-uniform initialised with a named gain."""

    def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'):
        super().__init__()
        self.linear_layer = torch.nn.Linear(in_dim, out_dim, bias=bias)

        gain = torch.nn.init.calculate_gain(w_init_gain)
        torch.nn.init.xavier_uniform_(self.linear_layer.weight, gain=gain)

    def forward(self, x):
        projected = self.linear_layer(x)
        return projected
|
||||||
|
|
||||||
|
class LayerNorm(nn.Module):
    """Layer normalization over dimension 1 with learnable ``gamma`` / ``beta``."""

    def __init__(self, channels, eps=1e-5):
        super().__init__()
        self.channels, self.eps = channels, eps

        self.gamma = nn.Parameter(torch.ones(channels))
        self.beta = nn.Parameter(torch.zeros(channels))

    def forward(self, x):
        # Move dim 1 last, normalize over it, then move it back.
        channels_last = x.transpose(1, -1)
        normed = F.layer_norm(channels_last, (self.channels,), self.gamma, self.beta, self.eps)
        return normed.transpose(1, -1)
|
||||||
|
|
||||||
|
class TextEncoder(nn.Module):
    """Embedding -> stack of conv blocks -> bidirectional LSTM text encoder."""

    def __init__(self, channels, kernel_size, depth, n_symbols, actv=nn.LeakyReLU(0.2)):
        super().__init__()
        self.embedding = nn.Embedding(n_symbols, channels)

        padding = (kernel_size - 1) // 2
        # Kept as a ModuleList (not Sequential) so ``forward`` can re-apply the
        # padding mask after every block.
        self.cnn = nn.ModuleList()
        for _ in range(depth):
            self.cnn.append(nn.Sequential(
                weight_norm(nn.Conv1d(channels, channels, kernel_size=kernel_size, padding=padding)),
                LayerNorm(channels),
                actv,
                nn.Dropout(0.2),
            ))

        self.lstm = nn.LSTM(channels, channels//2, 1, batch_first=True, bidirectional=True)

    def forward(self, x, input_lengths, m):
        """Encode token ids ``x``; ``m`` is a boolean mask that is True at padded positions.

        Returns a tensor laid out as [B, channels, T], zeroed where ``m`` is True
        and padded along T up to ``m.shape[-1]``.
        """
        x = self.embedding(x)  # [B, T, emb]
        x = x.transpose(1, 2)  # [B, emb, T]
        m = m.to(input_lengths.device).unsqueeze(1)
        x.masked_fill_(m, 0.0)

        for c in self.cnn:
            x = c(x)
            x.masked_fill_(m, 0.0)

        x = x.transpose(1, 2)  # [B, T, chn]

        input_lengths = input_lengths.cpu().numpy()
        x = nn.utils.rnn.pack_padded_sequence(
            x, input_lengths, batch_first=True, enforce_sorted=False)

        self.lstm.flatten_parameters()
        x, _ = self.lstm(x)
        x, _ = nn.utils.rnn.pad_packed_sequence(
            x, batch_first=True)

        x = x.transpose(-1, -2)
        # pad_packed_sequence only pads to the longest sequence in the batch;
        # extend to the mask length. Allocated on x's device to avoid a
        # device -> CPU -> device round trip.
        x_pad = torch.zeros([x.shape[0], x.shape[1], m.shape[-1]], device=x.device)
        x_pad[:, :, :x.shape[-1]] = x
        x = x_pad

        x.masked_fill_(m, 0.0)

        return x

    def inference(self, x):
        """Encode token ids without masking or packing; returns the LSTM output [B, T, chn]."""
        x = self.embedding(x)
        x = x.transpose(1, 2)
        # ``self.cnn`` is a ModuleList and cannot be called directly; apply
        # the blocks one after another.
        for c in self.cnn:
            x = c(x)
        x = x.transpose(1, 2)
        self.lstm.flatten_parameters()
        x, _ = self.lstm(x)
        return x

    def length_to_mask(self, lengths):
        """Boolean mask of shape (len(lengths), max(lengths)); True marks padding."""
        mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)
        mask = torch.gt(mask+1, lengths.unsqueeze(1))
        return mask
|
||||||
|
|
||||||
|
|
||||||
|
class UpSample1d(nn.Module):
    """Identity when ``layer_type`` is ``'none'``, otherwise 2x nearest-neighbour upsampling."""

    def __init__(self, layer_type):
        super().__init__()
        self.layer_type = layer_type

    def forward(self, x):
        passthrough = self.layer_type == 'none'
        return x if passthrough else F.interpolate(x, scale_factor=2, mode='nearest')
|
||||||
|
|
||||||
|
class AdainResBlk1d(nn.Module):
    """Style-conditioned residual block (two AdaIN + conv stages) with optional 2x upsampling.

    The output is ``(residual + shortcut) / sqrt(2)``.
    """

    def __init__(self, dim_in, dim_out, style_dim=64, actv=nn.LeakyReLU(0.2),
                 upsample='none', dropout_p=0.0):
        super().__init__()
        self.actv = actv
        self.upsample_type = upsample
        self.upsample = UpSample1d(upsample)
        self.learned_sc = dim_in != dim_out
        self._build_weights(dim_in, dim_out, style_dim)
        self.dropout = nn.Dropout(dropout_p)

        # Only the literal string 'none' disables the learned upsampling conv.
        if upsample != 'none':
            self.pool = weight_norm(nn.ConvTranspose1d(dim_in, dim_in, kernel_size=3, stride=2, groups=dim_in, padding=1, output_padding=1))
        else:
            self.pool = nn.Identity()

    def _build_weights(self, dim_in, dim_out, style_dim):
        """Register the convolutions, the AdaIN layers and (if needed) the 1x1 shortcut conv."""
        self.conv1 = weight_norm(nn.Conv1d(dim_in, dim_out, 3, 1, 1))
        self.conv2 = weight_norm(nn.Conv1d(dim_out, dim_out, 3, 1, 1))
        self.norm1 = AdaIN1d(style_dim, dim_in)
        self.norm2 = AdaIN1d(style_dim, dim_out)
        if self.learned_sc:
            self.conv1x1 = weight_norm(nn.Conv1d(dim_in, dim_out, 1, 1, 0, bias=False))

    def _shortcut(self, x):
        """Skip connection: upsample, then match channels when they differ."""
        skip = self.upsample(x)
        if not self.learned_sc:
            return skip
        return self.conv1x1(skip)

    def _residual(self, x, s):
        """Residual branch conditioned on style ``s``."""
        first = self.actv(self.norm1(x, s))
        first = self.conv1(self.dropout(self.pool(first)))
        second = self.actv(self.norm2(first, s))
        return self.conv2(self.dropout(second))

    def forward(self, x, s):
        branch = self._residual(x, s)
        skip = self._shortcut(x)
        return (branch + skip) / np.sqrt(2)
|
||||||
|
|
||||||
|
class AdaLayerNorm(nn.Module):
    """Layer norm over the last dimension whose scale and shift come from a style vector.

    ``fc`` maps the style to ``2 * channels`` values; the first half is used as
    ``gamma`` and the second as ``beta`` in ``(1 + gamma) * norm(x) + beta``.
    """

    def __init__(self, style_dim, channels, eps=1e-5):
        super().__init__()
        self.channels, self.eps = channels, eps

        self.fc = nn.Linear(style_dim, channels*2)

    def forward(self, x, s):
        # The paired transposes are kept exactly as in the original layout handling.
        x = x.transpose(-1, -2).transpose(1, -1)

        affine = self.fc(s)
        affine = affine.view(affine.size(0), affine.size(1), 1)
        gamma, beta = (part.transpose(1, -1) for part in torch.chunk(affine, chunks=2, dim=1))

        normed = F.layer_norm(x, (self.channels,), eps=self.eps)
        styled = (1 + gamma) * normed + beta
        return styled.transpose(1, -1).transpose(-1, -2)
|
||||||
|
|
||||||
|
class ProsodyPredictor(nn.Module):
    """Predicts durations plus F0 and N curves from style-conditioned text features."""

    def __init__(self, style_dim, d_hid, nlayers, max_dur=50, dropout=0.1):
        super().__init__()

        self.text_encoder = DurationEncoder(sty_dim=style_dim,
                                            d_model=d_hid,
                                            nlayers=nlayers,
                                            dropout=dropout)

        self.lstm = nn.LSTM(d_hid + style_dim, d_hid // 2, 1, batch_first=True, bidirectional=True)
        self.duration_proj = LinearNorm(d_hid, max_dur)

        self.shared = nn.LSTM(d_hid + style_dim, d_hid // 2, 1, batch_first=True, bidirectional=True)

        def curve_stack():
            # Same-width block, upsampling block that halves the width, same-width block.
            stack = nn.ModuleList()
            stack.append(AdainResBlk1d(d_hid, d_hid, style_dim, dropout_p=dropout))
            stack.append(AdainResBlk1d(d_hid, d_hid // 2, style_dim, upsample=True, dropout_p=dropout))
            stack.append(AdainResBlk1d(d_hid // 2, d_hid // 2, style_dim, dropout_p=dropout))
            return stack

        self.F0 = curve_stack()
        self.N = curve_stack()

        self.F0_proj = nn.Conv1d(d_hid // 2, 1, 1, 1, 0)
        self.N_proj = nn.Conv1d(d_hid // 2, 1, 1, 1, 0)

    def forward(self, texts, style, text_lengths, alignment, m):
        """Return ``(duration, en)``: duration projection and alignment-expanded encoder features."""
        d = self.text_encoder(texts, style, text_lengths, m)

        # predict duration
        lengths_np = text_lengths.cpu().numpy()
        packed = nn.utils.rnn.pack_padded_sequence(
            d, lengths_np, batch_first=True, enforce_sorted=False)

        m = m.to(text_lengths.device).unsqueeze(1)

        self.lstm.flatten_parameters()
        packed, _ = self.lstm(packed)
        x, _ = nn.utils.rnn.pad_packed_sequence(
            packed, batch_first=True)

        # Pad the time axis back up to the mask length.
        x_pad = torch.zeros([x.shape[0], m.shape[-1], x.shape[-1]])
        x_pad[:, :x.shape[1], :] = x
        x = x_pad.to(x.device)

        dropped = nn.functional.dropout(x, 0.5, training=self.training)
        duration = self.duration_proj(dropped)

        en = (d.transpose(-1, -2) @ alignment)

        return duration.squeeze(-1), en

    def F0Ntrain(self, x, s):
        """Run the shared LSTM, then the F0 and N stacks; return the two projected curves."""
        shared_out, _ = self.shared(x.transpose(-1, -2))
        features = shared_out.transpose(-1, -2)

        F0 = features
        for block in self.F0:
            F0 = block(F0, s)
        F0 = self.F0_proj(F0)

        N = features
        for block in self.N:
            N = block(N, s)
        N = self.N_proj(N)

        return F0.squeeze(1), N.squeeze(1)

    def length_to_mask(self, lengths):
        """Boolean mask of shape (len(lengths), max(lengths)); True marks padding."""
        positions = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)
        return torch.gt(positions+1, lengths.unsqueeze(1))
|
||||||
|
|
||||||
|
class DurationEncoder(nn.Module):
    """Alternating bidirectional LSTM / AdaLayerNorm stack conditioned on a style vector.

    ``lstms`` holds ``nlayers`` pairs of (LSTM, AdaLayerNorm). The style is
    concatenated to the features before the first LSTM and again after every
    AdaLayerNorm.
    """

    def __init__(self, sty_dim, d_model, nlayers, dropout=0.1):
        super().__init__()
        self.lstms = nn.ModuleList()
        for _ in range(nlayers):
            self.lstms.append(nn.LSTM(d_model + sty_dim,
                                      d_model // 2,
                                      num_layers=1,
                                      batch_first=True,
                                      bidirectional=True,
                                      dropout=dropout))
            self.lstms.append(AdaLayerNorm(sty_dim, d_model))

        self.dropout = dropout
        self.d_model = d_model
        self.sty_dim = sty_dim

    def forward(self, x, style, text_lengths, m):
        """Encode ``x`` with the style appended; ``m`` is True at padded positions.

        NOTE(review): ``x`` is assumed to arrive as [B, d_model, T] (it is
        permuted with ``(2, 0, 1)`` first) -- confirm against the caller.
        """
        masks = m.to(text_lengths.device)

        x = x.permute(2, 0, 1)
        s = style.expand(x.shape[0], x.shape[1], -1)
        x = torch.cat([x, s], axis=-1)
        x.masked_fill_(masks.unsqueeze(-1).transpose(0, 1), 0.0)

        x = x.transpose(0, 1)
        input_lengths = text_lengths.cpu().numpy()
        x = x.transpose(-1, -2)

        for block in self.lstms:
            if isinstance(block, AdaLayerNorm):
                x = block(x.transpose(-1, -2), style).transpose(-1, -2)
                # Re-append the style so the next LSTM sees d_model + sty_dim channels.
                x = torch.cat([x, s.permute(1, -1, 0)], axis=1)
                x.masked_fill_(masks.unsqueeze(-1).transpose(-1, -2), 0.0)
            else:
                x = x.transpose(-1, -2)
                x = nn.utils.rnn.pack_padded_sequence(
                    x, input_lengths, batch_first=True, enforce_sorted=False)
                block.flatten_parameters()
                x, _ = block(x)
                x, _ = nn.utils.rnn.pad_packed_sequence(
                    x, batch_first=True)
                x = F.dropout(x, p=self.dropout, training=self.training)
                x = x.transpose(-1, -2)

                # Pad the time axis up to the mask length, directly on x's
                # device instead of going through a CPU buffer.
                x_pad = torch.zeros([x.shape[0], x.shape[1], m.shape[-1]], device=x.device)
                x_pad[:, :, :x.shape[-1]] = x
                x = x_pad

        return x.transpose(-1, -2)

    def inference(self, x, style):
        """Transformer-style inference path; unusable unless the needed submodules are attached.

        This class never creates ``embedding``, ``pos_encoder`` or
        ``transformer_encoder``, so the method fails fast with a clear error
        instead of an AttributeError halfway through.
        """
        required = ('embedding', 'pos_encoder', 'transformer_encoder')
        missing = [name for name in required if not hasattr(self, name)]
        if missing:
            raise NotImplementedError(
                'DurationEncoder.inference requires attributes that are not defined: '
                + ', '.join(missing))
        x = self.embedding(x.transpose(-1, -2)) * np.sqrt(self.d_model)
        style = style.expand(x.shape[0], x.shape[1], -1)
        x = torch.cat([x, style], axis=-1)
        src = self.pos_encoder(x)
        output = self.transformer_encoder(src).transpose(0, 1)
        return output

    def length_to_mask(self, lengths):
        """Boolean mask of shape (len(lengths), max(lengths)); True marks padding."""
        mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)
        mask = torch.gt(mask+1, lengths.unsqueeze(1))
        return mask
|
||||||
|
|
||||||
|
# https://github.com/yl4579/StyleTTS2/blob/main/utils.py
|
||||||
|
def recursive_munch(d):
    """Recursively convert dicts (including those nested in lists) to ``Munch`` objects.

    Lists are rebuilt element by element; any other value is returned unchanged.
    """
    if isinstance(d, dict):
        return Munch((key, recursive_munch(value)) for key, value in d.items())
    if isinstance(d, list):
        return list(map(recursive_munch, d))
    return d
|
||||||
|
|
||||||
|
def build_model(path, device):
    """Build all sub-models from ``config.json`` and load their weights from ``path``.

    ``config.json`` is read from the directory containing this file. The
    checkpoint at ``path`` must hold a ``'net'`` mapping from component name to
    state dict. Returns a ``Munch`` with ``bert``, ``bert_encoder``,
    ``predictor``, ``decoder`` and ``text_encoder``, each moved to ``device``
    and set to eval mode.
    """
    config = Path(__file__).parent / 'config.json'
    assert config.exists(), f'Config path incorrect: config.json not found at {config}'
    with open(config, 'r') as r:
        args = recursive_munch(json.load(r))
    assert args.decoder.type == 'istftnet', f'Unknown decoder type: {args.decoder.type}'
    decoder = Decoder(dim_in=args.hidden_dim, style_dim=args.style_dim, dim_out=args.n_mels,
                      resblock_kernel_sizes = args.decoder.resblock_kernel_sizes,
                      upsample_rates = args.decoder.upsample_rates,
                      upsample_initial_channel=args.decoder.upsample_initial_channel,
                      resblock_dilation_sizes=args.decoder.resblock_dilation_sizes,
                      upsample_kernel_sizes=args.decoder.upsample_kernel_sizes,
                      gen_istft_n_fft=args.decoder.gen_istft_n_fft, gen_istft_hop_size=args.decoder.gen_istft_hop_size)
    text_encoder = TextEncoder(channels=args.hidden_dim, kernel_size=5, depth=args.n_layer, n_symbols=args.n_token)
    predictor = ProsodyPredictor(style_dim=args.style_dim, d_hid=args.hidden_dim, nlayers=args.n_layer, max_dur=args.max_dur, dropout=args.dropout)
    bert = load_plbert()
    bert_encoder = nn.Linear(bert.config.hidden_size, args.hidden_dim)
    for parent in [bert, bert_encoder, predictor, decoder, text_encoder]:
        for child in parent.children():
            if isinstance(child, nn.RNNBase):
                child.flatten_parameters()
    model = Munch(
        bert=bert.to(device).eval(),
        bert_encoder=bert_encoder.to(device).eval(),
        predictor=predictor.to(device).eval(),
        decoder=decoder.to(device).eval(),
        text_encoder=text_encoder.to(device).eval(),
    )
    prefix = 'module.'
    for key, state_dict in torch.load(path, map_location='cpu', weights_only=True)['net'].items():
        assert key in model, key
        try:
            model[key].load_state_dict(state_dict)
        except RuntimeError:
            # load_state_dict reports key/shape mismatches as RuntimeError.
            # Retry with a leading 'module.' removed from the keys -- presumably
            # left by a DataParallel-style wrapper at save time (TODO confirm).
            # Keys without the prefix are left untouched instead of losing
            # their first seven characters.
            state_dict = {(k[len(prefix):] if k.startswith(prefix) else k): v
                          for k, v in state_dict.items()}
            model[key].load_state_dict(state_dict, strict=False)
    return model
|
||||||
16
backend/python/kokoro/plbert.py
Normal file
16
backend/python/kokoro/plbert.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/plbert.py
|
||||||
|
# https://github.com/yl4579/StyleTTS2/blob/main/Utils/PLBERT/util.py
|
||||||
|
from transformers import AlbertConfig, AlbertModel
|
||||||
|
|
||||||
|
class CustomAlbert(AlbertModel):
|
||||||
|
def forward(self, *args, **kwargs):
|
||||||
|
# Call the original forward method
|
||||||
|
outputs = super().forward(*args, **kwargs)
|
||||||
|
# Only return the last_hidden_state
|
||||||
|
return outputs.last_hidden_state
|
||||||
|
|
||||||
|
def load_plbert():
|
||||||
|
plbert_config = {'vocab_size': 178, 'hidden_size': 768, 'num_attention_heads': 12, 'intermediate_size': 2048, 'max_position_embeddings': 512, 'num_hidden_layers': 12, 'dropout': 0.1}
|
||||||
|
albert_base_configuration = AlbertConfig(**plbert_config)
|
||||||
|
bert = CustomAlbert(albert_base_configuration)
|
||||||
|
return bert
|
||||||
6
backend/python/kokoro/protogen.sh
Normal file
6
backend/python/kokoro/protogen.sh
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
|
||||||
|
source $(dirname $0)/../common/libbackend.sh
|
||||||
|
|
||||||
|
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||||
@@ -1,5 +1,3 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||||
torch==2.4.1+rocm6.0
|
torch==2.4.1+rocm6.0
|
||||||
accelerate
|
|
||||||
sentence-transformers==3.3.1
|
|
||||||
transformers
|
transformers
|
||||||
5
backend/python/kokoro/requirements-intel.txt
Normal file
5
backend/python/kokoro/requirements-intel.txt
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||||
|
intel-extension-for-pytorch==2.3.110+xpu
|
||||||
|
torch==2.3.1+cxx11.abi
|
||||||
|
oneccl_bind_pt==2.3.100+xpu
|
||||||
|
transformers
|
||||||
3
backend/python/kokoro/requirements-l4t.txt
Normal file
3
backend/python/kokoro/requirements-l4t.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
|
||||||
|
torch
|
||||||
|
transformers
|
||||||
7
backend/python/kokoro/requirements.txt
Normal file
7
backend/python/kokoro/requirements.txt
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
grpcio==1.70.0
|
||||||
|
protobuf
|
||||||
|
phonemizer
|
||||||
|
scipy
|
||||||
|
munch
|
||||||
|
setuptools
|
||||||
|
soundfile
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
.PHONY: mamba
|
|
||||||
mamba: protogen
|
|
||||||
bash install.sh
|
|
||||||
|
|
||||||
.PHONY: run
|
|
||||||
run: protogen
|
|
||||||
@echo "Running mamba..."
|
|
||||||
bash run.sh
|
|
||||||
@echo "mamba run."
|
|
||||||
|
|
||||||
.PHONY: test
|
|
||||||
test: protogen
|
|
||||||
@echo "Testing mamba..."
|
|
||||||
bash test.sh
|
|
||||||
@echo "mamba tested."
|
|
||||||
|
|
||||||
.PHONY: protogen
|
|
||||||
protogen: backend_pb2_grpc.py backend_pb2.py
|
|
||||||
|
|
||||||
.PHONY: protogen-clean
|
|
||||||
protogen-clean:
|
|
||||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
|
||||||
|
|
||||||
backend_pb2_grpc.py backend_pb2.py:
|
|
||||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
|
||||||
|
|
||||||
.PHONY: clean
|
|
||||||
clean: protogen-clean
|
|
||||||
$(RM) -r venv __pycache__
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
# Creating a separate environment for the mamba project
|
|
||||||
|
|
||||||
```
|
|
||||||
make mamba
|
|
||||||
```
|
|
||||||
@@ -1,179 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
from concurrent import futures
|
|
||||||
import time
|
|
||||||
import argparse
|
|
||||||
import signal
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
|
|
||||||
import backend_pb2
|
|
||||||
import backend_pb2_grpc
|
|
||||||
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
import torch
|
|
||||||
from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
||||||
from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel
|
|
||||||
|
|
||||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
|
||||||
|
|
||||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
|
||||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
|
||||||
MAMBA_CHAT= os.environ.get('MAMBA_CHAT', '1') == '1'
|
|
||||||
|
|
||||||
# Implement the BackendServicer class with the service methods
|
|
||||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|
||||||
"""
|
|
||||||
A gRPC servicer that implements the Backend service defined in backend.proto.
|
|
||||||
"""
|
|
||||||
def generate(self,prompt, max_new_tokens):
|
|
||||||
"""
|
|
||||||
Generates text based on the given prompt and maximum number of new tokens.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
prompt (str): The prompt to generate text from.
|
|
||||||
max_new_tokens (int): The maximum number of new tokens to generate.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: The generated text.
|
|
||||||
"""
|
|
||||||
self.generator.end_beam_search()
|
|
||||||
|
|
||||||
# Tokenizing the input
|
|
||||||
ids = self.generator.tokenizer.encode(prompt)
|
|
||||||
|
|
||||||
self.generator.gen_begin_reuse(ids)
|
|
||||||
initial_len = self.generator.sequence[0].shape[0]
|
|
||||||
has_leading_space = False
|
|
||||||
decoded_text = ''
|
|
||||||
for i in range(max_new_tokens):
|
|
||||||
token = self.generator.gen_single_token()
|
|
||||||
if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'):
|
|
||||||
has_leading_space = True
|
|
||||||
|
|
||||||
decoded_text = self.generator.tokenizer.decode(self.generator.sequence[0][initial_len:])
|
|
||||||
if has_leading_space:
|
|
||||||
decoded_text = ' ' + decoded_text
|
|
||||||
|
|
||||||
if token.item() == self.generator.tokenizer.eos_token_id:
|
|
||||||
break
|
|
||||||
return decoded_text
|
|
||||||
|
|
||||||
def Health(self, request, context):
|
|
||||||
"""
|
|
||||||
Returns a health check message.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
request: The health check request.
|
|
||||||
context: The gRPC context.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
backend_pb2.Reply: The health check reply.
|
|
||||||
"""
|
|
||||||
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
|
||||||
|
|
||||||
def LoadModel(self, request, context):
|
|
||||||
"""
|
|
||||||
Loads a language model.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
request: The load model request.
|
|
||||||
context: The gRPC context.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
backend_pb2.Result: The load model result.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
tokenizerModel = request.Tokenizer
|
|
||||||
if tokenizerModel == "":
|
|
||||||
tokenizerModel = request.Model
|
|
||||||
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained(tokenizerModel)
|
|
||||||
if MAMBA_CHAT:
|
|
||||||
tokenizer.eos_token = "<|endoftext|>"
|
|
||||||
tokenizer.pad_token = tokenizer.eos_token
|
|
||||||
self.tokenizer = tokenizer
|
|
||||||
self.model = MambaLMHeadModel.from_pretrained(request.Model, device="cuda", dtype=torch.float16)
|
|
||||||
except Exception as err:
|
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
|
||||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
|
||||||
|
|
||||||
def Predict(self, request, context):
|
|
||||||
"""
|
|
||||||
Generates text based on the given prompt and sampling parameters.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
request: The predict request.
|
|
||||||
context: The gRPC context.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
backend_pb2.Result: The predict result.
|
|
||||||
"""
|
|
||||||
if request.TopP == 0:
|
|
||||||
request.TopP = 0.9
|
|
||||||
|
|
||||||
max_tokens = request.Tokens
|
|
||||||
|
|
||||||
if request.Tokens == 0:
|
|
||||||
max_tokens = 2000
|
|
||||||
|
|
||||||
# encoded_input = self.tokenizer(request.Prompt)
|
|
||||||
tokens = self.tokenizer(request.Prompt, return_tensors="pt")
|
|
||||||
input_ids = tokens.input_ids.to(device="cuda")
|
|
||||||
out = self.model.generate(input_ids=input_ids, max_length=max_tokens, temperature=request.Temperature,
|
|
||||||
top_p=request.TopP, eos_token_id=self.tokenizer.eos_token_id)
|
|
||||||
|
|
||||||
decoded = self.tokenizer.batch_decode(out)
|
|
||||||
|
|
||||||
generated_text = decoded[0]
|
|
||||||
|
|
||||||
# Remove prompt from response if present
|
|
||||||
if request.Prompt in generated_text:
|
|
||||||
generated_text = generated_text.replace(request.Prompt, "")
|
|
||||||
|
|
||||||
return backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))
|
|
||||||
|
|
||||||
def PredictStream(self, request, context):
|
|
||||||
"""
|
|
||||||
Generates text based on the given prompt and sampling parameters, and streams the results.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
request: The predict stream request.
|
|
||||||
context: The gRPC context.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
backend_pb2.Result: The predict stream result.
|
|
||||||
"""
|
|
||||||
yield self.Predict(request, context)
|
|
||||||
|
|
||||||
def serve(address):
|
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
|
||||||
server.add_insecure_port(address)
|
|
||||||
server.start()
|
|
||||||
print("Server started. Listening on: " + address, file=sys.stderr)
|
|
||||||
|
|
||||||
# Define the signal handler function
|
|
||||||
def signal_handler(sig, frame):
|
|
||||||
print("Received termination signal. Shutting down...")
|
|
||||||
server.stop(0)
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
||||||
# Set the signal handlers for SIGINT and SIGTERM
|
|
||||||
signal.signal(signal.SIGINT, signal_handler)
|
|
||||||
signal.signal(signal.SIGTERM, signal_handler)
|
|
||||||
|
|
||||||
try:
|
|
||||||
while True:
|
|
||||||
time.sleep(_ONE_DAY_IN_SECONDS)
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
server.stop(0)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
parser = argparse.ArgumentParser(description="Run the gRPC server.")
|
|
||||||
parser.add_argument(
|
|
||||||
"--addr", default="localhost:50051", help="The address to bind the server to."
|
|
||||||
)
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
serve(args.addr)
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -e
|
|
||||||
|
|
||||||
LIMIT_TARGETS="cublas"
|
|
||||||
EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"
|
|
||||||
|
|
||||||
source $(dirname $0)/../common/libbackend.sh
|
|
||||||
|
|
||||||
installRequirements
|
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
causal-conv1d==1.4.0
|
|
||||||
mamba-ssm==2.2.2
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
# mabma does not specify it's build dependencies per PEP517, so we need to disable build isolation
|
|
||||||
# this also means that we need to install the basic build dependencies into the venv ourselves
|
|
||||||
# https://github.com/Dao-AILab/causal-conv1d/issues/24
|
|
||||||
packaging
|
|
||||||
setuptools
|
|
||||||
wheel
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
grpcio==1.68.1
|
|
||||||
protobuf
|
|
||||||
certifi
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
LIMIT_TARGETS="cublas"
|
|
||||||
|
|
||||||
source $(dirname $0)/../common/libbackend.sh
|
|
||||||
|
|
||||||
startBackend $@
|
|
||||||
@@ -1,76 +0,0 @@
|
|||||||
import unittest
|
|
||||||
import subprocess
|
|
||||||
import time
|
|
||||||
import backend_pb2
|
|
||||||
import backend_pb2_grpc
|
|
||||||
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
import unittest
|
|
||||||
import subprocess
|
|
||||||
import time
|
|
||||||
import grpc
|
|
||||||
import backend_pb2_grpc
|
|
||||||
import backend_pb2
|
|
||||||
|
|
||||||
class TestBackendServicer(unittest.TestCase):
|
|
||||||
"""
|
|
||||||
TestBackendServicer is the class that tests the gRPC service.
|
|
||||||
|
|
||||||
This class contains methods to test the startup and shutdown of the gRPC service.
|
|
||||||
"""
|
|
||||||
def setUp(self):
|
|
||||||
self.service = subprocess.Popen(["python", "backend.py", "--addr", "localhost:50051"])
|
|
||||||
time.sleep(10)
|
|
||||||
|
|
||||||
def tearDown(self) -> None:
|
|
||||||
self.service.terminate()
|
|
||||||
self.service.wait()
|
|
||||||
|
|
||||||
def test_server_startup(self):
|
|
||||||
try:
|
|
||||||
self.setUp()
|
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
|
||||||
stub = backend_pb2_grpc.BackendStub(channel)
|
|
||||||
response = stub.Health(backend_pb2.HealthMessage())
|
|
||||||
self.assertEqual(response.message, b'OK')
|
|
||||||
except Exception as err:
|
|
||||||
print(err)
|
|
||||||
self.fail("Server failed to start")
|
|
||||||
finally:
|
|
||||||
self.tearDown()
|
|
||||||
def test_load_model(self):
|
|
||||||
"""
|
|
||||||
This method tests if the model is loaded successfully
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
self.setUp()
|
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
|
||||||
stub = backend_pb2_grpc.BackendStub(channel)
|
|
||||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m"))
|
|
||||||
self.assertTrue(response.success)
|
|
||||||
self.assertEqual(response.message, "Model loaded successfully")
|
|
||||||
except Exception as err:
|
|
||||||
print(err)
|
|
||||||
self.fail("LoadModel service failed")
|
|
||||||
finally:
|
|
||||||
self.tearDown()
|
|
||||||
|
|
||||||
def test_text(self):
|
|
||||||
"""
|
|
||||||
This method tests if the embeddings are generated successfully
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
self.setUp()
|
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
|
||||||
stub = backend_pb2_grpc.BackendStub(channel)
|
|
||||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m"))
|
|
||||||
self.assertTrue(response.success)
|
|
||||||
req = backend_pb2.PredictOptions(Prompt="The capital of France is")
|
|
||||||
resp = stub.Predict(req)
|
|
||||||
self.assertIsNotNone(resp.message)
|
|
||||||
except Exception as err:
|
|
||||||
print(err)
|
|
||||||
self.fail("text service failed")
|
|
||||||
finally:
|
|
||||||
self.tearDown()
|
|
||||||
@@ -1,158 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Extra gRPC server for OpenVoice models.
|
|
||||||
"""
|
|
||||||
from concurrent import futures
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import signal
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
import torch
|
|
||||||
from openvoice import se_extractor
|
|
||||||
from openvoice.api import ToneColorConverter
|
|
||||||
from melo.api import TTS
|
|
||||||
|
|
||||||
import time
|
|
||||||
import backend_pb2
|
|
||||||
import backend_pb2_grpc
|
|
||||||
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
|
|
||||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
|
||||||
|
|
||||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
|
||||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
|
||||||
|
|
||||||
# Implement the BackendServicer class with the service methods
|
|
||||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|
||||||
"""
|
|
||||||
A gRPC servicer for the backend service.
|
|
||||||
|
|
||||||
This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
|
|
||||||
"""
|
|
||||||
def Health(self, request, context):
|
|
||||||
"""
|
|
||||||
A gRPC method that returns the health status of the backend service.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
request: A HealthRequest object that contains the request parameters.
|
|
||||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A Reply object that contains the health status of the backend service.
|
|
||||||
"""
|
|
||||||
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
|
||||||
|
|
||||||
def LoadModel(self, request, context):
|
|
||||||
"""
|
|
||||||
A gRPC method that loads a model into memory.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
request: A LoadModelRequest object that contains the request parameters.
|
|
||||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A Result object that contains the result of the LoadModel operation.
|
|
||||||
"""
|
|
||||||
model_name = request.Model
|
|
||||||
try:
|
|
||||||
|
|
||||||
self.clonedVoice = False
|
|
||||||
# Assume directory from request.ModelFile.
|
|
||||||
# Only if request.LoraAdapter it's not an absolute path
|
|
||||||
if request.AudioPath and request.ModelFile != "" and not os.path.isabs(request.AudioPath):
|
|
||||||
# get base path of modelFile
|
|
||||||
modelFileBase = os.path.dirname(request.ModelFile)
|
|
||||||
request.AudioPath = os.path.join(modelFileBase, request.AudioPath)
|
|
||||||
if request.AudioPath != "":
|
|
||||||
self.clonedVoice = True
|
|
||||||
|
|
||||||
self.modelpath = request.ModelFile
|
|
||||||
self.speaker = request.Type
|
|
||||||
self.ClonedVoicePath = request.AudioPath
|
|
||||||
|
|
||||||
ckpt_converter = request.Model+'/converter'
|
|
||||||
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
|
||||||
self.device = device
|
|
||||||
self.tone_color_converter = None
|
|
||||||
if self.clonedVoice:
|
|
||||||
self.tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
|
|
||||||
self.tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
|
|
||||||
|
|
||||||
except Exception as err:
|
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
|
||||||
|
|
||||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
|
||||||
|
|
||||||
def TTS(self, request, context):
|
|
||||||
model_name = request.model
|
|
||||||
if model_name == "":
|
|
||||||
return backend_pb2.Result(success=False, message="request.model is required")
|
|
||||||
try:
|
|
||||||
# Speed is adjustable
|
|
||||||
speed = 1.0
|
|
||||||
voice = "EN"
|
|
||||||
if request.voice:
|
|
||||||
voice = request.voice
|
|
||||||
model = TTS(language=voice, device=self.device)
|
|
||||||
speaker_ids = model.hps.data.spk2id
|
|
||||||
speaker_key = self.speaker
|
|
||||||
modelpath = self.modelpath
|
|
||||||
for s in speaker_ids.keys():
|
|
||||||
print(f"Speaker: {s} - ID: {speaker_ids[s]}")
|
|
||||||
speaker_id = speaker_ids[speaker_key]
|
|
||||||
speaker_key = speaker_key.lower().replace('_', '-')
|
|
||||||
source_se = torch.load(f'{modelpath}/base_speakers/ses/{speaker_key}.pth', map_location=self.device)
|
|
||||||
model.tts_to_file(request.text, speaker_id, request.dst, speed=speed)
|
|
||||||
if self.clonedVoice:
|
|
||||||
reference_speaker = self.ClonedVoicePath
|
|
||||||
target_se, audio_name = se_extractor.get_se(reference_speaker, self.tone_color_converter, vad=False)
|
|
||||||
# Run the tone color converter
|
|
||||||
encode_message = "@MyShell"
|
|
||||||
self.tone_color_converter.convert(
|
|
||||||
audio_src_path=request.dst,
|
|
||||||
src_se=source_se,
|
|
||||||
tgt_se=target_se,
|
|
||||||
output_path=request.dst,
|
|
||||||
message=encode_message)
|
|
||||||
|
|
||||||
print("[OpenVoice] TTS generated!", file=sys.stderr)
|
|
||||||
print("[OpenVoice] TTS saved to", request.dst, file=sys.stderr)
|
|
||||||
print(request, file=sys.stderr)
|
|
||||||
except Exception as err:
|
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
|
||||||
return backend_pb2.Result(success=True)
|
|
||||||
|
|
||||||
def serve(address):
|
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
|
||||||
server.add_insecure_port(address)
|
|
||||||
server.start()
|
|
||||||
print("[OpenVoice] Server started. Listening on: " + address, file=sys.stderr)
|
|
||||||
|
|
||||||
# Define the signal handler function
|
|
||||||
def signal_handler(sig, frame):
|
|
||||||
print("[OpenVoice] Received termination signal. Shutting down...")
|
|
||||||
server.stop(0)
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
||||||
# Set the signal handlers for SIGINT and SIGTERM
|
|
||||||
signal.signal(signal.SIGINT, signal_handler)
|
|
||||||
signal.signal(signal.SIGTERM, signal_handler)
|
|
||||||
|
|
||||||
try:
|
|
||||||
while True:
|
|
||||||
time.sleep(_ONE_DAY_IN_SECONDS)
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
server.stop(0)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
parser = argparse.ArgumentParser(description="Run the gRPC server.")
|
|
||||||
parser.add_argument(
|
|
||||||
"--addr", default="localhost:50051", help="The address to bind the server to."
|
|
||||||
)
|
|
||||||
args = parser.parse_args()
|
|
||||||
print(f"[OpenVoice] startup: {args}", file=sys.stderr)
|
|
||||||
serve(args.addr)
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -e
|
|
||||||
|
|
||||||
source $(dirname $0)/../common/libbackend.sh
|
|
||||||
|
|
||||||
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
|
|
||||||
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
|
|
||||||
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
|
|
||||||
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
|
|
||||||
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
|
||||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
|
||||||
fi
|
|
||||||
|
|
||||||
installRequirements
|
|
||||||
|
|
||||||
python -m unidic download
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
torch==2.4.1
|
|
||||||
git+https://github.com/myshell-ai/MeloTTS.git
|
|
||||||
git+https://github.com/myshell-ai/OpenVoice.git
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
|
||||||
torch==2.4.1+cu118
|
|
||||||
git+https://github.com/myshell-ai/MeloTTS.git
|
|
||||||
git+https://github.com/myshell-ai/OpenVoice.git
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
torch==2.4.1
|
|
||||||
git+https://github.com/myshell-ai/MeloTTS.git
|
|
||||||
git+https://github.com/myshell-ai/OpenVoice.git
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
|
||||||
torch==2.4.1+rocm6.0
|
|
||||||
git+https://github.com/myshell-ai/MeloTTS.git
|
|
||||||
git+https://github.com/myshell-ai/OpenVoice.git
|
|
||||||
@@ -1,23 +0,0 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
|
||||||
intel-extension-for-pytorch
|
|
||||||
torch
|
|
||||||
optimum[openvino]
|
|
||||||
grpcio==1.68.1
|
|
||||||
protobuf
|
|
||||||
librosa==0.9.1
|
|
||||||
faster-whisper==0.9.0
|
|
||||||
pydub==0.25.1
|
|
||||||
wavmark==0.0.3
|
|
||||||
numpy==1.22.0
|
|
||||||
eng_to_ipa==0.0.2
|
|
||||||
inflect==7.0.0
|
|
||||||
unidecode==1.3.7
|
|
||||||
whisper-timestamped==1.14.2
|
|
||||||
openai
|
|
||||||
python-dotenv
|
|
||||||
pypinyin==0.50.0
|
|
||||||
cn2an==0.5.22
|
|
||||||
jieba==0.42.1
|
|
||||||
langid==1.1.6
|
|
||||||
git+https://github.com/myshell-ai/MeloTTS.git
|
|
||||||
git+https://github.com/myshell-ai/OpenVoice.git
|
|
||||||
@@ -1,20 +0,0 @@
|
|||||||
grpcio==1.68.1
|
|
||||||
protobuf
|
|
||||||
librosa
|
|
||||||
faster-whisper
|
|
||||||
pydub==0.25.1
|
|
||||||
wavmark==0.0.3
|
|
||||||
numpy==1.22.0
|
|
||||||
eng_to_ipa==0.0.2
|
|
||||||
inflect
|
|
||||||
unidecode
|
|
||||||
whisper-timestamped
|
|
||||||
openai
|
|
||||||
python-dotenv
|
|
||||||
pypinyin
|
|
||||||
cn2an==0.5.22
|
|
||||||
networkx==2.8.8
|
|
||||||
jieba==0.42.1
|
|
||||||
gradio==3.48.0
|
|
||||||
langid==1.1.6
|
|
||||||
llvmlite==0.43.0
|
|
||||||
@@ -1,82 +0,0 @@
|
|||||||
"""
|
|
||||||
A test script to test the gRPC service
|
|
||||||
"""
|
|
||||||
import unittest
|
|
||||||
import subprocess
|
|
||||||
import time
|
|
||||||
import backend_pb2
|
|
||||||
import backend_pb2_grpc
|
|
||||||
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
|
|
||||||
class TestBackendServicer(unittest.TestCase):
|
|
||||||
"""
|
|
||||||
TestBackendServicer is the class that tests the gRPC service
|
|
||||||
"""
|
|
||||||
def setUp(self):
|
|
||||||
"""
|
|
||||||
This method sets up the gRPC service by starting the server
|
|
||||||
"""
|
|
||||||
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
|
|
||||||
time.sleep(30)
|
|
||||||
|
|
||||||
def tearDown(self) -> None:
|
|
||||||
"""
|
|
||||||
This method tears down the gRPC service by terminating the server
|
|
||||||
"""
|
|
||||||
self.service.terminate()
|
|
||||||
self.service.wait()
|
|
||||||
|
|
||||||
def test_server_startup(self):
|
|
||||||
"""
|
|
||||||
This method tests if the server starts up successfully
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
self.setUp()
|
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
|
||||||
stub = backend_pb2_grpc.BackendStub(channel)
|
|
||||||
response = stub.Health(backend_pb2.HealthMessage())
|
|
||||||
self.assertEqual(response.message, b'OK')
|
|
||||||
except Exception as err:
|
|
||||||
print(err)
|
|
||||||
self.fail("Server failed to start")
|
|
||||||
finally:
|
|
||||||
self.tearDown()
|
|
||||||
|
|
||||||
def test_load_model(self):
|
|
||||||
"""
|
|
||||||
This method tests if the model is loaded successfully
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
self.setUp()
|
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
|
||||||
stub = backend_pb2_grpc.BackendStub(channel)
|
|
||||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="checkpoints_v2",
|
|
||||||
Type="en-us"))
|
|
||||||
self.assertTrue(response.success)
|
|
||||||
self.assertEqual(response.message, "Model loaded successfully")
|
|
||||||
except Exception as err:
|
|
||||||
print(err)
|
|
||||||
self.fail("LoadModel service failed")
|
|
||||||
finally:
|
|
||||||
self.tearDown()
|
|
||||||
|
|
||||||
def test_tts(self):
|
|
||||||
"""
|
|
||||||
This method tests if the embeddings are generated successfully
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
self.setUp()
|
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
|
||||||
stub = backend_pb2_grpc.BackendStub(channel)
|
|
||||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="dingzhen"))
|
|
||||||
self.assertTrue(response.success)
|
|
||||||
tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story", voice="EN")
|
|
||||||
tts_response = stub.TTS(tts_request)
|
|
||||||
self.assertIsNotNone(tts_response)
|
|
||||||
except Exception as err:
|
|
||||||
print(err)
|
|
||||||
self.fail("TTS service failed")
|
|
||||||
finally:
|
|
||||||
self.tearDown()
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -e
|
|
||||||
|
|
||||||
source $(dirname $0)/../common/libbackend.sh
|
|
||||||
|
|
||||||
# Download checkpoints if not present
|
|
||||||
if [ ! -d "checkpoints_v2" ]; then
|
|
||||||
wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
|
|
||||||
unzip checkpoints_v2.zip
|
|
||||||
fi
|
|
||||||
|
|
||||||
runUnittests
|
|
||||||
@@ -1,44 +0,0 @@
|
|||||||
export CONDA_ENV_PATH = "parler.yml"
|
|
||||||
SKIP_CONDA?=0
|
|
||||||
ifeq ($(BUILD_TYPE), cublas)
|
|
||||||
export CONDA_ENV_PATH = "parler-nvidia.yml"
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Intel GPU are supposed to have dependencies installed in the main python
|
|
||||||
# environment, so we skip conda installation for SYCL builds.
|
|
||||||
# https://github.com/intel/intel-extension-for-pytorch/issues/538
|
|
||||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
|
||||||
export SKIP_CONDA=1
|
|
||||||
endif
|
|
||||||
|
|
||||||
.PHONY: parler-tts
|
|
||||||
parler-tts:
|
|
||||||
@echo "Installing $(CONDA_ENV_PATH)..."
|
|
||||||
bash install.sh $(CONDA_ENV_PATH)
|
|
||||||
$(MAKE) protogen
|
|
||||||
|
|
||||||
.PHONY: run
|
|
||||||
run: protogen
|
|
||||||
@echo "Running transformers..."
|
|
||||||
bash run.sh
|
|
||||||
@echo "transformers run."
|
|
||||||
|
|
||||||
.PHONY: test
|
|
||||||
test: protogen
|
|
||||||
@echo "Testing transformers..."
|
|
||||||
bash test.sh
|
|
||||||
@echo "transformers tested."
|
|
||||||
|
|
||||||
.PHONY: protogen
|
|
||||||
protogen: backend_pb2_grpc.py backend_pb2.py
|
|
||||||
|
|
||||||
.PHONY: protogen-clean
|
|
||||||
protogen-clean:
|
|
||||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
|
||||||
|
|
||||||
backend_pb2_grpc.py backend_pb2.py:
|
|
||||||
bash protogen.sh
|
|
||||||
|
|
||||||
.PHONY: clean
|
|
||||||
clean: protogen-clean
|
|
||||||
$(RM) -r venv __pycache__
|
|
||||||
@@ -1,28 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -e
|
|
||||||
|
|
||||||
source $(dirname $0)/../common/libbackend.sh
|
|
||||||
|
|
||||||
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
|
|
||||||
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
|
|
||||||
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
|
|
||||||
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
|
|
||||||
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
|
||||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
|
||||||
fi
|
|
||||||
|
|
||||||
|
|
||||||
installRequirements
|
|
||||||
|
|
||||||
|
|
||||||
# https://github.com/descriptinc/audiotools/issues/101
|
|
||||||
# incompatible protobuf versions.
|
|
||||||
PYDIR=python3.10
|
|
||||||
pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"
|
|
||||||
|
|
||||||
if [ ! -d ${pyenv} ]; then
|
|
||||||
echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17
|
|
||||||
llvmlite==0.43.0
|
|
||||||
numba==0.60.0
|
|
||||||
grpcio-tools==1.42.0
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
transformers
|
|
||||||
accelerate
|
|
||||||
torch==2.4.1
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
|
||||||
torch==2.4.1+cu118
|
|
||||||
torchaudio==2.4.1+cu118
|
|
||||||
transformers
|
|
||||||
accelerate
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user