Compare commits: speculativ...fix/closed (1 commit)

| Author | SHA1 | Date |
|---|---|---|
|  | 83110891fd |  |
@@ -1,11 +0,0 @@
-meta {
-  name: model delete
-  type: http
-  seq: 7
-}
-
-post {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
-  body: none
-  auth: none
-}
Binary file not shown.
@@ -1,16 +0,0 @@
-meta {
-  name: transcribe
-  type: http
-  seq: 1
-}
-
-post {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/audio/transcriptions
-  body: multipartForm
-  auth: none
-}
-
-body:multipart-form {
-  file: @file(transcription/gb1.ogg)
-  model: whisper-1
-}
@@ -7,7 +7,7 @@ services:
       args:
         - FFMPEG=true
         - IMAGE_TYPE=extras
-        - GO_TAGS=p2p tts
+        - GO_TAGS=stablediffusion p2p tts
     env_file:
       - ../.env
     ports:

.env (15 changed lines)

@@ -38,12 +38,12 @@
 ## Uncomment and set to true to enable rebuilding from source
 # REBUILD=true
 
-## Enable go tags, available: p2p, tts
-## p2p: enable distributed inferencing
+## Enable go tags, available: stablediffusion, tts
+## stablediffusion: image generation with stablediffusion
 ## tts: enables text-to-speech with go-piper
 ## (requires REBUILD=true)
 #
-# GO_TAGS=p2p
+# GO_TAGS=stablediffusion
 
 ## Path where to store generated images
 # LOCALAI_IMAGE_PATH=/tmp/generated/images

@@ -82,15 +82,6 @@
 # Enable to allow p2p mode
 # LOCALAI_P2P=true
 
-# Enable to use federated mode
-# LOCALAI_FEDERATED=true
-
-# Enable to start federation server
-# FEDERATED_SERVER=true
-
-# Define to use federation token
-# TOKEN=""
-
 ### Watchdog settings
 ###
 # Enables watchdog to kill backends that are inactive for too much time

.gitattributes (vendored, 1 changed line)

@@ -1,2 +1 @@
 *.sh text eol=lf
-backend/cpp/llama/*.hpp linguist-vendored

.github/ci/modelslist.go (vendored, 7 changed lines)

@@ -6,7 +6,6 @@ import (
 	"io/ioutil"
 	"os"
 
-	"github.com/microcosm-cc/bluemonday"
 	"gopkg.in/yaml.v3"
 )
 

@@ -280,12 +279,6 @@ func main() {
 		return
 	}
 
-	// Ensure that all arbitrary text content is sanitized before display
-	for i, m := range models {
-		models[i].Name = bluemonday.StrictPolicy().Sanitize(m.Name)
-		models[i].Description = bluemonday.StrictPolicy().Sanitize(m.Description)
-	}
-
 	// render the template
 	data := struct {
 		Models []*GalleryModel

.github/dependabot.yml (vendored, 10 changed lines)

@@ -9,8 +9,6 @@ updates:
     directory: "/"
     schedule:
       interval: "weekly"
-    ignore:
-      - dependency-name: "github.com/mudler/LocalAI/pkg/grpc/proto"
   - package-ecosystem: "github-actions"
     # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
     directory: "/"

@@ -81,6 +79,14 @@ updates:
     directory: "/backend/python/transformers"
     schedule:
       interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/transformers-musicgen"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/vall-e-x"
+    schedule:
+      interval: "weekly"
   - package-ecosystem: "pip"
     directory: "/backend/python/vllm"
     schedule:

.github/labeler.yml (vendored, 9 changed lines)

@@ -1,15 +1,6 @@
 enhancements:
   - head-branch: ['^feature', 'feature']
 
-dependencies:
-  - any:
-    - changed-files:
-        - any-glob-to-any-file: 'Makefile'
-    - changed-files:
-        - any-glob-to-any-file: '*.mod'
-    - changed-files:
-        - any-glob-to-any-file: '*.sum'
-
 kind/documentation:
   - any:
     - changed-files:

.github/workflows/bump_deps.yaml (vendored, 17 changed lines)

@@ -12,14 +12,23 @@ jobs:
       - repository: "ggerganov/llama.cpp"
         variable: "CPPLLAMA_VERSION"
         branch: "master"
+      - repository: "go-skynet/go-ggml-transformers.cpp"
+        variable: "GOGGMLTRANSFORMERS_VERSION"
+        branch: "master"
+      - repository: "donomii/go-rwkv.cpp"
+        variable: "RWKV_VERSION"
+        branch: "main"
       - repository: "ggerganov/whisper.cpp"
         variable: "WHISPER_CPP_VERSION"
         branch: "master"
-      - repository: "PABannier/bark.cpp"
-        variable: "BARKCPP_VERSION"
+      - repository: "go-skynet/go-bert.cpp"
+        variable: "BERT_VERSION"
+        branch: "master"
+      - repository: "go-skynet/bloomz.cpp"
+        variable: "BLOOMZ_VERSION"
         branch: "main"
-      - repository: "leejet/stable-diffusion.cpp"
-        variable: "STABLEDIFFUSION_GGML_VERSION"
+      - repository: "mudler/go-ggllm.cpp"
+        variable: "GOGGLLM_VERSION"
         branch: "master"
       - repository: "mudler/go-stable-diffusion"
         variable: "STABLEDIFFUSION_VERSION"

.github/workflows/checksum_checker.yaml (vendored, 2 changed lines)

@@ -23,7 +23,7 @@ jobs:
          sudo pip install --upgrade pip
          pip install huggingface_hub
      - name: 'Setup yq'
-       uses: dcarbone/install-yq-action@v1.3.1
+       uses: dcarbone/install-yq-action@v1.1.1
        with:
          version: 'v4.44.2'
          download-compressed: true

.github/workflows/deploy-explorer.yaml (vendored, 4 changed lines)

@@ -33,7 +33,7 @@ jobs:
        run: |
          CGO_ENABLED=0 make build-api
      - name: rm
-       uses: appleboy/ssh-action@v1.2.0
+       uses: appleboy/ssh-action@v1.0.3
        with:
          host: ${{ secrets.EXPLORER_SSH_HOST }}
          username: ${{ secrets.EXPLORER_SSH_USERNAME }}

@@ -53,7 +53,7 @@ jobs:
          rm: true
          target: ./local-ai
      - name: restarting
-       uses: appleboy/ssh-action@v1.2.0
+       uses: appleboy/ssh-action@v1.0.3
        with:
          host: ${{ secrets.EXPLORER_SSH_HOST }}
          username: ${{ secrets.EXPLORER_SSH_USERNAME }}

.github/workflows/generate_intel_image.yaml (vendored, 2 changed lines)

@@ -15,7 +15,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - base-image: intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04
+          - base-image: intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04
            runs-on: 'ubuntu-latest'
            platforms: 'linux/amd64'
     runs-on: ${{matrix.runs-on}}

.github/workflows/image.yml (vendored, 47 changed lines)

@@ -280,7 +280,6 @@ jobs:
       makeflags: ${{ matrix.makeflags }}
       latest-image: ${{ matrix.latest-image }}
       latest-image-aio: ${{ matrix.latest-image-aio }}
-      skip-drivers: ${{ matrix.skip-drivers }}
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}

@@ -302,7 +301,6 @@ jobs:
            latest-image: 'latest-cpu'
            latest-image-aio: 'latest-aio-cpu'
            makeflags: "--jobs=4 --output-sync=target"
-           skip-drivers: 'false'
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"

@@ -314,7 +312,6 @@ jobs:
            base-image: "ubuntu:22.04"
            runs-on: 'arc-runner-set'
            makeflags: "--jobs=4 --output-sync=target"
-           skip-drivers: 'false'
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"

@@ -326,7 +323,6 @@ jobs:
            base-image: "ubuntu:22.04"
            runs-on: 'arc-runner-set'
            makeflags: "--jobs=4 --output-sync=target"
-           skip-drivers: 'false'
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"

@@ -338,7 +334,6 @@ jobs:
            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
            makeflags: "--jobs=4 --output-sync=target"
-           skip-drivers: 'false'
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "0"

@@ -349,7 +344,6 @@ jobs:
            image-type: 'core'
            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
-           skip-drivers: 'false'
            makeflags: "--jobs=4 --output-sync=target"
          - build-type: 'vulkan'
            platforms: 'linux/amd64'

@@ -360,45 +354,4 @@ jobs:
            image-type: 'core'
            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
-           skip-drivers: 'false'
            makeflags: "--jobs=4 --output-sync=target"
-  gh-runner:
-    uses: ./.github/workflows/image_build.yml
-    with:
-      tag-latest: ${{ matrix.tag-latest }}
-      tag-suffix: ${{ matrix.tag-suffix }}
-      ffmpeg: ${{ matrix.ffmpeg }}
-      image-type: ${{ matrix.image-type }}
-      build-type: ${{ matrix.build-type }}
-      cuda-major-version: ${{ matrix.cuda-major-version }}
-      cuda-minor-version: ${{ matrix.cuda-minor-version }}
-      platforms: ${{ matrix.platforms }}
-      runs-on: ${{ matrix.runs-on }}
-      aio: ${{ matrix.aio }}
-      base-image: ${{ matrix.base-image }}
-      grpc-base-image: ${{ matrix.grpc-base-image }}
-      makeflags: ${{ matrix.makeflags }}
-      latest-image: ${{ matrix.latest-image }}
-      latest-image-aio: ${{ matrix.latest-image-aio }}
-      skip-drivers: ${{ matrix.skip-drivers }}
-    secrets:
-      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
-      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
-      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
-      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-    strategy:
-      matrix:
-        include:
-          - build-type: 'cublas'
-            cuda-major-version: "12"
-            cuda-minor-version: "0"
-            platforms: 'linux/arm64'
-            tag-latest: 'false'
-            tag-suffix: '-nvidia-l4t-arm64-core'
-            latest-image: 'latest-nvidia-l4t-arm64-core'
-            ffmpeg: 'true'
-            image-type: 'core'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
-            runs-on: 'ubuntu-24.04-arm'
-            makeflags: "--jobs=4 --output-sync=target"
-            skip-drivers: 'true'

.github/workflows/image_build.yml (vendored, 6 changed lines)

@@ -49,10 +49,6 @@ on:
        description: 'FFMPEG'
        default: ''
        type: string
-     skip-drivers:
-       description: 'Skip drivers by default'
-       default: 'false'
-       type: string
      image-type:
        description: 'Image type'
        default: ''

@@ -238,7 +234,6 @@ jobs:
            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
            GRPC_VERSION=v1.65.0
            MAKEFLAGS=${{ inputs.makeflags }}
-           SKIP_DRIVERS=${{ inputs.skip-drivers }}
          context: .
          file: ./Dockerfile
          cache-from: type=gha

@@ -267,7 +262,6 @@ jobs:
            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
            GRPC_VERSION=v1.65.0
            MAKEFLAGS=${{ inputs.makeflags }}
-           SKIP_DRIVERS=${{ inputs.skip-drivers }}
          context: .
          file: ./Dockerfile
          cache-from: type=gha

.github/workflows/notify-models.yaml (vendored, 4 changed lines)

@@ -79,7 +79,7 @@ jobs:
          args: ${{ steps.summarize.outputs.message }}
      - name: Setup tmate session if fails
        if: ${{ failure() }}
-       uses: mxschmitt/action-tmate@v3.19
+       uses: mxschmitt/action-tmate@v3.18
        with:
          detached: true
          connect-timeout-seconds: 180

@@ -161,7 +161,7 @@ jobs:
          TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
      - name: Setup tmate session if fails
        if: ${{ failure() }}
-       uses: mxschmitt/action-tmate@v3.19
+       uses: mxschmitt/action-tmate@v3.18
        with:
          detached: true
          connect-timeout-seconds: 180

.github/workflows/release.yaml (vendored, 43 changed lines)

@@ -123,7 +123,7 @@ jobs:
            release/*
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-       uses: mxschmitt/action-tmate@v3.19
+       uses: mxschmitt/action-tmate@v3.18
        with:
          detached: true
          connect-timeout-seconds: 180

@@ -232,12 +232,45 @@ jobs:
            release/*
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-       uses: mxschmitt/action-tmate@v3.19
+       uses: mxschmitt/action-tmate@v3.18
        with:
          detached: true
          connect-timeout-seconds: 180
          limit-access-to-actor: true
+
+  build-stablediffusion:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+      - uses: actions/setup-go@v5
+        with:
+          go-version: '1.21.x'
+          cache: false
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
+      - name: Build stablediffusion
+        run: |
+          export PATH=$PATH:$GOPATH/bin
+          make backend-assets/grpc/stablediffusion
+          mkdir -p release && cp backend-assets/grpc/stablediffusion release
+        env:
+          GO_TAGS: stablediffusion
+      - uses: actions/upload-artifact@v4
+        with:
+          name: stablediffusion
+          path: release/
+      - name: Release
+        uses: softprops/action-gh-release@v2
+        if: startsWith(github.ref, 'refs/tags/')
+        with:
+          files: |
+            release/*
 
   build-macOS-x86_64:
     runs-on: macos-13

@@ -275,7 +308,7 @@ jobs:
            release/*
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-       uses: mxschmitt/action-tmate@v3.19
+       uses: mxschmitt/action-tmate@v3.18
        with:
          detached: true
          connect-timeout-seconds: 180

@@ -317,7 +350,7 @@ jobs:
            release/*
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-       uses: mxschmitt/action-tmate@v3.19
+       uses: mxschmitt/action-tmate@v3.18
        with:
          detached: true
          connect-timeout-seconds: 180

.github/workflows/secscan.yaml (vendored, 2 changed lines)

@@ -18,7 +18,7 @@ jobs:
        if: ${{ github.actor != 'dependabot[bot]' }}
      - name: Run Gosec Security Scanner
        if: ${{ github.actor != 'dependabot[bot]' }}
-       uses: securego/gosec@v2.22.0
+       uses: securego/gosec@v2.21.4
        with:
          # we let the report trigger content trigger a failure using the GitHub Security features.
          args: '-no-fail -fmt sarif -out results.sarif ./...'

.github/workflows/test-extra.yml (vendored, 128 changed lines)

@@ -35,6 +35,30 @@ jobs:
        run: |
          make --jobs=5 --output-sync=target -C backend/python/transformers
          make --jobs=5 --output-sync=target -C backend/python/transformers test
+
+  tests-sentencetransformers:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential ffmpeg
+          # Install UV
+          curl -LsSf https://astral.sh/uv/install.sh | sh
+          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
+          sudo apt-get install -y libopencv-dev
+          pip install --user --no-cache-dir grpcio-tools==1.64.1
+
+      - name: Test sentencetransformers
+        run: |
+          make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
+          make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
+
+
   tests-rerankers:
     runs-on: ubuntu-latest
     steps:

@@ -78,27 +102,71 @@ jobs:
          make --jobs=5 --output-sync=target -C backend/python/diffusers
          make --jobs=5 --output-sync=target -C backend/python/diffusers test
 
-  # tests-transformers-musicgen:
-  #   runs-on: ubuntu-latest
-  #   steps:
-  #     - name: Clone
-  #       uses: actions/checkout@v4
-  #       with:
-  #         submodules: true
-  #     - name: Dependencies
-  #       run: |
-  #         sudo apt-get update
-  #         sudo apt-get install build-essential ffmpeg
-  #         # Install UV
-  #         curl -LsSf https://astral.sh/uv/install.sh | sh
-  #         sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-  #         sudo apt-get install -y libopencv-dev
-  #         pip install --user --no-cache-dir grpcio-tools==1.64.1
-
-  #     - name: Test transformers-musicgen
-  #       run: |
-  #         make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
-  #         make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
+  tests-parler-tts:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential ffmpeg
+          # Install UV
+          curl -LsSf https://astral.sh/uv/install.sh | sh
+          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
+          sudo apt-get install -y libopencv-dev
+          pip install --user --no-cache-dir grpcio-tools==1.64.1
+
+      - name: Test parler-tts
+        run: |
+          make --jobs=5 --output-sync=target -C backend/python/parler-tts
+          make --jobs=5 --output-sync=target -C backend/python/parler-tts test
+
+  tests-openvoice:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential ffmpeg
+          # Install UV
+          curl -LsSf https://astral.sh/uv/install.sh | sh
+          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
+          sudo apt-get install -y libopencv-dev
+          pip install --user --no-cache-dir grpcio-tools==1.64.1
+
+      - name: Test openvoice
+        run: |
+          make --jobs=5 --output-sync=target -C backend/python/openvoice
+          make --jobs=5 --output-sync=target -C backend/python/openvoice test
+
+  tests-transformers-musicgen:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential ffmpeg
+          # Install UV
+          curl -LsSf https://astral.sh/uv/install.sh | sh
+          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
+          sudo apt-get install -y libopencv-dev
+          pip install --user --no-cache-dir grpcio-tools==1.64.1
+
+      - name: Test transformers-musicgen
+        run: |
+          make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
+          make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
 
   # tests-bark:
   #   runs-on: ubuntu-latest

@@ -185,6 +253,26 @@ jobs:
   #       run: |
   #         make --jobs=5 --output-sync=target -C backend/python/vllm
   #         make --jobs=5 --output-sync=target -C backend/python/vllm test
+  tests-vallex:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential ffmpeg
+          # Install UV
+          curl -LsSf https://astral.sh/uv/install.sh | sh
+          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
+          sudo apt-get install -y libopencv-dev
+          pip install --user --no-cache-dir grpcio-tools==1.64.1
+      - name: Test vall-e-x
+        run: |
+          make --jobs=5 --output-sync=target -C backend/python/vall-e-x
+          make --jobs=5 --output-sync=target -C backend/python/vall-e-x test
 
   tests-coqui:
     runs-on: ubuntu-latest

.github/workflows/test.yml (vendored, 17 changed lines)

@@ -100,12 +100,15 @@ jobs:
          # The python3-grpc-tools package in 22.04 is too old
          pip install --user grpcio-tools
 
-         make -C backend/python/transformers
+         sudo rm -rfv /usr/bin/conda || true
+         PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
 
          # Pre-build piper before we start tests in order to have shared libraries in place
          make sources/go-piper && \
          GO_TAGS="tts" make -C sources/go-piper piper.o && \
-         sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/
+         sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
+         # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
+         PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
        env:
          CUDA_VERSION: 12-4
      - name: Cache grpc

@@ -127,10 +130,10 @@ jobs:
          cd grpc && cd cmake/build && sudo make --jobs 5 install
      - name: Test
        run: |
-         PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
+         PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-       uses: mxschmitt/action-tmate@v3.19
+       uses: mxschmitt/action-tmate@v3.18
        with:
          detached: true
          connect-timeout-seconds: 180

@@ -194,7 +197,7 @@ jobs:
          make run-e2e-aio
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-       uses: mxschmitt/action-tmate@v3.19
+       uses: mxschmitt/action-tmate@v3.18
        with:
          detached: true
          connect-timeout-seconds: 180

@@ -221,7 +224,7 @@ jobs:
      - name: Dependencies
        run: |
          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
-         pip install --user --no-cache-dir grpcio-tools
+         pip install --user --no-cache-dir grpcio-tools==1.64.1
      - name: Test
        run: |
          export C_INCLUDE_PATH=/usr/local/include

@@ -232,7 +235,7 @@ jobs:
          BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
-       uses: mxschmitt/action-tmate@v3.19
+       uses: mxschmitt/action-tmate@v3.18
        with:
          detached: true
          connect-timeout-seconds: 180

.gitignore (vendored, 2 changed lines)

@@ -2,7 +2,6 @@
 /sources/
 __pycache__/
 *.a
-*.o
 get-sources
 prepare-sources
 /backend/cpp/llama/grpc-server

@@ -13,6 +12,7 @@ prepare-sources
 
 go-ggml-transformers
 go-gpt2
+go-rwkv
 whisper.cpp
 /bloomz
 go-bert

.vscode/launch.json (vendored, 2 changed lines)

@@ -26,7 +26,7 @@
         "LOCALAI_P2P": "true",
         "LOCALAI_FEDERATED": "true"
       },
-      "buildFlags": ["-tags", "p2p tts", "-v"],
+      "buildFlags": ["-tags", "stablediffusion p2p tts", "-v"],
       "envFile": "${workspaceFolder}/.env",
       "cwd": "${workspaceRoot}"
     }

Dockerfile (115 changed lines)

@@ -9,38 +9,25 @@ FROM ${BASE_IMAGE} AS requirements-core
 USER root
 
 ARG GO_VERSION=1.22.6
-ARG CMAKE_VERSION=3.26.4
-ARG CMAKE_FROM_SOURCE=false
 ARG TARGETARCH
 ARG TARGETVARIANT
 
 ENV DEBIAN_FRONTEND=noninteractive
-ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
+ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
 
 
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         build-essential \
         ccache \
         ca-certificates \
-        curl libssl-dev \
+        cmake \
+        curl \
         git \
         unzip upx-ucl && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-# Install CMake (the version in 22.04 is too old)
-RUN <<EOT bash
-    if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
-        curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
-    else
-        apt-get update && \
-        apt-get install -y \
-            cmake && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
-    fi
-EOT
-
 # Install Go
 RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
 ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin

@@ -68,10 +55,14 @@ ENV PATH=/opt/rocm/bin:${PATH}
 # OpenBLAS requirements and stable diffusion
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-        libopenblas-dev && \
+        libopenblas-dev \
+        libopencv-dev && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
+# Set up OpenCV
+RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+
 WORKDIR /build
 
 ###################################

@@ -80,8 +71,7 @@ WORKDIR /build
 # The requirements-extras target is for any builds with IMAGE_TYPE=extras. It should not be placed in this target unless every IMAGE_TYPE=extras build will use it
 FROM requirements-core AS requirements-extras
 
-# Install uv as a system package
-RUN curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR=/usr/bin sh
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh
 ENV PATH="/root/.cargo/bin:${PATH}"
 
 RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y

@@ -110,13 +100,12 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers
 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=12
 ARG CUDA_MINOR_VERSION=0
-ARG SKIP_DRIVERS=false
 
 ENV BUILD_TYPE=${BUILD_TYPE}
 
 # Vulkan requirements
 RUN <<EOT bash
-    if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
+    if [ "${BUILD_TYPE}" = "vulkan" ]; then
         apt-get update && \
         apt-get install -y --no-install-recommends \
             software-properties-common pciutils wget gpg-agent && \

@@ -132,7 +121,7 @@ EOT
 
 # CuBLAS requirements
 RUN <<EOT bash
-    if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
+    if [ "${BUILD_TYPE}" = "cublas" ]; then
         apt-get update && \
         apt-get install -y --no-install-recommends \
             software-properties-common pciutils

@@ -158,7 +147,7 @@ RUN <<EOT bash
 EOT
 
 # If we are building with clblas support, we need the libraries for the builds
-RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
+RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
     apt-get update && \
     apt-get install -y --no-install-recommends \
         libclblast-dev && \

@@ -166,7 +155,7 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
     rm -rf /var/lib/apt/lists/* \
     ; fi
 
-RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
+RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        hipblas-dev \

@@ -199,8 +188,6 @@ FROM ${GRPC_BASE_IMAGE} AS grpc
 # This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
 ARG GRPC_MAKEFLAGS="-j4 -Otarget"
 ARG GRPC_VERSION=v1.65.0
-ARG CMAKE_FROM_SOURCE=false
-ARG CMAKE_VERSION=3.26.4
 
 ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
 

@@ -209,24 +196,12 @@ WORKDIR /build
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         ca-certificates \
-        build-essential curl libssl-dev \
+        build-essential \
+        cmake \
         git && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-# Install CMake (the version in 22.04 is too old)
-RUN <<EOT bash
-    if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
-        curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
-    else
-        apt-get update && \
-        apt-get install -y \
-            cmake && \
-        apt-get clean && \
-        rm -rf /var/lib/apt/lists/*
-    fi
-EOT
-
 # We install GRPC to a different prefix here so that we can copy in only the build artifacts later
 # saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
 # and running make install in the target container

@@ -246,7 +221,7 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
 
 FROM requirements-drivers AS builder-base
 
-ARG GO_TAGS="tts p2p"
+ARG GO_TAGS="stablediffusion tts p2p"
 ARG GRPC_BACKENDS
 ARG MAKEFLAGS
 ARG LD_FLAGS="-s -w"

@@ -280,12 +255,35 @@ RUN <<EOT bash
     fi
 EOT
 
 
+###################################
+###################################
+
+# This first portion of builder holds the layers specifically used to build backend-assets/grpc/stablediffusion
+# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
+FROM builder-base AS builder-sd
+
+# stablediffusion does not tolerate a newer version of abseil, copy only over enough elements to build it
+COPY Makefile .
+COPY go.mod .
+COPY go.sum .
+COPY backend/backend.proto ./backend/backend.proto
+COPY backend/go/image/stablediffusion ./backend/go/image/stablediffusion
+COPY pkg/grpc ./pkg/grpc
+COPY pkg/stablediffusion ./pkg/stablediffusion
+RUN git init
+RUN make sources/go-stable-diffusion
+RUN touch prepare-sources
+
+# Actually build the backend
+RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make backend-assets/grpc/stablediffusion
+
 ###################################
 ###################################
 
 # The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
 # Adjustments to the build process should likely be made here.
-FROM builder-base AS builder
+FROM builder-sd AS builder
 
 # Install the pre-built GRPC
 COPY --from=grpc /opt/grpc /usr/local

@@ -303,7 +301,7 @@ RUN make prepare
 ## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
 ## (both will use CUDA or hipblas for the actual computation)
 RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
-    SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
+    SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
     else \
     make build; \
     fi

@@ -325,6 +323,8 @@ ARG FFMPEG
 
 COPY --from=grpc /opt/grpc /usr/local
 
+COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion /build/backend-assets/grpc/stablediffusion
+
 COPY .devcontainer-scripts /.devcontainer-scripts
 
 # Add FFmpeg

@@ -397,28 +397,36 @@ COPY --from=builder /build/local-ai ./
 # Copy shared libraries for piper
 COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
 
+# do not let stablediffusion rebuild (requires an older version of absl)
+COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
+
 # Change the shell to bash so we can use [[ tests below
 SHELL ["/bin/bash", "-c"]
 # We try to strike a balance between individual layer size (as that affects total push time) and total image size
 # Splitting the backends into more groups with fewer items results in a larger image, but a smaller size for the largest layer
 # Splitting the backends into fewer groups with more items results in a smaller image, but a larger size for the largest layer
 
-RUN if [[ ( "${IMAGE_TYPE}" == "extras ")]]; then \
-    apt-get -qq -y install espeak-ng \
-    ; fi
-
 RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
     make -C backend/python/coqui \
     ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "faster-whisper" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-    make -C backend/python/faster-whisper \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "parler-tts" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+    make -C backend/python/parler-tts \
    ; fi && \
    if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
    make -C backend/python/diffusers \
+    ; fi && \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+    make -C backend/python/transformers-musicgen \
    ; fi
 
-RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-    make -C backend/python/kokoro \
+RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+    make -C backend/python/vall-e-x \
+    ; fi && \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+    make -C backend/python/openvoice \
+    ; fi && \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+    make -C backend/python/sentencetransformers \
    ; fi && \
    if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
    make -C backend/python/exllama2 \

@@ -438,6 +446,9 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE
    ; fi && \
    if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
    make -C backend/python/rerankers \
+    ; fi && \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "mamba" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+    make -C backend/python/mamba \
    ; fi
 
 # Make sure the models directory exists
326
Makefile
326
Makefile
@@ -8,27 +8,31 @@ DETECT_LIBS?=true
|
|||||||
# llama.cpp versions
|
# llama.cpp versions
|
||||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||||
CPPLLAMA_VERSION?=6152129d05870cb38162c422c6ba80434e021e9f
|
CPPLLAMA_VERSION?=d5ed2b929d85bbd7dbeecb690880f07d9d7a6077
|
||||||
|
|
||||||
|
# go-rwkv version
|
||||||
|
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||||
|
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
|
||||||
|
|
||||||
# whisper.cpp version
|
# whisper.cpp version
|
||||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||||
WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
|
WHISPER_CPP_VERSION?=ccc2547210e09e3a1785817383ab770389bb442b
|
||||||
|
|
||||||
|
# bert.cpp version
|
||||||
|
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
||||||
|
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
|
||||||
|
|
||||||
# go-piper version
|
# go-piper version
|
||||||
PIPER_REPO?=https://github.com/mudler/go-piper
|
PIPER_REPO?=https://github.com/mudler/go-piper
|
||||||
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
|
PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759
|
||||||
|
|
||||||
# bark.cpp
|
# stablediffusion version
|
||||||
BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
|
STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
|
||||||
BARKCPP_VERSION?=v1.0.0
|
STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
|
||||||
|
|
||||||
# stablediffusion.cpp (ggml)
|
# tinydream version
|
||||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
|
||||||
STABLEDIFFUSION_GGML_VERSION?=5eb15ef4d022bef4a391de4f5f6556e81fbb5024
|
TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
|
||||||
|
|
||||||
ONNX_VERSION?=1.20.0
|
|
||||||
ONNX_ARCH?=x64
|
|
||||||
ONNX_OS?=linux
|
|
||||||
|
|
||||||
export BUILD_TYPE?=
|
export BUILD_TYPE?=
|
||||||
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
|
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
|
||||||
@@ -41,7 +45,6 @@ CGO_LDFLAGS_WHISPER+=-lggml
|
|||||||
CUDA_LIBPATH?=/usr/local/cuda/lib64/
|
CUDA_LIBPATH?=/usr/local/cuda/lib64/
|
||||||
GO_TAGS?=
|
GO_TAGS?=
|
||||||
BUILD_ID?=
|
BUILD_ID?=
|
||||||
NATIVE?=false
|
|
||||||
|
|
||||||
TEST_DIR=/tmp/test
|
TEST_DIR=/tmp/test
|
||||||
|
|
||||||
@@ -80,25 +83,7 @@ ifndef UNAME_S
|
|||||||
UNAME_S := $(shell uname -s)
|
UNAME_S := $(shell uname -s)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
|
|
||||||
ifeq ($(NATIVE),false)
|
|
||||||
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Detect if we are running on arm64
|
|
||||||
ifneq (,$(findstring aarch64,$(shell uname -m)))
|
|
||||||
ONNX_ARCH=aarch64
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(OS),Darwin)
|
ifeq ($(OS),Darwin)
|
||||||
ONNX_OS=osx
|
|
||||||
ifneq (,$(findstring aarch64,$(shell uname -m)))
|
|
||||||
ONNX_ARCH=arm64
|
|
||||||
else ifneq (,$(findstring arm64,$(shell uname -m)))
|
|
||||||
ONNX_ARCH=arm64
|
|
||||||
else
|
|
||||||
ONNX_ARCH=x86_64
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(OSX_SIGNING_IDENTITY),)
|
ifeq ($(OSX_SIGNING_IDENTITY),)
|
||||||
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
|
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
|
||||||
@@ -153,10 +138,10 @@ ifeq ($(BUILD_TYPE),hipblas)
|
|||||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||||
# llama-ggml has no hipblas support, so override it here.
|
# llama-ggml has no hipblas support, so override it here.
|
||||||
export STABLE_BUILD_TYPE=
|
export STABLE_BUILD_TYPE=
|
||||||
export GGML_HIP=1
|
export GGML_HIPBLAS=1
|
||||||
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
|
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
|
||||||
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
||||||
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
||||||
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
|
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
|
||||||
endif
|
endif
|
||||||
|
|
||||||
@@ -175,6 +160,16 @@ ifeq ($(STATIC),true)
 LD_FLAGS+=-linkmode external -extldflags -static
 endif

+ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
+# OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
+OPTIONAL_GRPC+=backend-assets/grpc/stablediffusion
+endif

+ifeq ($(findstring tinydream,$(GO_TAGS)),tinydream)
+# OPTIONAL_TARGETS+=go-tiny-dream/libtinydream.a
+OPTIONAL_GRPC+=backend-assets/grpc/tinydream
+endif

 ifeq ($(findstring tts,$(GO_TAGS)),tts)
 # OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
 # OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
@@ -184,24 +179,16 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
 endif

 ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
+ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
-ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
 ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
+ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
 ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper

-ifeq ($(ONNX_OS),linux)
-ifeq ($(ONNX_ARCH),x64)
-ALL_GRPC_BACKENDS+=backend-assets/grpc/bark-cpp
-ALL_GRPC_BACKENDS+=backend-assets/grpc/stablediffusion-ggml
-endif
-endif

 ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
-ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad
 ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
 # Use filter-out to remove the specified backends
 ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
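Because the backend list is filtered through `SKIP_GRPC_BACKEND`, individual backends can be left out of a build; a sketch (the entries are the backend-assets paths listed above, and the exact set available depends on which side of the comparison you are on):

```bash
# build everything except the llama-ggml backend and the RPC server helper
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-ggml backend-assets/util/llama-cpp-rpc-server" make build
```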
@@ -222,6 +209,19 @@ endif
|
|||||||
|
|
||||||
all: help
|
all: help
|
||||||
|
|
||||||
|
## BERT embeddings
|
||||||
|
sources/go-bert.cpp:
|
||||||
|
mkdir -p sources/go-bert.cpp
|
||||||
|
cd sources/go-bert.cpp && \
|
||||||
|
git init && \
|
||||||
|
git remote add origin $(BERT_REPO) && \
|
||||||
|
git fetch origin && \
|
||||||
|
git checkout $(BERT_VERSION) && \
|
||||||
|
git submodule update --init --recursive --depth 1 --single-branch
|
||||||
|
|
||||||
|
sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
|
||||||
|
$(MAKE) -C sources/go-bert.cpp libgobert.a
|
||||||
|
|
||||||
## go-llama.cpp
|
## go-llama.cpp
|
||||||
sources/go-llama.cpp:
|
sources/go-llama.cpp:
|
||||||
mkdir -p sources/go-llama.cpp
|
mkdir -p sources/go-llama.cpp
|
||||||
@@ -235,23 +235,6 @@ sources/go-llama.cpp:
|
|||||||
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
|
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
|
||||||
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
|
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
|
||||||
|
|
||||||
## bark.cpp
|
|
||||||
sources/bark.cpp:
|
|
||||||
git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
|
|
||||||
cd sources/bark.cpp && \
|
|
||||||
git checkout $(BARKCPP_VERSION) && \
|
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
|
||||||
|
|
||||||
sources/bark.cpp/build/libbark.a: sources/bark.cpp
|
|
||||||
cd sources/bark.cpp && \
|
|
||||||
mkdir -p build && \
|
|
||||||
cd build && \
|
|
||||||
cmake $(CMAKE_ARGS) .. && \
|
|
||||||
cmake --build . --config Release
|
|
||||||
|
|
||||||
backend/go/bark/libbark.a: sources/bark.cpp/build/libbark.a
|
|
||||||
$(MAKE) -C backend/go/bark libbark.a
|
|
||||||
|
|
||||||
## go-piper
|
## go-piper
|
||||||
sources/go-piper:
|
sources/go-piper:
|
||||||
mkdir -p sources/go-piper
|
mkdir -p sources/go-piper
|
||||||
@@ -265,37 +248,45 @@ sources/go-piper:
 sources/go-piper/libpiper_binding.a: sources/go-piper
 $(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o

-## stablediffusion (ggml)
-sources/stablediffusion-ggml.cpp:
-git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
-cd sources/stablediffusion-ggml.cpp && \
-git checkout $(STABLEDIFFUSION_GGML_VERSION) && \
+## RWKV
+sources/go-rwkv.cpp:
+mkdir -p sources/go-rwkv.cpp
+cd sources/go-rwkv.cpp && \
+git init && \
+git remote add origin $(RWKV_REPO) && \
+git fetch origin && \
+git checkout $(RWKV_VERSION) && \
 git submodule update --init --recursive --depth 1 --single-branch

-backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp
-$(MAKE) -C backend/go/image/stablediffusion-ggml build/libstable-diffusion.a
-$(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a
+sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
+cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..

-backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc
-CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ LIBRARY_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ \
-$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion-ggml ./backend/go/image/stablediffusion-ggml/
-ifneq ($(UPX),)
-$(UPX) backend-assets/grpc/stablediffusion-ggml
-endif
+## stable diffusion
+sources/go-stable-diffusion:
+mkdir -p sources/go-stable-diffusion
+cd sources/go-stable-diffusion && \
+git init && \
+git remote add origin $(STABLEDIFFUSION_REPO) && \
+git fetch origin && \
+git checkout $(STABLEDIFFUSION_VERSION) && \
+git submodule update --init --recursive --depth 1 --single-branch

-sources/onnxruntime:
-mkdir -p sources/onnxruntime
-curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
-cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
-cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./
+sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
+CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a

-backend-assets/lib/libonnxruntime.so.1: backend-assets/lib sources/onnxruntime
-cp -rfv sources/onnxruntime/lib/* backend-assets/lib/
-ifeq ($(OS),Darwin)
-mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
-else
-mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
-endif
+## tiny-dream
+sources/go-tiny-dream:
+mkdir -p sources/go-tiny-dream
+cd sources/go-tiny-dream && \
+git init && \
+git remote add origin $(TINYDREAM_REPO) && \
+git fetch origin && \
+git checkout $(TINYDREAM_VERSION) && \
+git submodule update --init --recursive --depth 1 --single-branch
+
+sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
+$(MAKE) -C sources/go-tiny-dream libtinydream.a

 ## whisper
 sources/whisper.cpp:
@@ -310,18 +301,26 @@ sources/whisper.cpp:
|
|||||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
||||||
|
|
||||||
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
|
get-sources: sources/go-llama.cpp sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
|
||||||
|
|
||||||
replace:
|
replace:
|
||||||
|
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
|
||||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
||||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
||||||
|
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
|
||||||
|
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
||||||
|
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
||||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
||||||
|
|
||||||
dropreplace:
|
dropreplace:
|
||||||
|
$(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp
|
||||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
||||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
||||||
|
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
|
||||||
|
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
|
||||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
||||||
|
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
|
||||||
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
|
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
|
||||||
|
|
||||||
prepare-sources: get-sources replace
|
prepare-sources: get-sources replace
|
||||||
@@ -331,8 +330,12 @@ prepare-sources: get-sources replace
|
|||||||
rebuild: ## Rebuilds the project
|
rebuild: ## Rebuilds the project
|
||||||
$(GOCMD) clean -cache
|
$(GOCMD) clean -cache
|
||||||
$(MAKE) -C sources/go-llama.cpp clean
|
$(MAKE) -C sources/go-llama.cpp clean
|
||||||
|
$(MAKE) -C sources/go-rwkv.cpp clean
|
||||||
$(MAKE) -C sources/whisper.cpp clean
|
$(MAKE) -C sources/whisper.cpp clean
|
||||||
|
$(MAKE) -C sources/go-stable-diffusion clean
|
||||||
|
$(MAKE) -C sources/go-bert.cpp clean
|
||||||
$(MAKE) -C sources/go-piper clean
|
$(MAKE) -C sources/go-piper clean
|
||||||
|
$(MAKE) -C sources/go-tiny-dream clean
|
||||||
$(MAKE) build
|
$(MAKE) build
|
||||||
|
|
||||||
prepare: prepare-sources $(OPTIONAL_TARGETS)
|
prepare: prepare-sources $(OPTIONAL_TARGETS)
|
||||||
@@ -345,9 +348,7 @@ clean: ## Remove build related file
|
|||||||
rm -rf release/
|
rm -rf release/
|
||||||
rm -rf backend-assets/*
|
rm -rf backend-assets/*
|
||||||
$(MAKE) -C backend/cpp/grpc clean
|
$(MAKE) -C backend/cpp/grpc clean
|
||||||
$(MAKE) -C backend/go/bark clean
|
|
||||||
$(MAKE) -C backend/cpp/llama clean
|
$(MAKE) -C backend/cpp/llama clean
|
||||||
$(MAKE) -C backend/go/image/stablediffusion-ggml clean
|
|
||||||
rm -rf backend/cpp/llama-* || true
|
rm -rf backend/cpp/llama-* || true
|
||||||
$(MAKE) dropreplace
|
$(MAKE) dropreplace
|
||||||
$(MAKE) protogen-clean
|
$(MAKE) protogen-clean
|
||||||
@@ -438,6 +439,8 @@ test-models/testmodel.ggml:
|
|||||||
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
||||||
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
|
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
|
||||||
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
||||||
|
wget -q https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv
|
||||||
|
wget -q https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
|
||||||
cp tests/models_fixtures/* test-models
|
cp tests/models_fixtures/* test-models
|
||||||
|
|
||||||
prepare-test: grpcs
|
prepare-test: grpcs
|
||||||
@@ -446,9 +449,9 @@ prepare-test: grpcs
|
|||||||
|
|
||||||
test: prepare test-models/testmodel.ggml grpcs
|
test: prepare test-models/testmodel.ggml grpcs
|
||||||
@echo 'Running tests'
|
@echo 'Running tests'
|
||||||
export GO_TAGS="tts debug"
|
export GO_TAGS="tts stablediffusion debug"
|
||||||
$(MAKE) prepare-test
|
$(MAKE) prepare-test
|
||||||
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||||
$(MAKE) test-llama
|
$(MAKE) test-llama
|
||||||
$(MAKE) test-llama-gguf
|
$(MAKE) test-llama-gguf
|
||||||
@@ -467,13 +470,13 @@ run-e2e-image:
|
|||||||
|
|
||||||
run-e2e-aio: protogen-go
|
run-e2e-aio: protogen-go
|
||||||
@echo 'Running e2e AIO tests'
|
@echo 'Running e2e AIO tests'
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
|
||||||
|
|
||||||
test-e2e:
|
test-e2e:
|
||||||
@echo 'Running e2e tests'
|
@echo 'Running e2e tests'
|
||||||
BUILD_TYPE=$(BUILD_TYPE) \
|
BUILD_TYPE=$(BUILD_TYPE) \
|
||||||
LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
|
LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e
|
||||||
|
|
||||||
teardown-e2e:
|
teardown-e2e:
|
||||||
rm -rf $(TEST_DIR) || true
|
rm -rf $(TEST_DIR) || true
|
||||||
@@ -481,24 +484,24 @@ teardown-e2e:
|
|||||||
|
|
||||||
test-llama: prepare-test
|
test-llama: prepare-test
|
||||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS)
|
||||||
|
|
||||||
test-llama-gguf: prepare-test
|
test-llama-gguf: prepare-test
|
||||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r $(TEST_PATHS)
|
||||||
|
|
||||||
test-tts: prepare-test
|
test-tts: prepare-test
|
||||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts 1 -v -r $(TEST_PATHS)
|
||||||
|
|
||||||
test-stablediffusion: prepare-test
|
test-stablediffusion: prepare-test
|
||||||
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts 1 -v -r $(TEST_PATHS)
|
||||||
|
|
||||||
test-stores: backend-assets/grpc/local-store
|
test-stores: backend-assets/grpc/local-store
|
||||||
mkdir -p tests/integration/backend-assets/grpc
|
mkdir -p tests/integration/backend-assets/grpc
|
||||||
cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/
|
cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts $(TEST_FLAKES) -v -r tests/integration
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts 1 -v -r tests/integration
|
||||||
|
|
||||||
test-container:
|
test-container:
|
||||||
docker build --target requirements -t local-ai-test-container .
|
docker build --target requirements -t local-ai-test-container .
|
||||||
@@ -534,10 +537,10 @@ protogen-go-clean:
|
|||||||
$(RM) bin/*
|
$(RM) bin/*
|
||||||
|
|
||||||
.PHONY: protogen-python
|
.PHONY: protogen-python
|
||||||
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
|
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
|
||||||
|
|
||||||
.PHONY: protogen-python-clean
|
.PHONY: protogen-python-clean
|
||||||
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean
|
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
|
||||||
|
|
||||||
.PHONY: autogptq-protogen
|
.PHONY: autogptq-protogen
|
||||||
autogptq-protogen:
|
autogptq-protogen:
|
||||||
@@ -571,14 +574,6 @@ diffusers-protogen:
|
|||||||
diffusers-protogen-clean:
|
diffusers-protogen-clean:
|
||||||
$(MAKE) -C backend/python/diffusers protogen-clean
|
$(MAKE) -C backend/python/diffusers protogen-clean
|
||||||
|
|
||||||
.PHONY: faster-whisper-protogen
|
|
||||||
faster-whisper-protogen:
|
|
||||||
$(MAKE) -C backend/python/faster-whisper protogen
|
|
||||||
|
|
||||||
.PHONY: faster-whisper-protogen-clean
|
|
||||||
faster-whisper-protogen-clean:
|
|
||||||
$(MAKE) -C backend/python/faster-whisper protogen-clean
|
|
||||||
|
|
||||||
.PHONY: exllama2-protogen
|
.PHONY: exllama2-protogen
|
||||||
exllama2-protogen:
|
exllama2-protogen:
|
||||||
$(MAKE) -C backend/python/exllama2 protogen
|
$(MAKE) -C backend/python/exllama2 protogen
|
||||||
@@ -587,6 +582,14 @@ exllama2-protogen:
|
|||||||
exllama2-protogen-clean:
|
exllama2-protogen-clean:
|
||||||
$(MAKE) -C backend/python/exllama2 protogen-clean
|
$(MAKE) -C backend/python/exllama2 protogen-clean
|
||||||
|
|
||||||
|
.PHONY: mamba-protogen
|
||||||
|
mamba-protogen:
|
||||||
|
$(MAKE) -C backend/python/mamba protogen
|
||||||
|
|
||||||
|
.PHONY: mamba-protogen-clean
|
||||||
|
mamba-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/mamba protogen-clean
|
||||||
|
|
||||||
.PHONY: rerankers-protogen
|
.PHONY: rerankers-protogen
|
||||||
rerankers-protogen:
|
rerankers-protogen:
|
||||||
$(MAKE) -C backend/python/rerankers protogen
|
$(MAKE) -C backend/python/rerankers protogen
|
||||||
@@ -595,6 +598,14 @@ rerankers-protogen:
|
|||||||
rerankers-protogen-clean:
|
rerankers-protogen-clean:
|
||||||
$(MAKE) -C backend/python/rerankers protogen-clean
|
$(MAKE) -C backend/python/rerankers protogen-clean
|
||||||
|
|
||||||
|
.PHONY: sentencetransformers-protogen
|
||||||
|
sentencetransformers-protogen:
|
||||||
|
$(MAKE) -C backend/python/sentencetransformers protogen
|
||||||
|
|
||||||
|
.PHONY: sentencetransformers-protogen-clean
|
||||||
|
sentencetransformers-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/sentencetransformers protogen-clean
|
||||||
|
|
||||||
.PHONY: transformers-protogen
|
.PHONY: transformers-protogen
|
||||||
transformers-protogen:
|
transformers-protogen:
|
||||||
$(MAKE) -C backend/python/transformers protogen
|
$(MAKE) -C backend/python/transformers protogen
|
||||||
@@ -603,13 +614,37 @@ transformers-protogen:
|
|||||||
transformers-protogen-clean:
|
transformers-protogen-clean:
|
||||||
$(MAKE) -C backend/python/transformers protogen-clean
|
$(MAKE) -C backend/python/transformers protogen-clean
|
||||||
|
|
||||||
.PHONY: kokoro-protogen
|
.PHONY: parler-tts-protogen
|
||||||
kokoro-protogen:
|
parler-tts-protogen:
|
||||||
$(MAKE) -C backend/python/kokoro protogen
|
$(MAKE) -C backend/python/parler-tts protogen
|
||||||
|
|
||||||
.PHONY: kokoro-protogen-clean
|
.PHONY: parler-tts-protogen-clean
|
||||||
kokoro-protogen-clean:
|
parler-tts-protogen-clean:
|
||||||
$(MAKE) -C backend/python/kokoro protogen-clean
|
$(MAKE) -C backend/python/parler-tts protogen-clean
|
||||||
|
|
||||||
|
.PHONY: transformers-musicgen-protogen
|
||||||
|
transformers-musicgen-protogen:
|
||||||
|
$(MAKE) -C backend/python/transformers-musicgen protogen
|
||||||
|
|
||||||
|
.PHONY: transformers-musicgen-protogen-clean
|
||||||
|
transformers-musicgen-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/transformers-musicgen protogen-clean
|
||||||
|
|
||||||
|
.PHONY: vall-e-x-protogen
|
||||||
|
vall-e-x-protogen:
|
||||||
|
$(MAKE) -C backend/python/vall-e-x protogen
|
||||||
|
|
||||||
|
.PHONY: vall-e-x-protogen-clean
|
||||||
|
vall-e-x-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/vall-e-x protogen-clean
|
||||||
|
|
||||||
|
.PHONY: openvoice-protogen
|
||||||
|
openvoice-protogen:
|
||||||
|
$(MAKE) -C backend/python/openvoice protogen
|
||||||
|
|
||||||
|
.PHONY: openvoice-protogen-clean
|
||||||
|
openvoice-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/openvoice protogen-clean
|
||||||
|
|
||||||
.PHONY: vllm-protogen
|
.PHONY: vllm-protogen
|
||||||
vllm-protogen:
|
vllm-protogen:
|
||||||
@@ -626,11 +661,15 @@ prepare-extra-conda-environments: protogen-python
|
|||||||
$(MAKE) -C backend/python/bark
|
$(MAKE) -C backend/python/bark
|
||||||
$(MAKE) -C backend/python/coqui
|
$(MAKE) -C backend/python/coqui
|
||||||
$(MAKE) -C backend/python/diffusers
|
$(MAKE) -C backend/python/diffusers
|
||||||
$(MAKE) -C backend/python/faster-whisper
|
|
||||||
$(MAKE) -C backend/python/vllm
|
$(MAKE) -C backend/python/vllm
|
||||||
|
$(MAKE) -C backend/python/mamba
|
||||||
|
$(MAKE) -C backend/python/sentencetransformers
|
||||||
$(MAKE) -C backend/python/rerankers
|
$(MAKE) -C backend/python/rerankers
|
||||||
$(MAKE) -C backend/python/transformers
|
$(MAKE) -C backend/python/transformers
|
||||||
$(MAKE) -C backend/python/kokoro
|
$(MAKE) -C backend/python/transformers-musicgen
|
||||||
|
$(MAKE) -C backend/python/parler-tts
|
||||||
|
$(MAKE) -C backend/python/vall-e-x
|
||||||
|
$(MAKE) -C backend/python/openvoice
|
||||||
$(MAKE) -C backend/python/exllama2
|
$(MAKE) -C backend/python/exllama2
|
||||||
|
|
||||||
prepare-test-extra: protogen-python
|
prepare-test-extra: protogen-python
|
||||||
@@ -654,6 +693,13 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
|
|||||||
backend-assets/grpc: protogen-go replace
|
backend-assets/grpc: protogen-go replace
|
||||||
mkdir -p backend-assets/grpc
|
mkdir -p backend-assets/grpc
|
||||||
|
|
||||||
|
backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
|
||||||
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
|
||||||
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
|
||||||
|
ifneq ($(UPX),)
|
||||||
|
$(UPX) backend-assets/grpc/bert-embeddings
|
||||||
|
endif
|
||||||
|
|
||||||
backend-assets/grpc/huggingface: backend-assets/grpc
|
backend-assets/grpc/huggingface: backend-assets/grpc
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
|
||||||
ifneq ($(UPX),)
|
ifneq ($(UPX),)
|
||||||
@@ -700,13 +746,6 @@ backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.
|
|||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
|
||||||
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
|
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
|
||||||
|
|
||||||
backend-assets/grpc/llama-cpp-avx512: backend-assets/grpc backend/cpp/llama/llama.cpp
|
|
||||||
cp -rf backend/cpp/llama backend/cpp/llama-avx512
|
|
||||||
$(MAKE) -C backend/cpp/llama-avx512 purge
|
|
||||||
$(info ${GREEN}I llama-cpp build info:avx512${RESET})
|
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx512" build-llama-cpp-grpc-server
|
|
||||||
cp -rfv backend/cpp/llama-avx512/grpc-server backend-assets/grpc/llama-cpp-avx512
|
|
||||||
|
|
||||||
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
|
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
cp -rf backend/cpp/llama backend/cpp/llama-avx
|
cp -rf backend/cpp/llama backend/cpp/llama-avx
|
||||||
$(MAKE) -C backend/cpp/llama-avx purge
|
$(MAKE) -C backend/cpp/llama-avx purge
|
||||||
@@ -720,6 +759,10 @@ backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/ll
|
|||||||
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
|
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
|
||||||
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
|
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
|
||||||
|
# TODO: every binary should have its own folder instead, so can have different metal implementations
|
||||||
|
ifeq ($(BUILD_TYPE),metal)
|
||||||
|
cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
|
||||||
|
endif
|
||||||
|
|
||||||
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
|
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
cp -rf backend/cpp/llama backend/cpp/llama-cuda
|
cp -rf backend/cpp/llama backend/cpp/llama-cuda
|
||||||
@@ -732,7 +775,7 @@ backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/lla
|
|||||||
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
|
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
|
||||||
$(MAKE) -C backend/cpp/llama-hipblas purge
|
$(MAKE) -C backend/cpp/llama-hipblas purge
|
||||||
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
|
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
|
BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
|
||||||
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
|
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
|
||||||
|
|
||||||
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
|
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
@@ -767,13 +810,6 @@ ifneq ($(UPX),)
|
|||||||
$(UPX) backend-assets/grpc/llama-ggml
|
$(UPX) backend-assets/grpc/llama-ggml
|
||||||
endif
|
endif
|
||||||
|
|
||||||
backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc
|
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \
|
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/
|
|
||||||
ifneq ($(UPX),)
|
|
||||||
$(UPX) backend-assets/grpc/bark-cpp
|
|
||||||
endif
|
|
||||||
|
|
||||||
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
|
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
|
||||||
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
|
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
|
||||||
@@ -781,11 +817,25 @@ ifneq ($(UPX),)
 $(UPX) backend-assets/grpc/piper
 endif

-backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
-CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
-$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
+backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
+CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
+$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
 ifneq ($(UPX),)
-$(UPX) backend-assets/grpc/silero-vad
+$(UPX) backend-assets/grpc/rwkv
+endif
+
+backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
+CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
+$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
+ifneq ($(UPX),)
+$(UPX) backend-assets/grpc/stablediffusion
+endif
+
+backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
+CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
+$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
+ifneq ($(UPX),)
+$(UPX) backend-assets/grpc/tinydream
 endif

 backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
@@ -841,7 +891,7 @@ docker-aio-all:

 docker-image-intel:
 docker build \
---build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
+--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
 --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 --build-arg GO_TAGS="none" \
 --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
@@ -849,7 +899,7 @@ docker-image-intel:

 docker-image-intel-xpu:
 docker build \
---build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
+--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
 --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 --build-arg GO_TAGS="none" \
 --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
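The recipes above forward `IMAGE_TYPE` and `DOCKER_MAKEFLAGS` as Docker build arguments, so a local Intel image build might look like this (a sketch — the variable values are illustrative, not taken from the diff):

```bash
# build the Intel oneAPI image with the extra Python backends included
IMAGE_TYPE=extras DOCKER_MAKEFLAGS="-j$(nproc)" make docker-image-intel
```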
README.md (62)
@@ -38,13 +38,9 @@
 </a>
 </p>

-<p align="center">
-<a href="https://trendshift.io/repositories/5539" target="_blank"><img src="https://trendshift.io/api/badge/repositories/5539" alt="mudler%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
-</p>

 > :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
 >
-> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples)
+> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/go-skynet/LocalAI/tree/master/examples/)

 [](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)

@@ -60,47 +56,29 @@ curl https://localai.io/install.sh | sh

 Or run with docker:
 ```bash
-# CPU only image:
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-cpu

-# Nvidia GPU:
-docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12

-# CPU and GPU image (bigger size):
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest

-# AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/)
 docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
-```
-To load models:
-```bash
-# From the model gallery (see available models with `local-ai models list`, in the WebUI from the model tab, or visiting https://models.localai.io)
-local-ai run llama-3.2-1b-instruct:q4_k_m
-# Start LocalAI with the phi-2 model directly from huggingface
-local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
-# Install and run a model from the Ollama OCI registry
-local-ai run ollama://gemma:2b
-# Run a model from a configuration file
-local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
-# Install and run a model from a standard OCI registry (e.g., Docker Hub)
-local-ai run oci://localai/phi-2:latest
+# Alternative images:
+# - if you have an Nvidia GPU:
+# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
+# - without preconfigured models
+# docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
+# - without preconfigured models for Nvidia GPUs
+# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
 ```

 [💻 Getting started](https://localai.io/basics/getting_started/index.html)

 ## 📰 Latest project news

-- Jan 2025: LocalAI model release: https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.3, SANA support in diffusers: https://github.com/mudler/LocalAI/pull/4603
-- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 )
-- Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 )
-- Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204
-- Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples)
 - Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
-- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723. P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
+- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
+- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
+- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628
 - May 2024: 🔥🔥 Decentralized P2P llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
+- May 2024: 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
+- May 2024: 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
 - May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
+- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
 - April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121

 Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
@@ -109,10 +87,12 @@ Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3A

 - Multimodal with vLLM and Video understanding: https://github.com/mudler/LocalAI/pull/3729
 - Realtime API https://github.com/mudler/LocalAI/issues/3714
+- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
 - WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
 - Backends v2: https://github.com/mudler/LocalAI/issues/1126
 - Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
 - Assistant API: https://github.com/mudler/LocalAI/issues/1273
+- Moderation endpoint: https://github.com/mudler/LocalAI/issues/999
 - Vulkan: https://github.com/mudler/LocalAI/issues/1647
 - Anthropic API: https://github.com/mudler/LocalAI/issues/1808

@@ -120,10 +100,10 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl

 ## 🚀 [Features](https://localai.io/features/)

-- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
+- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
 - 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
 - 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
-- 🎨 [Image generation](https://localai.io/features/image-generation)
+- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
 - 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/)
 - 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
 - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
@@ -131,7 +111,6 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
 - 🥽 [Vision API](https://localai.io/features/gpt-vision/)
 - 📈 [Reranker API](https://localai.io/features/reranker/)
 - 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
-- 🔊 Voice activity detection (Silero-VAD support)
 - 🌍 Integrated WebUI!

 ## 💻 Usage
@@ -154,7 +133,6 @@ Model galleries
 Other:
 - Helm chart https://github.com/go-skynet/helm-charts
 - VSCode extension https://github.com/badgooooor/localai-vscode-plugin
-- Langchain: https://python.langchain.com/docs/integrations/providers/localai/
 - Terminal utility https://github.com/djcopley/ShellOracle
 - Local Smart assistant https://github.com/mudler/LocalAGI
 - Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation / https://github.com/valentinfrlch/ha-gpt4vision
@@ -162,9 +140,6 @@ Other:
 - Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
 - Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
 - Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
-- Another Telegram Bot https://github.com/JackBekket/Hellper
-- Auto-documentation https://github.com/JackBekket/Reflexia
-- Github bot which answer on issues, with code and documentation as context https://github.com/JackBekket/GitHelper
 - Github Actions: https://github.com/marketplace/actions/start-localai
 - Examples: https://github.com/mudler/LocalAI/tree/master/examples/

@@ -239,6 +214,7 @@ LocalAI couldn't have been built without the help of great software already avai
 - https://github.com/antimatter15/alpaca.cpp
 - https://github.com/EdVince/Stable-Diffusion-NCNN
 - https://github.com/ggerganov/whisper.cpp
+- https://github.com/saharNooby/rwkv.cpp
 - https://github.com/rhasspy/piper

 ## 🤗 Contributors
@@ -1,7 +1,7 @@
 name: text-embedding-ada-002
-embeddings: true
+backend: bert-embeddings
 parameters:
-  model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf
+  model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin

 usage: |
   You can test this model with curl like this:
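The usage note in that hunk is cut off; with either side of this config, the model is served through the OpenAI-compatible embeddings endpoint, so a request looks roughly like the sketch below (assuming LocalAI is listening on localhost:8080 — the input string is illustrative):

```bash
curl http://localhost:8080/v1/embeddings \
  -H "Content-Type: application/json" \
  -d '{"model": "text-embedding-ada-002", "input": "A long time ago in a galaxy far, far away"}'
```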
@@ -1,17 +1,56 @@
|
|||||||
name: stablediffusion
|
name: stablediffusion
|
||||||
backend: stablediffusion-ggml
|
backend: stablediffusion
|
||||||
cfg_scale: 4.5
|
|
||||||
|
|
||||||
options:
|
|
||||||
- sampler:euler
|
|
||||||
parameters:
|
parameters:
|
||||||
model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf
|
model: stablediffusion_assets
|
||||||
step: 25
|
|
||||||
|
license: "BSD-3"
|
||||||
|
urls:
|
||||||
|
- https://github.com/EdVince/Stable-Diffusion-NCNN
|
||||||
|
- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE
|
||||||
|
|
||||||
|
description: |
|
||||||
|
Stable Diffusion in NCNN with c++, supported txt2img and img2img
|
||||||
|
|
||||||
download_files:
|
download_files:
|
||||||
- filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
|
- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
|
||||||
sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f"
|
sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
|
||||||
uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
|
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
|
||||||
|
- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
|
||||||
|
sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
|
||||||
|
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
|
||||||
|
- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
|
||||||
|
sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
|
||||||
|
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
|
||||||
|
- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
|
||||||
|
sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
|
||||||
|
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin"
|
||||||
|
- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin"
|
||||||
|
sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
|
||||||
|
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin"
|
||||||
|
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
|
||||||
|
sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
|
||||||
|
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin"
|
||||||
|
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
|
||||||
|
sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
|
||||||
|
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
|
||||||
|
- filename: "stablediffusion_assets/log_sigmas.bin"
|
||||||
|
sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
|
||||||
|
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
|
||||||
|
- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
|
||||||
|
sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
|
||||||
|
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
|
||||||
|
- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
|
||||||
|
sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
|
||||||
|
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
|
||||||
|
- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
|
||||||
|
sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
|
||||||
|
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
|
||||||
|
- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
|
||||||
|
sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
|
||||||
|
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
|
||||||
|
- filename: "stablediffusion_assets/vocab.txt"
|
||||||
|
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
|
||||||
|
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
|
||||||
|
|
||||||
usage: |
|
usage: |
|
||||||
curl http://localhost:8080/v1/images/generations \
|
curl http://localhost:8080/v1/images/generations \
|
||||||
|
|||||||
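The stablediffusion config's usage snippet above is likewise truncated mid-command; a complete request against the image generation endpoint typically looks like this (a sketch — the prompt and size values are illustrative):

```bash
curl http://localhost:8080/v1/images/generations \
  -H "Content-Type: application/json" \
  -d '{"model": "stablediffusion", "prompt": "a cute baby sea otter", "size": "256x256"}'
```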
@@ -28,8 +28,6 @@ service Backend {
 rpc Rerank(RerankRequest) returns (RerankResult) {}

 rpc GetMetrics(MetricsRequest) returns (MetricsResponse);

-rpc VAD(VADRequest) returns (VADResponse) {}
 }

 // Define the empty request
@@ -159,8 +157,6 @@ message Reply {
 bytes message = 1;
 int32 tokens = 2;
 int32 prompt_tokens = 3;
-double timing_prompt_processing = 4;
-double timing_token_generation = 5;
 }

 message ModelOptions {
@@ -223,7 +219,6 @@ message ModelOptions {
 int32 SwapSpace = 53;
 int32 MaxModelLen = 54;
 int32 TensorParallelSize = 55;
-string LoadFormat = 58;

 string MMProj = 41;

@@ -237,16 +232,6 @@ message ModelOptions {

 bool FlashAttention = 56;
 bool NoKVOffload = 57;

-string ModelPath = 59;

-repeated string LoraAdapters = 60;
-repeated float LoraScales = 61;

-repeated string Options = 62;

-string CacheTypeKey = 63;
-string CacheTypeValue = 64;
 }

 message Result {
@@ -302,19 +287,6 @@ message TTSRequest {
 optional string language = 5;
 }

-message VADRequest {
-repeated float audio = 1;
-}

-message VADSegment {
-float start = 1;
-float end = 2;
-}

-message VADResponse {
-repeated VADSegment segments = 1;
-}

 message SoundGenerationRequest {
 string text = 1;
 string model = 2;
@@ -350,4 +322,4 @@ message StatusResponse {
 message Message {
 string role = 1;
 string content = 2;
 }
@@ -22,7 +22,7 @@ else ifeq ($(BUILD_TYPE),clblas)
     CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
 # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
 else ifeq ($(BUILD_TYPE),hipblas)
-    CMAKE_ARGS+=-DGGML_HIP=ON
+    CMAKE_ARGS+=-DGGML_HIPBLAS=ON
 # If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
 # But if it's OSX without metal, disable it here
 else ifeq ($(OS),Darwin)
@@ -30,7 +30,9 @@ else ifeq ($(OS),Darwin)
         CMAKE_ARGS+=-DGGML_METAL=OFF
     else
         CMAKE_ARGS+=-DGGML_METAL=ON
-        CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
+        # Until this is tested properly, we disable embedded metal file
+        # as we already embed it as part of the LocalAI assets
+        CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=OFF
         TARGET+=--target ggml-metal
     endif
 endif
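For context on the Makefile hunk above: BUILD_TYPE selects the GGML CMake flags the llama.cpp backend is built with, and the hipblas case also expects the ROCm clang toolchain (per the comment in the hunk). A sketch of such a build, assuming the repository's usual `make build` entry point (the target name is an assumption, not shown in this compare):

    # assumed invocation; the `build` target name is not part of this compare
    CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ BUILD_TYPE=hipblas make build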
@@ -22,7 +22,6 @@
 #include "backend.grpc.pb.h"
 #include "utils.hpp"
 #include "sampling.h"
-#include "speculative.h"
 // include std::regex
 #include <cstddef>
 #include <thread>
@@ -114,7 +113,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
     std::string ret;
     for (; begin != end; ++begin)
     {
-        ret += common_token_to_piece(ctx, *begin);
+        ret += llama_token_to_piece(ctx, *begin);
     }
     return ret;
 }
@@ -122,7 +121,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
 // format incomplete utf-8 multibyte character for output
 static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
 {
-    std::string out = token == -1 ? "" : common_token_to_piece(ctx, token);
+    std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
     // if the size is 1 and first bit is 1, meaning it's a partial character
     // (size > 1 meaning it's already a known token)
     if (out.size() == 1 && (out[0] & 0x80) == 0x80)
@@ -135,32 +134,6 @@ static std::string tokens_to_output_formatted_string(const llama_context *ctx, c
     return out;
 }
 
-// Adds an RPC server
-// https://github.com/ggerganov/llama.cpp/compare/4dbc8b9cb71876e005724f4e8f73a3544646bcf5..3edfa7d3753c29e44b964c0ff424d2ea8d5fdee6
-static void add_rpc_devices(std::string servers) {
-    auto rpc_servers = string_split<std::string>(servers, ',');
-    if (rpc_servers.empty()) {
-        throw std::invalid_argument("no RPC servers specified");
-    }
-    ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name("RPC");
-    if (!rpc_reg) {
-        throw std::invalid_argument("failed to find RPC backend");
-    }
-    typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t)(const char * endpoint);
-    ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device");
-    if (!ggml_backend_rpc_add_device_fn) {
-        throw std::invalid_argument("failed to find RPC device add function");
-    }
-    for (const auto & server : rpc_servers) {
-        ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str());
-        if (dev) {
-            ggml_backend_device_register(dev);
-        } else {
-            throw std::invalid_argument("failed to register RPC device");
-        }
-    }
-}
-
 // convert a vector of completion_token_output to json
 static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> &probs)
 {
@@ -186,45 +159,12 @@ static json probs_vector_to_json(const llama_context *ctx, const std::vector<com
     return out;
 }
 
-struct llama_slot_params {
-    uint32_t seed = -1; // RNG seed
-    bool stream = true;
-    bool cache_prompt = true; // remember the prompt to avoid reprocessing all prompt
-    bool return_tokens = false;
-
-    int32_t n_keep = 0; // number of tokens to keep from initial prompt
-    int32_t n_discard = 0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half
-    int32_t n_predict = -1; // new tokens to predict
-    int32_t n_indent = 0; // mininum line indentation for the generated text in number of whitespace characters
-
-    int64_t t_max_prompt_ms = -1; // TODO: implement
-    int64_t t_max_predict_ms = -1; // if positive, limit the generation phase to this time limit
-
-    std::vector<common_adapter_lora_info> lora;
-
-    std::vector<std::string> antiprompt;
-    std::vector<std::string> response_fields;
-    bool timings_per_token = false;
-    bool post_sampling_probs = false;
-    bool ignore_eos = false;
-
-    json input_prefix;
-    json input_suffix;
-
-    struct common_params_sampling sampling;
-    struct common_params_speculative speculative;
-};
-
 struct llama_client_slot
 {
     int id;
     int task_id = -1;
 
-    struct llama_slot_params params;
-    common_speculative * spec = nullptr;
-    llama_batch batch_spec = {};
+    struct slot_params params;
 
     slot_state state = IDLE;
     slot_command command = NONE;
@@ -263,8 +203,8 @@ struct llama_client_slot
     std::string stopping_word;
 
     // sampling
-    struct common_params_sampling sparams;
-    common_sampler *ctx_sampling = nullptr;
+    struct gpt_sampler_params sparams;
+    gpt_sampler *ctx_sampling = nullptr;
 
     int32_t ga_i = 0; // group-attention state
     int32_t ga_n = 1; // group-attention factor
@@ -317,8 +257,7 @@ struct llama_client_slot
         images.clear();
     }
 
-    bool has_budget(common_params &global_params) {
+    bool has_budget(gpt_params &global_params) {
         if (params.n_predict == -1 && global_params.n_predict == -1)
         {
             return true; // limitless
@@ -452,52 +391,14 @@ struct llama_metrics {
     }
 };
 
-struct llava_embd_batch {
-    std::vector<llama_pos> pos;
-    std::vector<int32_t> n_seq_id;
-    std::vector<llama_seq_id> seq_id_0;
-    std::vector<llama_seq_id *> seq_ids;
-    std::vector<int8_t> logits;
-    llama_batch batch;
-    llava_embd_batch(float * embd, int32_t n_tokens, llama_pos pos_0, llama_seq_id seq_id) {
-        pos .resize(n_tokens);
-        n_seq_id.resize(n_tokens);
-        seq_ids .resize(n_tokens + 1);
-        logits .resize(n_tokens);
-        seq_id_0.resize(1);
-        seq_id_0[0] = seq_id;
-        seq_ids [n_tokens] = nullptr;
-        batch = {
-            /*n_tokens =*/ n_tokens,
-            /*tokens =*/ nullptr,
-            /*embd =*/ embd,
-            /*pos =*/ pos.data(),
-            /*n_seq_id =*/ n_seq_id.data(),
-            /*seq_id =*/ seq_ids.data(),
-            /*logits =*/ logits.data(),
-        };
-        for (int i = 0; i < n_tokens; i++) {
-            batch.pos [i] = pos_0 + i;
-            batch.n_seq_id[i] = 1;
-            batch.seq_id [i] = seq_id_0.data();
-            batch.logits [i] = false;
-        }
-    }
-};
-
 struct llama_server_context
 {
     llama_model *model = nullptr;
     llama_context *ctx = nullptr;
-    common_init_result llama_init_dft;
-    llama_context * ctx_dft = nullptr;
-    llama_model * model_dft = nullptr;
-    llama_context_params cparams_dft;
-    const llama_vocab * vocab = nullptr;
 
     clip_ctx *clp_ctx = nullptr;
 
-    common_params params;
+    gpt_params params;
 
     llama_batch batch;
 
@@ -505,7 +406,6 @@ struct llama_server_context
     bool clean_kv_cache = true;
     bool all_slots_are_idle = false;
     bool add_bos_token = true;
-    bool has_eos_token = true;
 
     int32_t n_ctx; // total context for all clients / slots
 
@@ -541,8 +441,7 @@ struct llama_server_context
         }
     }
 
-    bool load_model(const common_params &params_)
+    bool load_model(const gpt_params &params_)
     {
         params = params_;
         if (!params.mmproj.empty()) {
@@ -559,9 +458,9 @@ struct llama_server_context
             }
         }
 
-        common_init_result common_init = common_init_from_params(params);
-        model = common_init.model.release();
-        ctx = common_init.context.release();
+        llama_init_result llama_init = llama_init_from_gpt_params(params);
+        model = llama_init.model;
+        ctx = llama_init.context;
         if (model == nullptr)
         {
             LOG_ERR("unable to load model: %s", params.model.c_str());
@@ -570,7 +469,7 @@ struct llama_server_context
 
         if (multimodal) {
             const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
-            const int n_embd_llm = llama_model_n_embd(model);
+            const int n_embd_llm = llama_n_embd(model);
             if (n_embd_clip != n_embd_llm) {
                 LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
                 llama_free(ctx);
@@ -579,54 +478,23 @@ struct llama_server_context
             }
         }
 
-        vocab = llama_model_get_vocab(model);
         n_ctx = llama_n_ctx(ctx);
 
-        add_bos_token = llama_vocab_get_add_bos(vocab);
-        has_eos_token = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL;
-
-        if (!params.speculative.model.empty()) {
-            LOG("loading draft model '%s'\n", params.speculative.model.c_str());
-
-            auto params_dft = params;
-
-            params_dft.devices = params.speculative.devices;
-            params_dft.model = params.speculative.model;
-            params_dft.n_ctx = params.speculative.n_ctx == 0 ? params.n_ctx / params.n_parallel : params.speculative.n_ctx;
-            params_dft.n_gpu_layers = params.speculative.n_gpu_layers;
-            params_dft.n_parallel = 1;
-
-            llama_init_dft = common_init_from_params(params_dft);
-
-            model_dft = llama_init_dft.model.get();
-
-            if (model_dft == nullptr) {
-                LOG("failed to load draft model, '%s'\n", params.speculative.model.c_str());
-                return false;
-            }
-
-            if (!common_speculative_are_compatible(ctx, llama_init_dft.context.get())) {
-                LOG("the draft model '%s' is not compatible with the target model '%s'\n", params.speculative.model.c_str(), params.model.c_str());
-
-                return false;
-            }
-
-            const int n_ctx_dft = llama_n_ctx(llama_init_dft.context.get());
-
-            cparams_dft = common_context_params_to_llama(params_dft);
-            cparams_dft.n_batch = n_ctx_dft;
-
-            // force F16 KV cache for the draft model for extra performance
-            cparams_dft.type_k = GGML_TYPE_F16;
-            cparams_dft.type_v = GGML_TYPE_F16;
-
-            // the context is not needed - we will create one for each slot
-            llama_init_dft.context.reset();
-        }
-
+        add_bos_token = llama_add_bos_token(model);
         return true;
     }
 
+    void validate_model_chat_template(server_params & sparams) {
+        llama_chat_message chat[] = {{"user", "test"}};
+        std::vector<char> buf(1);
+        int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
+        if (res < 0) {
+            LOG_ERR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", __func__);
+            sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template
+        }
+    }
+
     llama_client_slot* get_active_slot() {
         for (llama_client_slot& slot : slots) {
             // Check if the slot is currently processing
@@ -652,22 +520,6 @@ struct llama_server_context
             slot.n_ctx = n_ctx_slot;
             slot.n_predict = params.n_predict;
 
-            if (model_dft) {
-                slot.batch_spec = llama_batch_init(params.speculative.n_max + 1, 0, 1);
-
-                ctx_dft = llama_init_from_model(model_dft, cparams_dft);
-                if (ctx_dft == nullptr) {
-                    LOG("%s", "failed to create draft context\n");
-                    return;
-                }
-
-                slot.spec = common_speculative_init(ctx_dft);
-                if (slot.spec == nullptr) {
-                    LOG("%s", "failed to create speculator\n");
-                    return;
-                }
-            }
-
             LOG_INFO("new slot", {
                 {"slot_id", slot.id},
                 {"n_ctx_slot", slot.n_ctx}
@@ -726,12 +578,12 @@ struct llama_server_context
                 std::vector<llama_token> p;
                 if (first)
                 {
-                    p = common_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
+                    p = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
                     first = false;
                 }
                 else
                 {
-                    p = common_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
+                    p = ::llama_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
                 }
                 prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end());
             }
@@ -748,7 +600,7 @@ struct llama_server_context
         else
         {
             auto s = json_prompt.template get<std::string>();
-            prompt_tokens = common_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
+            prompt_tokens = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
         }
 
         return prompt_tokens;
@@ -776,17 +628,16 @@ struct llama_server_context
     }
 
     bool launch_slot_with_data(llama_client_slot* &slot, json data) {
-        llama_slot_params default_params;
-        common_params_sampling default_sparams;
-
-        default_sparams.speculative = params_base.speculative;
+        slot_params default_params;
+        gpt_sampler_params default_sparams;
 
         slot->params.stream = json_value(data, "stream", false);
         slot->params.cache_prompt = json_value(data, "cache_prompt", false);
         slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
         slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
         slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
         slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
+        slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
         slot->sparams.typ_p = json_value(data, "typical_p", default_sparams.typ_p);
         slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
         slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
@@ -798,21 +649,13 @@ struct llama_server_context
         slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
         slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
         slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
+        slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
         slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
         slot->sparams.seed = json_value(data, "seed", default_sparams.seed);
         slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
         slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
         slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
 
-        slot->sparams.speculative.n_min = json_value(data, "speculative.n_min", defaults.speculative.n_min);
-        slot->sparams.speculative.n_max = json_value(data, "speculative.n_max", defaults.speculative.n_max);
-        slot->sparams.speculative.p_min = json_value(data, "speculative.p_min", defaults.speculative.p_min);
-
-        slot->sparams.speculative.n_min = std::min(params.speculative.n_max, params.speculative.n_min);
-        slot->sparams.speculative.n_min = std::max(params.speculative.n_min, 2);
-        slot->sparams.speculative.n_max = std::max(params.speculative.n_max, 0);
-
         if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) {
             // Might be better to reject the request with a 400 ?
             LOG_WARNING("Max tokens to predict exceeds server configuration", {
@@ -851,8 +694,8 @@ struct llama_server_context
             slot->prompt = "";
         }
 
-        if (json_value(data, "ignore_eos", false) && has_eos_token) {
-            slot->sparams.logit_bias.push_back({llama_vocab_eos(vocab), -INFINITY});
+        if (json_value(data, "ignore_eos", false)) {
+            slot->sparams.logit_bias.push_back({llama_token_eos(model), -INFINITY});
         }
         /*
         slot->sparams.penalty_prompt_tokens.clear();
@@ -891,13 +734,13 @@ struct llama_server_context
             }
         }
         */
 
         slot->sparams.logit_bias.clear();
 
         const auto &logit_bias = data.find("logit_bias");
         if (logit_bias != data.end() && logit_bias->is_array())
         {
-            const llama_vocab * vocab = llama_model_get_vocab(model);
-            const int n_vocab = llama_vocab_n_tokens(vocab);
+            const int n_vocab = llama_n_vocab(model);
             for (const auto &el : *logit_bias)
             {
                 if (el.is_array() && el.size() == 2)
@@ -926,7 +769,7 @@ struct llama_server_context
                     }
                     else if (el[0].is_string())
                     {
-                        auto toks = common_tokenize(vocab, el[0].get<std::string>(), false);
+                        auto toks = llama_tokenize(model, el[0].get<std::string>(), false);
                         for (auto tok : toks)
                         {
                             slot->sparams.logit_bias.push_back({tok, bias});
@@ -958,7 +801,7 @@ struct llama_server_context
                     sampler_names.emplace_back(name);
                 }
             }
-            slot->sparams.samplers = common_sampler_types_from_names(sampler_names, false);
+            slot->sparams.samplers = gpt_sampler_types_from_names(sampler_names, false);
         }
         else
         {
@@ -1042,9 +885,9 @@ struct llama_server_context
 
         if (slot->ctx_sampling != nullptr)
         {
-            common_sampler_free(slot->ctx_sampling);
+            gpt_sampler_free(slot->ctx_sampling);
         }
-        slot->ctx_sampling = common_sampler_init(model, slot->sparams);
+        slot->ctx_sampling = gpt_sampler_init(model, slot->sparams);
         //llama_set_rng_seed(ctx, slot->params.seed);
         slot->command = LOAD_PROMPT;
 
@@ -1071,13 +914,13 @@ struct llama_server_context
         system_tokens.clear();
 
         if (!system_prompt.empty()) {
-            system_tokens = common_tokenize(ctx, system_prompt, add_bos_token);
+            system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token);
 
-            common_batch_clear(batch);
+            llama_batch_clear(batch);
 
             for (int i = 0; i < (int)system_tokens.size(); ++i)
             {
-                common_batch_add(batch, system_tokens[i], i, { 0 }, false);
+                llama_batch_add(batch, system_tokens[i], i, { 0 }, false);
             }
 
             for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += params.n_batch)
@@ -1091,6 +934,7 @@ struct llama_server_context
                     batch.n_seq_id + i,
                     batch.seq_id + i,
                     batch.logits + i,
+                    0, 0, 0, // unused
                 };
                 if (llama_decode(ctx, batch_view) != 0)
                 {
@@ -1165,7 +1009,7 @@ struct llama_server_context
 
     bool process_token(completion_token_output &result, llama_client_slot &slot) {
         // remember which tokens were sampled - used for repetition penalties during sampling
-        const std::string token_str = common_token_to_piece(ctx, result.tok);
+        const std::string token_str = llama_token_to_piece(ctx, result.tok);
         slot.sampled = result.tok;
 
         // search stop word and delete it
@@ -1256,7 +1100,7 @@ struct llama_server_context
             slot.has_next_token = false;
         }
 
-        if (result.tok == llama_vocab_eos(vocab) || llama_vocab_is_eog(vocab, result.tok))
+        if (result.tok == llama_token_eos(model))
         {
             slot.stopped_eos = true;
             slot.has_next_token = false;
@@ -1316,7 +1160,7 @@ struct llama_server_context
         samplers.reserve(slot.sparams.samplers.size());
         for (const auto & sampler : slot.sparams.samplers)
         {
-            samplers.emplace_back(common_sampler_type_to_str(sampler));
+            samplers.emplace_back(gpt_sampler_type_to_str(sampler));
         }
 
         return json {
@@ -1330,6 +1174,7 @@ struct llama_server_context
             {"top_k", slot.sparams.top_k},
             {"top_p", slot.sparams.top_p},
             {"min_p", slot.sparams.min_p},
+            {"tfs_z", slot.sparams.tfs_z},
             {"typical_p", slot.sparams.typ_p},
             {"repeat_last_n", slot.sparams.penalty_last_n},
             {"repeat_penalty", slot.sparams.penalty_repeat},
@@ -1338,12 +1183,13 @@ struct llama_server_context
             {"mirostat", slot.sparams.mirostat},
             {"mirostat_tau", slot.sparams.mirostat_tau},
            {"mirostat_eta", slot.sparams.mirostat_eta},
+            {"penalize_nl", slot.sparams.penalize_nl},
             {"stop", slot.params.antiprompt},
             {"n_predict", slot.params.n_predict},
             {"n_keep", params.n_keep},
             {"ignore_eos", slot.sparams.ignore_eos},
             {"stream", slot.params.stream},
             // {"logit_bias", slot.sparams.logit_bias},
             {"n_probs", slot.sparams.n_probs},
             {"min_keep", slot.sparams.min_keep},
             {"grammar", slot.sparams.grammar},
@@ -1370,7 +1216,7 @@ struct llama_server_context
         if (slot.sparams.n_probs > 0)
         {
             std::vector<completion_token_output> probs_output = {};
-            const std::vector<llama_token> to_send_toks = common_tokenize(ctx, tkn.text_to_send, false);
+            const std::vector<llama_token> to_send_toks = llama_tokenize(ctx, tkn.text_to_send, false);
             size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size());
             size_t probs_stop_pos = std::min(slot.sent_token_probs_index + to_send_toks.size(), slot.generated_token_probs.size());
             if (probs_pos < probs_stop_pos)
@@ -1422,7 +1268,7 @@ struct llama_server_context
         std::vector<completion_token_output> probs = {};
         if (!slot.params.stream && slot.stopped_word)
         {
-            const std::vector<llama_token> stop_word_toks = common_tokenize(ctx, slot.stopping_word, false);
+            const std::vector<llama_token> stop_word_toks = llama_tokenize(ctx, slot.stopping_word, false);
             probs = std::vector<completion_token_output>(slot.generated_token_probs.begin(), slot.generated_token_probs.end() - stop_word_toks.size());
         }
         else
@@ -1451,7 +1297,7 @@ struct llama_server_context
         res.error = false;
         res.stop = true;
 
-        const int n_embd = llama_model_n_embd(model);
+        const int n_embd = llama_n_embd(model);
         if (!params.embedding)
         {
             LOG_WARNING("embedding disabled", {
@@ -1533,6 +1379,7 @@ struct llama_server_context
                     batch.n_seq_id + i,
                     batch.seq_id + i,
                     batch.logits + i,
+                    0, 0, 0, // unused
                 };
                 if (llama_decode(ctx, batch_view))
                 {
@@ -1550,10 +1397,9 @@ struct llama_server_context
                     n_eval = n_batch;
                 }
 
-                const int n_embd = llama_model_n_embd(model);
-                float * embd = img.image_embedding + i * n_embd;
-                llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0);
-                if (llama_decode(ctx, llava_batch.batch))
+                const int n_embd = llama_n_embd(model);
+                llama_batch batch_img = { n_eval, nullptr, (img.image_embedding + i * n_embd), nullptr, nullptr, nullptr, nullptr, slot.n_past, 1, 0, };
+                if (llama_decode(ctx, batch_img))
                 {
                     LOG("%s : failed to eval image\n", __func__);
                     return false;
@@ -1562,7 +1408,7 @@ struct llama_server_context
             }
             image_idx++;
 
-            common_batch_clear(batch);
+            llama_batch_clear(batch);
 
             // append prefix of next image
             const auto json_prompt = (image_idx >= (int) slot.images.size()) ?
@@ -1572,7 +1418,7 @@ struct llama_server_context
             std::vector<llama_token> append_tokens = tokenize(json_prompt, false); // has next image
             for (int i = 0; i < (int) append_tokens.size(); ++i)
             {
-                common_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true);
+                llama_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true);
                 slot.n_past += 1;
             }
         }
@@ -1704,7 +1550,7 @@ struct llama_server_context
             update_system_prompt();
         }
 
-        common_batch_clear(batch);
+        llama_batch_clear(batch);
 
         if (all_slots_are_idle)
         {
@@ -1782,7 +1628,7 @@ struct llama_server_context
 
             // TODO: we always have to take into account the "system_tokens"
             // this is not great and needs to be improved somehow
-            common_batch_add(batch, slot.sampled, system_tokens.size() + slot_npast, { slot.id }, true);
+            llama_batch_add(batch, slot.sampled, system_tokens.size() + slot_npast, { slot.id }, true);
             slot.n_past += 1;
         }
 
@@ -1831,11 +1677,11 @@ struct llama_server_context
                     suffix_tokens.erase(suffix_tokens.begin());
                 }
 
-                prefix_tokens.insert(prefix_tokens.begin(), llama_vocab_fim_pre(vocab));
-                prefix_tokens.insert(prefix_tokens.begin(), llama_vocab_bos(vocab)); // always add BOS
-                prefix_tokens.insert(prefix_tokens.end(), llama_vocab_fim_suf(vocab));
+                prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(model));
+                prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(model)); // always add BOS
+                prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(model));
                 prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end());
-                prefix_tokens.push_back(llama_vocab_fim_mid(vocab));
+                prefix_tokens.push_back(llama_token_middle(model));
                 prompt_tokens = prefix_tokens;
             }
             else
@@ -1876,7 +1722,7 @@ struct llama_server_context
 
                 if (!slot.params.cache_prompt)
                 {
-                    common_sampler_reset(slot.ctx_sampling);
+                    gpt_sampler_reset(slot.ctx_sampling);
 
                     slot.n_past = 0;
                     slot.n_past_se = 0;
@@ -1888,7 +1734,7 @@ struct llama_server_context
                     // push the prompt into the sampling context (do not apply grammar)
                     for (auto &token : prompt_tokens)
                     {
-                        common_sampler_accept(slot.ctx_sampling, token, false);
+                        gpt_sampler_accept(slot.ctx_sampling, token, false);
                     }
 
                     slot.n_past = common_part(slot.cache_tokens, prompt_tokens);
@@ -1980,7 +1826,7 @@ struct llama_server_context
                         ga_i += ga_w/ga_n;
                     }
                 }
-                common_batch_add(batch, prefix_tokens[slot.n_past], system_tokens.size() + slot_npast, {slot.id }, false);
+                llama_batch_add(batch, prefix_tokens[slot.n_past], system_tokens.size() + slot_npast, {slot.id }, false);
                 slot_npast++;
             }
 
@@ -2058,6 +1904,7 @@ struct llama_server_context
                 batch.n_seq_id + i,
                 batch.seq_id + i,
                 batch.logits + i,
+                0, 0, 0, // unused
             };
 
             const int ret = llama_decode(ctx, batch_view);
@@ -2096,9 +1943,9 @@ struct llama_server_context
             }
 
             completion_token_output result;
-            const llama_token id = common_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i);
+            const llama_token id = gpt_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i);
 
-            common_sampler_accept(slot.ctx_sampling, id, true);
+            gpt_sampler_accept(slot.ctx_sampling, id, true);
 
             slot.n_decoded += 1;
             if (slot.n_decoded == 1)
@@ -2109,7 +1956,7 @@ struct llama_server_context
             }
 
             result.tok = id;
-            const auto * cur_p = common_sampler_get_candidates(slot.ctx_sampling);
+            const auto * cur_p = gpt_sampler_get_candidates(slot.ctx_sampling);
 
             for (size_t i = 0; i < (size_t) slot.sparams.n_probs; ++i) {
                 result.probs.push_back({
@@ -2130,97 +1977,6 @@ struct llama_server_context
                 }
             }
 
-            // do speculative decoding
-            for (auto & slot : slots) {
-                if (!slot.is_processing() || !(ctx_dft && params.speculative.n_max > 0)) {
-                    continue;
-                }
-
-                if (slot.state != PROCESSING) {
-                    continue;
-                }
-
-                // determine the max draft that fits the current slot state
-                int n_draft_max = slot.params.speculative.n_max;
-
-                // note: n_past is not yet increased for the `id` token sampled above
-                // also, need to leave space for 1 extra token to allow context shifts
-                n_draft_max = std::min(n_draft_max, slot.n_ctx - slot.n_past - 2);
-
-                if (slot.n_remaining > 0) {
-                    n_draft_max = std::min(n_draft_max, slot.n_remaining - 1);
-                }
-
-                LOG("max possible draft: %d\n", n_draft_max);
-
-                if (n_draft_max < slot.params.speculative.n_min) {
-                    LOG("the max possible draft is too small: %d < %d - skipping speculative decoding\n", n_draft_max, slot.params.speculative.n_min);
-
-                    continue;
-                }
-
-                llama_token id = slot.sampled;
-
-                struct common_speculative_params params_spec;
-                params_spec.n_draft = n_draft_max;
-                params_spec.n_reuse = llama_n_ctx(ctx_dft) - slot.params.speculative.n_max;
-                params_spec.p_min = slot.params.speculative.p_min;
-
-                llama_tokens draft = common_speculative_gen_draft(slot.spec, params_spec, slot.cache_tokens, id);
-
-                // ignore small drafts
-                if (slot.params.speculative.n_min > (int) draft.size()) {
-                    LOG("ignoring small draft: %d < %d\n", (int) draft.size(), slot.params.speculative.n_min);
-
-                    continue;
-                }
-
-                // construct the speculation batch
-                common_batch_clear(slot.batch_spec);
-                common_batch_add (slot.batch_spec, id, slot.n_past, { slot.id }, true);
-
-                for (size_t i = 0; i < draft.size(); ++i) {
-                    common_batch_add(slot.batch_spec, draft[i], slot.n_past + 1 + i, { slot.id }, true);
-                }
-
-                LOG("decoding speculative batch, size = %d\n", slot.batch_spec.n_tokens);
-
-                llama_decode(ctx, slot.batch_spec);
-
-                // the accepted tokens from the speculation
-                const auto ids = common_sampler_sample_and_accept_n(slot.ctx_sampling, ctx, draft);
-
-                slot.n_past += ids.size();
-                slot.n_decoded += ids.size();
-
-                slot.cache_tokens.push_back(id);
-                slot.cache_tokens.insert(slot.cache_tokens.end(), ids.begin(), ids.end() - 1);
-
-                llama_kv_cache_seq_rm(ctx, slot.id, slot.n_past, -1);
-
-                for (size_t i = 0; i < ids.size(); ++i) {
-                    completion_token_output result;
-
-                    result.tok = ids[i];
-                    result.text_to_send = common_token_to_piece(ctx, result.tok, params.special);
-                    //result.prob = 1.0f; // set later
-
-                    // TODO: set result.probs
-
-                    if (!process_token(result, slot)) {
-                        // release slot because of stop condition
-                        slot.release();
-                        slot.print_timings();
-                        send_final_response(slot);
-                        metrics.on_prediction(slot);
-                        break;
-                    }
-                }
-
-                LOG("accepted %d/%d draft tokens, new n_past = %d\n", (int) ids.size() - 1, (int) draft.size(), slot.n_past);
-            }
-
         LOG_VERBOSE("slots updated", {});
         return true;
     }
@@ -2253,7 +2009,7 @@ static json format_partial_response(
 struct token_translator
 {
     llama_context * ctx;
-    std::string operator()(llama_token tok) const { return common_token_to_piece(ctx, tok); }
+    std::string operator()(llama_token tok) const { return llama_token_to_piece(ctx, tok); }
     std::string operator()(const completion_token_output &cto) const { return (*this)(cto.tok); }
 };
 
@@ -2318,6 +2074,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
     // slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
     // slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
     // slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
+    // slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
     // slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p);
     // slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
     // slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n);
@@ -2327,6 +2084,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
     // slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
     // slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
     // slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
+    // slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
     // slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
     // slot->params.seed = json_value(data, "seed", default_params.seed);
     // slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
@@ -2340,6 +2098,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
     data["n_predict"] = predict->tokens() == 0 ? -1 : predict->tokens();
     data["top_k"] = predict->topk();
     data["top_p"] = predict->topp();
+    data["tfs_z"] = predict->tailfreesamplingz();
     data["typical_p"] = predict->typicalp();
     data["temperature"] = predict->temperature();
     data["repeat_last_n"] = predict->repeat();
@@ -2349,6 +2108,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
     data["mirostat"] = predict->mirostat();
     data["mirostat_tau"] = predict->mirostattau();
     data["mirostat_eta"] = predict->mirostateta();
+    data["penalize_nl"] = predict->penalizenl();
     data["n_keep"] = predict->nkeep();
     data["seed"] = predict->seed();
     data["grammar"] = predict->grammar();
@@ -2385,6 +2145,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
     // llama.params.n_predict = predict->tokens() == 0 ? -1 : predict->tokens();
     // llama.params.sparams.top_k = predict->topk();
     // llama.params.sparams.top_p = predict->topp();
+    // llama.params.sparams.tfs_z = predict->tailfreesamplingz();
     // llama.params.sparams.typical_p = predict->typicalp();
     // llama.params.sparams.penalty_last_n = predict->repeat();
     // llama.params.sparams.temp = predict->temperature();
@@ -2394,6 +2155,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
     // llama.params.sparams.mirostat = predict->mirostat();
     // llama.params.sparams.mirostat_tau = predict->mirostattau();
     // llama.params.sparams.mirostat_eta = predict->mirostateta();
+    // llama.params.sparams.penalize_nl = predict->penalizenl();
     // llama.params.n_keep = predict->nkeep();
     // llama.params.seed = predict->seed();
     // llama.params.sparams.grammar = predict->grammar();
@@ -2440,37 +2202,8 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
     // }
     // }
 
-const std::vector<ggml_type> kv_cache_types = {
-    GGML_TYPE_F32,
-    GGML_TYPE_F16,
-    GGML_TYPE_BF16,
-    GGML_TYPE_Q8_0,
-    GGML_TYPE_Q4_0,
-    GGML_TYPE_Q4_1,
-    GGML_TYPE_IQ4_NL,
-    GGML_TYPE_Q5_0,
-    GGML_TYPE_Q5_1,
-};
-
-static ggml_type kv_cache_type_from_str(const std::string & s) {
-    for (const auto & type : kv_cache_types) {
-        if (ggml_type_name(type) == s) {
-            return type;
-        }
-    }
-    throw std::runtime_error("Unsupported cache type: " + s);
-}
-
-static std::string get_all_kv_cache_types() {
-    std::ostringstream msg;
-    for (const auto & type : kv_cache_types) {
-        msg << ggml_type_name(type) << (&type == &kv_cache_types.back() ? "" : ", ");
-    }
-    return msg.str();
-}
-
 static void params_parse(const backend::ModelOptions* request,
-                         common_params & params) {
+                         gpt_params & params) {
 
     // this is comparable to: https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L1809
 
@@ -2482,41 +2215,11 @@ static void params_parse(const backend::ModelOptions* request,
     }
     // params.model_alias ??
     params.model_alias = request->modelfile();
-    if (!request->cachetypekey().empty()) {
-        params.cache_type_k = kv_cache_type_from_str(request->cachetypekey());
-    }
-    if (!request->cachetypevalue().empty()) {
-        params.cache_type_v = kv_cache_type_from_str(request->cachetypevalue());
-    }
     params.n_ctx = request->contextsize();
     //params.memory_f16 = request->f16memory();
     params.cpuparams.n_threads = request->threads();
     params.n_gpu_layers = request->ngpulayers();
     params.n_batch = request->nbatch();
-    params.speculative.model = request->draftmodel();
-
-    // If options is not NULL, parse options
-    for (int i = 0; request->options()[i] != NULL; i++) {
-        char *optname = strtok(request->options()[i], ":");
-        char *optval = strtok(NULL, ":");
-        if (optval == NULL) {
-            optval = "true";
-        }
-
-        if (!strcmp(optname, "speculative.n_gpu_layers")) {
-            params.speculative.n_gpu_layers = std::stoi(optval);
-        }
-        if (!strcmp(optname, "speculative.n_ctx")) {
-            params.speculative.n_ctx = std::stoi(optval);
-        }
-    }
-
-    if params.speculative.n_gpu_layers == 0 {
-        params.speculative.n_gpu_layers = params.n_gpu_layers;
-    }
-    if params.speculative.n_ctx == 0 {
-        params.speculative.n_ctx = params.n_ctx;
-    }
     // Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1
     //params.n_parallel = 1;
     const char *env_parallel = std::getenv("LLAMACPP_PARALLEL");
@@ -2529,7 +2232,7 @@ static void params_parse(const backend::ModelOptions* request,
 
     const char *llama_grpc_servers = std::getenv("LLAMACPP_GRPC_SERVERS");
     if (llama_grpc_servers != NULL) {
-        add_rpc_devices(std::string(llama_grpc_servers));
+        params.rpc_servers = std::string(llama_grpc_servers);
     }
 
     // TODO: Add yarn
@@ -2570,7 +2273,6 @@ static void params_parse(const backend::ModelOptions* request,
     params.use_mmap = request->mmap();
     params.flash_attn = request->flashattention();
     params.no_kv_offload = request->nokvoffload();
-    params.ctx_shift = false; // We control context-shifting in any case (and we disable it as it could just lead to infinite loops)
 
     params.embedding = request->embeddings();
 
@@ -2609,7 +2311,7 @@ public:
 
     grpc::Status LoadModel(ServerContext* context, const backend::ModelOptions* request, backend::Result* result) {
         // Implement LoadModel RPC
-        common_params params;
+        gpt_params params;
         params_parse(request, params);
 
         llama_backend_init();
@@ -2655,13 +2357,6 @@ public:
             int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
             reply.set_prompt_tokens(tokens_evaluated);
 
-            if (result.result_json.contains("timings")) {
-                double timing_prompt_processing = result.result_json.at("timings").value("prompt_ms", 0.0);
-                reply.set_timing_prompt_processing(timing_prompt_processing);
-                double timing_token_generation = result.result_json.at("timings").value("predicted_ms", 0.0);
-                reply.set_timing_token_generation(timing_token_generation);
-            }
-
             // Log Request Correlation Id
             LOG_VERBOSE("correlation:", {
                 { "id", data["correlation_id"] }
@@ -2702,13 +2397,6 @@ public:
             reply->set_prompt_tokens(tokens_evaluated);
             reply->set_tokens(tokens_predicted);
             reply->set_message(completion_text);
-
-            if (result.result_json.contains("timings")) {
-                double timing_prompt_processing = result.result_json.at("timings").value("prompt_ms", 0.0);
-                reply->set_timing_prompt_processing(timing_prompt_processing);
-                double timing_token_generation = result.result_json.at("timings").value("predicted_ms", 0.0);
-                reply->set_timing_token_generation(timing_token_generation);
-            }
         }
         else
         {
@@ -1,13 +1,13 @@
 diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
-index 3cd0d2fa..6c5e811a 100644
+index 342042ff..224db9b5 100644
 --- a/examples/llava/clip.cpp
 +++ b/examples/llava/clip.cpp
-@@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
+@@ -2419,7 +2419,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
 struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
 int* patches_data = (int*)malloc(ggml_nbytes(patches));
 for (int i = 0; i < num_patches; i++) {
 -        patches_data[i] = i + 1;
 +        patches_data[i] = i;
 }
 ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
 free(patches_data);
@@ -1,25 +0,0 @@
-INCLUDE_PATH := $(abspath ./)
-LIBRARY_PATH := $(abspath ./)
-
-AR?=ar
-
-BUILD_TYPE?=
-# keep standard at C11 and C++11
-CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../sources/bark.cpp/examples -I$(INCLUDE_PATH)/../../../sources/bark.cpp/spm-headers -I$(INCLUDE_PATH)/../../../sources/bark.cpp -O3 -DNDEBUG -std=c++17 -fPIC
-LDFLAGS = -L$(LIBRARY_PATH) -L$(LIBRARY_PATH)/../../../sources/bark.cpp/build/examples -lbark -lstdc++ -lm
-
-# warnings
-CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
-
-gobark.o:
-	$(CXX) $(CXXFLAGS) gobark.cpp -o gobark.o -c $(LDFLAGS)
-
-libbark.a: gobark.o
-	cp $(INCLUDE_PATH)/../../../sources/bark.cpp/build/libbark.a ./
-	$(AR) rcs libbark.a gobark.o
-	$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml.c.o
-	$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-alloc.c.o
-	$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-backend.c.o
-
-clean:
-	rm -f gobark.o libbark.a
@@ -1,85 +0,0 @@
-#include <iostream>
-#include <tuple>
-
-#include "bark.h"
-#include "gobark.h"
-#include "common.h"
-#include "ggml.h"
-
-struct bark_context *c;
-
-void bark_print_progress_callback(struct bark_context *bctx, enum bark_encoding_step step, int progress, void *user_data) {
-    if (step == bark_encoding_step::SEMANTIC) {
-        printf("\rGenerating semantic tokens... %d%%", progress);
-    } else if (step == bark_encoding_step::COARSE) {
-        printf("\rGenerating coarse tokens... %d%%", progress);
-    } else if (step == bark_encoding_step::FINE) {
-        printf("\rGenerating fine tokens... %d%%", progress);
-    }
-    fflush(stdout);
-}
-
-int load_model(char *model) {
-    // initialize bark context
-    struct bark_context_params ctx_params = bark_context_default_params();
-    bark_params params;
-
-    params.model_path = model;
-
-    // ctx_params.verbosity = verbosity;
-    ctx_params.progress_callback = bark_print_progress_callback;
-    ctx_params.progress_callback_user_data = nullptr;
-
-    struct bark_context *bctx = bark_load_model(params.model_path.c_str(), ctx_params, params.seed);
-    if (!bctx) {
-        fprintf(stderr, "%s: Could not load model\n", __func__);
-        return 1;
-    }
-
-    c = bctx;
-
-    return 0;
-}
-
-int tts(char *text,int threads, char *dst ) {
-
-    ggml_time_init();
-    const int64_t t_main_start_us = ggml_time_us();
-
-    // generate audio
-    if (!bark_generate_audio(c, text, threads)) {
-        fprintf(stderr, "%s: An error occured. If the problem persists, feel free to open an issue to report it.\n", __func__);
-        return 1;
-    }
-
-    const float *audio_data = bark_get_audio_data(c);
-    if (audio_data == NULL) {
-        fprintf(stderr, "%s: Could not get audio data\n", __func__);
-        return 1;
-    }
-
-    const int audio_arr_size = bark_get_audio_data_size(c);
-
-    std::vector<float> audio_arr(audio_data, audio_data + audio_arr_size);
-
-    write_wav_on_disk(audio_arr, dst);
-
-    // report timing
-    {
-        const int64_t t_main_end_us = ggml_time_us();
-        const int64_t t_load_us = bark_get_load_time(c);
-        const int64_t t_eval_us = bark_get_eval_time(c);
-
-        printf("\n\n");
-        printf("%s: load time = %8.2f ms\n", __func__, t_load_us / 1000.0f);
-        printf("%s: eval time = %8.2f ms\n", __func__, t_eval_us / 1000.0f);
-        printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us) / 1000.0f);
-    }
-
-    return 0;
-}
-
-int unload() {
-    bark_free(c);
-}
@@ -1,52 +0,0 @@
-package main
-
-// #cgo CXXFLAGS: -I${SRCDIR}/../../../sources/bark.cpp/ -I${SRCDIR}/../../../sources/bark.cpp/encodec.cpp -I${SRCDIR}/../../../sources/bark.cpp/examples -I${SRCDIR}/../../../sources/bark.cpp/spm-headers
-// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../sources/bark.cpp/build/examples -L${SRCDIR}/../../../sources/bark.cpp/build/encodec.cpp/ -lbark -lencodec -lcommon
-// #include <gobark.h>
-// #include <stdlib.h>
-import "C"
-
-import (
-    "fmt"
-    "unsafe"
-
-    "github.com/mudler/LocalAI/pkg/grpc/base"
-    pb "github.com/mudler/LocalAI/pkg/grpc/proto"
-)
-
-type Bark struct {
-    base.SingleThread
-    threads int
-}
-
-func (sd *Bark) Load(opts *pb.ModelOptions) error {
-
-    sd.threads = int(opts.Threads)
-
-    modelFile := C.CString(opts.ModelFile)
-    defer C.free(unsafe.Pointer(modelFile))
-
-    ret := C.load_model(modelFile)
-    if ret != 0 {
-        return fmt.Errorf("inference failed")
-    }
-
-    return nil
-}
-
-func (sd *Bark) TTS(opts *pb.TTSRequest) error {
-    t := C.CString(opts.Text)
-    defer C.free(unsafe.Pointer(t))
-
-    dst := C.CString(opts.Dst)
-    defer C.free(unsafe.Pointer(dst))
-
-    threads := C.int(sd.threads)
-
-    ret := C.tts(t, threads, dst)
-    if ret != 0 {
-        return fmt.Errorf("inference failed")
-    }
-
-    return nil
-}
@@ -1,8 +0,0 @@
-#ifdef __cplusplus
-extern "C" {
-#endif
-int load_model(char *model);
-int tts(char *text,int threads, char *dst );
-#ifdef __cplusplus
-}
-#endif
@@ -1,96 +0,0 @@
-INCLUDE_PATH := $(abspath ./)
-LIBRARY_PATH := $(abspath ./)
-
-AR?=ar
-CMAKE_ARGS?=
-BUILD_TYPE?=
-ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
-# keep standard at C11 and C++11
-CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC
-
-# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
-CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
-
-# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
-ifeq ($(BUILD_TYPE),cublas)
-	CMAKE_ARGS+=-DGGML_CUDA=ON
-# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
-# to CMAKE_ARGS automatically
-else ifeq ($(BUILD_TYPE),openblas)
-	CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
-# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
-else ifeq ($(BUILD_TYPE),clblas)
-	CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
-# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
-else ifeq ($(BUILD_TYPE),hipblas)
-	CMAKE_ARGS+=-DGGML_HIP=ON
-# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
-# But if it's OSX without metal, disable it here
-else ifeq ($(OS),Darwin)
-	ifneq ($(BUILD_TYPE),metal)
-		CMAKE_ARGS+=-DGGML_METAL=OFF
-	else
-		CMAKE_ARGS+=-DGGML_METAL=ON
-		CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
-		TARGET+=--target ggml-metal
-	endif
-endif
-
-# ifeq ($(BUILD_TYPE),sycl_f16)
-#	CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON -DSD_SYCL=ON -DGGML_SYCL_F16=ON
-# endif
-
-# ifeq ($(BUILD_TYPE),sycl_f32)
-#	CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DSD_SYCL=ON
-# endif
-
-# warnings
-CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
-
-# Find all .a archives in ARCHIVE_DIR
-# (ggml can have different backends cpu, cuda, etc., each backend generates a .a archive)
-GGML_ARCHIVE_DIR := build/ggml/src/
-ALL_ARCHIVES := $(shell find $(GGML_ARCHIVE_DIR) -type f -name '*.a')
-
-# Name of the single merged library
-COMBINED_LIB := libggmlall.a
-
-# Rule to merge all the .a files into one
-$(COMBINED_LIB): $(ALL_ARCHIVES)
-	@echo "Merging all .a into $(COMBINED_LIB)"
-	rm -f $@
-	mkdir -p merge-tmp
-	for a in $(ALL_ARCHIVES); do \
-		( cd merge-tmp && ar x ../$$a ); \
-	done
-	( cd merge-tmp && ar rcs ../$@ *.o )
-	# Ensure we have a proper index
-	ranlib $@
-	# Clean up
-	rm -rf merge-tmp
-
-build/libstable-diffusion.a:
-	@echo "Building SD with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
-ifneq (,$(findstring sycl,$(BUILD_TYPE)))
-	+bash -c "source $(ONEAPI_VARS); \
-		mkdir -p build && \
-		cd build && \
-		cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \
-		cmake --build . --config Release"
-else
-	mkdir -p build && \
-	cd build && \
-	cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \
-	cmake --build . --config Release
-endif
-	$(MAKE) $(COMBINED_LIB)
-
-gosd.o:
-	$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
-
-libsd.a: gosd.o
-	cp $(INCLUDE_PATH)/build/libstable-diffusion.a ./libsd.a
-	$(AR) rcs libsd.a gosd.o
-
-clean:
-	rm -rf gosd.o libsd.a build $(COMBINED_LIB)
@@ -1,228 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <time.h>
-#include <iostream>
-#include <random>
-#include <string>
-#include <vector>
-#include "gosd.h"
-
-// #include "preprocessing.hpp"
-#include "flux.hpp"
-#include "stable-diffusion.h"
-
-#define STB_IMAGE_IMPLEMENTATION
-#define STB_IMAGE_STATIC
-#include "stb_image.h"
-
-#define STB_IMAGE_WRITE_IMPLEMENTATION
-#define STB_IMAGE_WRITE_STATIC
-#include "stb_image_write.h"
-
-#define STB_IMAGE_RESIZE_IMPLEMENTATION
-#define STB_IMAGE_RESIZE_STATIC
-#include "stb_image_resize.h"
-
-// Names of the sampler method, same order as enum sample_method in stable-diffusion.h
-const char* sample_method_str[] = {
-    "euler_a",
-    "euler",
-    "heun",
-    "dpm2",
-    "dpm++2s_a",
-    "dpm++2m",
-    "dpm++2mv2",
-    "ipndm",
-    "ipndm_v",
-    "lcm",
-};
-
-// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
-const char* schedule_str[] = {
-    "default",
-    "discrete",
-    "karras",
-    "exponential",
-    "ays",
-    "gits",
-};
-
-sd_ctx_t* sd_c;
-
-sample_method_t sample_method;
-
-int load_model(char *model, char* options[], int threads, int diff) {
-    fprintf (stderr, "Loading model!\n");
-
-    char *stableDiffusionModel = "";
-    if (diff == 1 ) {
-        stableDiffusionModel = model;
-        model = "";
-    }
-
-    // decode options. Options are in form optname:optvale, or if booleans only optname.
-    char *clip_l_path = "";
-    char *clip_g_path = "";
-    char *t5xxl_path = "";
-    char *vae_path = "";
-    char *scheduler = "";
-    char *sampler = "";
-
-    // If options is not NULL, parse options
-    for (int i = 0; options[i] != NULL; i++) {
-        char *optname = strtok(options[i], ":");
-        char *optval = strtok(NULL, ":");
-        if (optval == NULL) {
-            optval = "true";
-        }
-
-        if (!strcmp(optname, "clip_l_path")) {
-            clip_l_path = optval;
-        }
-        if (!strcmp(optname, "clip_g_path")) {
-            clip_g_path = optval;
-        }
-        if (!strcmp(optname, "t5xxl_path")) {
-            t5xxl_path = optval;
-        }
-        if (!strcmp(optname, "vae_path")) {
-            vae_path = optval;
-        }
-        if (!strcmp(optname, "scheduler")) {
-            scheduler = optval;
-        }
-        if (!strcmp(optname, "sampler")) {
-            sampler = optval;
-        }
-    }
-
-    int sample_method_found = -1;
-    for (int m = 0; m < N_SAMPLE_METHODS; m++) {
-        if (!strcmp(sampler, sample_method_str[m])) {
-            sample_method_found = m;
-        }
-    }
-    if (sample_method_found == -1) {
-        fprintf(stderr, "Invalid sample method, default to EULER_A!\n");
-        sample_method_found = EULER_A;
-    }
-    sample_method = (sample_method_t)sample_method_found;
-
-    int schedule_found = -1;
-    for (int d = 0; d < N_SCHEDULES; d++) {
-        if (!strcmp(scheduler, schedule_str[d])) {
-            schedule_found = d;
-            fprintf (stderr, "Found scheduler: %s\n", scheduler);
-
-        }
-    }
-
-    if (schedule_found == -1) {
-        fprintf (stderr, "Invalid scheduler! using DEFAULT\n");
-        schedule_found = DEFAULT;
-    }
-
-    schedule_t schedule = (schedule_t)schedule_found;
-
-    fprintf (stderr, "Creating context\n");
-    sd_ctx_t* sd_ctx = new_sd_ctx(model,
-        clip_l_path,
-        clip_g_path,
-        t5xxl_path,
-        stableDiffusionModel,
-        vae_path,
-        "",
-        "",
-        "",
-        "",
-        "",
-        false,
-        false,
-        false,
-        threads,
-        SD_TYPE_COUNT,
-        STD_DEFAULT_RNG,
-        schedule,
-        false,
-        false,
-        false,
-        false);
-
-    if (sd_ctx == NULL) {
-        fprintf (stderr, "failed loading model (generic error)\n");
-        return 1;
-    }
-    fprintf (stderr, "Created context: OK\n");
-
-    sd_c = sd_ctx;
-
-    return 0;
-}
-
-int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed , char *dst, float cfg_scale) {
-
-    sd_image_t* results;
-
-    std::vector<int> skip_layers = {7, 8, 9};
-
-    fprintf (stderr, "Generating image\n");
-
-    results = txt2img(sd_c,
-        text,
-        negativeText,
-        -1, //clip_skip
-        cfg_scale, // sfg_scale
-        3.5f,
-        width,
-        height,
-        sample_method,
-        steps,
-        seed,
-        1,
-        NULL,
-        0.9f,
-        20.f,
-        false,
-        "",
-        skip_layers.data(),
-        skip_layers.size(),
-        0,
-        0.01,
-        0.2);
-
-    if (results == NULL) {
-        fprintf (stderr, "NO results\n");
-        return 1;
-    }
-
-    if (results[0].data == NULL) {
-        fprintf (stderr, "Results with no data\n");
-        return 1;
-    }
-
-    fprintf (stderr, "Writing PNG\n");
-
-    fprintf (stderr, "DST: %s\n", dst);
-    fprintf (stderr, "Width: %d\n", results[0].width);
-    fprintf (stderr, "Height: %d\n", results[0].height);
-    fprintf (stderr, "Channel: %d\n", results[0].channel);
-    fprintf (stderr, "Data: %p\n", results[0].data);
-
-    stbi_write_png(dst, results[0].width, results[0].height, results[0].channel,
-        results[0].data, 0, NULL);
-    fprintf (stderr, "Saved resulting image to '%s'\n", dst);
-
-    // TODO: free results. Why does it crash?
-
-    free(results[0].data);
-    results[0].data = NULL;
-    free(results);
-    fprintf (stderr, "gen_image is done", dst);
-
-    return 0;
-}
-
-int unload() {
-    free_sd_ctx(sd_c);
-}
@@ -1,96 +0,0 @@
-package main
-
-// #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include
-// #cgo LDFLAGS: -L${SRCDIR}/ -lsd -lstdc++ -lm -lggmlall -lgomp
-// #include <gosd.h>
-// #include <stdlib.h>
-import "C"
-
-import (
-    "fmt"
-    "os"
-    "path/filepath"
-    "strings"
-    "unsafe"
-
-    "github.com/mudler/LocalAI/pkg/grpc/base"
-    pb "github.com/mudler/LocalAI/pkg/grpc/proto"
-    "github.com/mudler/LocalAI/pkg/utils"
-)
-
-type SDGGML struct {
-    base.SingleThread
-    threads      int
-    sampleMethod string
-    cfgScale     float32
-}
-
-func (sd *SDGGML) Load(opts *pb.ModelOptions) error {
-
-    sd.threads = int(opts.Threads)
-
-    modelFile := C.CString(opts.ModelFile)
-    defer C.free(unsafe.Pointer(modelFile))
-
-    var options **C.char
-    // prepare the options array to pass to C
-
-    size := C.size_t(unsafe.Sizeof((*C.char)(nil)))
-    length := C.size_t(len(opts.Options))
-    options = (**C.char)(C.malloc(length * size))
-    view := (*[1 << 30]*C.char)(unsafe.Pointer(options))[0:len(opts.Options):len(opts.Options)]
-
-    var diffusionModel int
-
-    var oo []string
-    for _, op := range opts.Options {
-        if op == "diffusion_model" {
-            diffusionModel = 1
-            continue
-        }
-
-        // If it's an option path, we resolve absolute path from the model path
-        if strings.Contains(op, ":") && strings.Contains(op, "path") {
-            data := strings.Split(op, ":")
-            data[1] = filepath.Join(opts.ModelPath, data[1])
-            if err := utils.VerifyPath(data[1], opts.ModelPath); err == nil {
-                oo = append(oo, strings.Join(data, ":"))
-            }
-        } else {
-            oo = append(oo, op)
-        }
-    }
-
-    fmt.Fprintf(os.Stderr, "Options: %+v\n", oo)
-
-    for i, x := range oo {
-        view[i] = C.CString(x)
-    }
-
-    sd.cfgScale = opts.CFGScale
-
-    ret := C.load_model(modelFile, options, C.int(opts.Threads), C.int(diffusionModel))
-    if ret != 0 {
-        return fmt.Errorf("could not load model")
-    }
-
-    return nil
-}
-
-func (sd *SDGGML) GenerateImage(opts *pb.GenerateImageRequest) error {
-    t := C.CString(opts.PositivePrompt)
-    defer C.free(unsafe.Pointer(t))
-
-    dst := C.CString(opts.Dst)
-    defer C.free(unsafe.Pointer(dst))
-
-    negative := C.CString(opts.NegativePrompt)
-    defer C.free(unsafe.Pointer(negative))
-
-    ret := C.gen_image(t, negative, C.int(opts.Width), C.int(opts.Height), C.int(opts.Step), C.int(opts.Seed), dst, C.float(sd.cfgScale))
-    if ret != 0 {
-        return fmt.Errorf("inference failed")
-    }
-
-    return nil
-}
@@ -1,8 +0,0 @@
-#ifdef __cplusplus
-extern "C" {
-#endif
-int load_model(char *model, char* options[], int threads, int diffusionModel);
-int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed, char *dst, float cfg_scale);
-#ifdef __cplusplus
-}
-#endif
@@ -1,6 +1,7 @@
 package main

 // Note: this is started internally by LocalAI and a server is allocated for each model

 import (
     "flag"

@@ -14,7 +15,7 @@ var (
 func main() {
     flag.Parse()

-    if err := grpc.StartServer(*addr, &Bark{}); err != nil {
+    if err := grpc.StartServer(*addr, &Image{}); err != nil {
         panic(err)
     }
 }
backend/go/image/stablediffusion/stablediffusion.go (new file, 33 lines)
@@ -0,0 +1,33 @@
+package main
+
+// This is a wrapper to statisfy the GRPC service interface
+// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
+import (
+    "github.com/mudler/LocalAI/pkg/grpc/base"
+    pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+    "github.com/mudler/LocalAI/pkg/stablediffusion"
+)
+
+type Image struct {
+    base.SingleThread
+    stablediffusion *stablediffusion.StableDiffusion
+}
+
+func (image *Image) Load(opts *pb.ModelOptions) error {
+    var err error
+    // Note: the Model here is a path to a directory containing the model files
+    image.stablediffusion, err = stablediffusion.New(opts.ModelFile)
+    return err
+}
+
+func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
+    return image.stablediffusion.GenerateImage(
+        int(opts.Height),
+        int(opts.Width),
+        int(opts.Mode),
+        int(opts.Step),
+        int(opts.Seed),
+        opts.PositivePrompt,
+        opts.NegativePrompt,
+        opts.Dst)
+}
@@ -1,6 +1,7 @@
 package main

 // Note: this is started internally by LocalAI and a server is allocated for each model

 import (
     "flag"

@@ -14,7 +15,7 @@ var (
 func main() {
     flag.Parse()

-    if err := grpc.StartServer(*addr, &SDGGML{}); err != nil {
+    if err := grpc.StartServer(*addr, &Image{}); err != nil {
         panic(err)
     }
 }
backend/go/image/tinydream/tinydream.go (new file, 32 lines)
@@ -0,0 +1,32 @@
+package main
+
+// This is a wrapper to statisfy the GRPC service interface
+// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
+import (
+    "github.com/mudler/LocalAI/pkg/grpc/base"
+    pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+    "github.com/mudler/LocalAI/pkg/tinydream"
+)
+
+type Image struct {
+    base.SingleThread
+    tinydream *tinydream.TinyDream
+}
+
+func (image *Image) Load(opts *pb.ModelOptions) error {
+    var err error
+    // Note: the Model here is a path to a directory containing the model files
+    image.tinydream, err = tinydream.New(opts.ModelFile)
+    return err
+}
+
+func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
+    return image.tinydream.GenerateImage(
+        int(opts.Height),
+        int(opts.Width),
+        int(opts.Step),
+        int(opts.Seed),
+        opts.PositivePrompt,
+        opts.NegativePrompt,
+        opts.Dst)
+}
backend/go/llm/bert/bert.go (new file, 34 lines)
@@ -0,0 +1,34 @@
+package main
+
+// This is a wrapper to statisfy the GRPC service interface
+// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
+import (
+    bert "github.com/go-skynet/go-bert.cpp"
+
+    "github.com/mudler/LocalAI/pkg/grpc/base"
+    pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+)
+
+type Embeddings struct {
+    base.SingleThread
+    bert *bert.Bert
+}
+
+func (llm *Embeddings) Load(opts *pb.ModelOptions) error {
+    model, err := bert.New(opts.ModelFile)
+    llm.bert = model
+    return err
+}
+
+func (llm *Embeddings) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
+
+    if len(opts.EmbeddingTokens) > 0 {
+        tokens := []int{}
+        for _, t := range opts.EmbeddingTokens {
+            tokens = append(tokens, int(t))
+        }
+        return llm.bert.TokenEmbeddings(tokens, bert.SetThreads(int(opts.Threads)))
+    }
+
+    return llm.bert.Embeddings(opts.Embeddings, bert.SetThreads(int(opts.Threads)))
+}
backend/go/llm/bert/main.go (new file, 21 lines)
@@ -0,0 +1,21 @@
+package main
+
+// Note: this is started internally by LocalAI and a server is allocated for each model
+
+import (
+    "flag"
+
+    grpc "github.com/mudler/LocalAI/pkg/grpc"
+)
+
+var (
+    addr = flag.String("addr", "localhost:50051", "the address to connect to")
+)
+
+func main() {
+    flag.Parse()
+
+    if err := grpc.StartServer(*addr, &Embeddings{}); err != nil {
+        panic(err)
+    }
+}
@@ -15,7 +15,7 @@ var (
 func main() {
     flag.Parse()

-    if err := grpc.StartServer(*addr, &VAD{}); err != nil {
+    if err := grpc.StartServer(*addr, &LLM{}); err != nil {
         panic(err)
     }
 }
backend/go/llm/rwkv/rwkv.go (new file, 95 lines)
@@ -0,0 +1,95 @@
+package main
+
+// This is a wrapper to statisfy the GRPC service interface
+// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
+import (
+    "fmt"
+    "path/filepath"
+
+    "github.com/donomii/go-rwkv.cpp"
+    "github.com/mudler/LocalAI/pkg/grpc/base"
+    pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+)
+
+const tokenizerSuffix = ".tokenizer.json"
+
+type LLM struct {
+    base.SingleThread
+
+    rwkv *rwkv.RwkvState
+}
+
+func (llm *LLM) Load(opts *pb.ModelOptions) error {
+    tokenizerFile := opts.Tokenizer
+    if tokenizerFile == "" {
+        modelFile := filepath.Base(opts.ModelFile)
+        tokenizerFile = modelFile + tokenizerSuffix
+    }
+    modelPath := filepath.Dir(opts.ModelFile)
+    tokenizerPath := filepath.Join(modelPath, tokenizerFile)
+
+    model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads()))
+
+    if model == nil {
+        return fmt.Errorf("rwkv could not load model")
+    }
+    llm.rwkv = model
+    return nil
+}
+
+func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
+    stopWord := "\n"
+    if len(opts.StopPrompts) > 0 {
+        stopWord = opts.StopPrompts[0]
+    }
+
+    if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
+        return "", err
+    }
+
+    response := llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), nil)
+
+    return response, nil
+}
+
+func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
+    go func() {
+
+        stopWord := "\n"
+        if len(opts.StopPrompts) > 0 {
+            stopWord = opts.StopPrompts[0]
+        }
+
+        if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
+            fmt.Println("Error processing input: ", err)
+            return
+        }
+
+        llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), func(s string) bool {
+            results <- s
+            return true
+        })
+        close(results)
+    }()
+
+    return nil
+}
+
+func (llm *LLM) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationResponse, error) {
+    tokens, err := llm.rwkv.Tokenizer.Encode(opts.Prompt)
+    if err != nil {
+        return pb.TokenizationResponse{}, err
+    }
+
+    l := len(tokens)
+    i32Tokens := make([]int32, l)
+
+    for i, t := range tokens {
+        i32Tokens[i] = int32(t.ID)
+    }
+
+    return pb.TokenizationResponse{
+        Length: int32(l),
+        Tokens: i32Tokens,
+    }, nil
+}
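Note (not part of the diff): the rwkv.go wrapper added above streams tokens by writing to a caller-supplied channel from a goroutine and closing the channel when generation finishes. A minimal, hedged sketch of how a caller from the same package could drain that channel; the streamPrediction name is illustrative only and does not exist in LocalAI.

package main

import (
    "fmt"

    pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)

// streamPrediction starts PredictStream and prints tokens as they arrive.
// PredictStream returns immediately; the generation goroutine closes the
// channel when it is done, which ends the range loop below.
func streamPrediction(llm *LLM, opts *pb.PredictOptions) error {
    results := make(chan string)
    if err := llm.PredictStream(opts, results); err != nil {
        return err
    }
    for token := range results {
        fmt.Print(token)
    }
    return nil
}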
@@ -311,16 +311,12 @@ func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error)
 }

 func isNormalized(k []float32) bool {
-    var sum float64
+    var sum float32

     for _, v := range k {
-        v64 := float64(v)
-        sum += v64*v64
+        sum += v
     }

-    s := math.Sqrt(sum)
-
-    return s >= 0.99 && s <= 1.01
+    return sum == 1.0
 }

 // TODO: This we could replace with handwritten SIMD code
@@ -332,7 +328,7 @@ func normalizedCosineSimilarity(k1, k2 []float32) float32 {
         dot += k1[i] * k2[i]
     }

-    assert(dot >= -1.01 && dot <= 1.01, fmt.Sprintf("dot = %f", dot))
+    assert(dot >= -1 && dot <= 1, fmt.Sprintf("dot = %f", dot))

     // 2.0 * (1.0 - dot) would be the Euclidean distance
     return dot
@@ -422,7 +418,7 @@ func cosineSimilarity(k1, k2 []float32, mag1 float64) float32 {

     sim := float32(dot / (mag1 * math.Sqrt(mag2)))

-    assert(sim >= -1.01 && sim <= 1.01, fmt.Sprintf("sim = %f", sim))
+    assert(sim >= -1 && sim <= 1, fmt.Sprintf("sim = %f", sim))

     return sim
 }
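Note (not part of the diff): the two isNormalized variants above differ in meaning, not just style. The version marked "+" sums raw float32 components and compares the sum to exactly 1.0, which is not a unit-length test; the version marked "-" accumulates squared components in float64 and accepts a small tolerance around a magnitude of 1. A minimal standalone sketch of that tolerance-based check; the isUnitLength name is illustrative only.

package main

import "math"

// isUnitLength reports whether k has (approximately) unit Euclidean length,
// mirroring the "-" variant shown in the hunk above.
func isUnitLength(k []float32) bool {
    var sum float64
    for _, v := range k {
        f := float64(v)
        sum += f * f // accumulate squared components in float64
    }
    m := math.Sqrt(sum)
    return m >= 0.99 && m <= 1.01 // allow a small floating-point error
}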
@@ -1,54 +0,0 @@
-package main
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-    "fmt"
-
-    "github.com/mudler/LocalAI/pkg/grpc/base"
-    pb "github.com/mudler/LocalAI/pkg/grpc/proto"
-    "github.com/streamer45/silero-vad-go/speech"
-)
-
-type VAD struct {
-    base.SingleThread
-    detector *speech.Detector
-}
-
-func (vad *VAD) Load(opts *pb.ModelOptions) error {
-    v, err := speech.NewDetector(speech.DetectorConfig{
-        ModelPath: opts.ModelFile,
-        SampleRate: 16000,
-        //WindowSize: 1024,
-        Threshold: 0.5,
-        MinSilenceDurationMs: 0,
-        SpeechPadMs: 0,
-    })
-    if err != nil {
-        return fmt.Errorf("create silero detector: %w", err)
-    }
-
-    vad.detector = v
-    return err
-}
-
-func (vad *VAD) VAD(req *pb.VADRequest) (pb.VADResponse, error) {
-    audio := req.Audio
-
-    segments, err := vad.detector.Detect(audio)
-    if err != nil {
-        return pb.VADResponse{}, fmt.Errorf("detect: %w", err)
-    }
-
-    vadSegments := []*pb.VADSegment{}
-    for _, s := range segments {
-        vadSegments = append(vadSegments, &pb.VADSegment{
-            Start: float32(s.SpeechStartAt),
-            End:   float32(s.SpeechEndAt),
-        })
-    }
-
-    return pb.VADResponse{
-        Segments: vadSegments,
-    }, nil
-}
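Note (not part of the diff): the removed VAD wrapper above returns pb.VADSegment values whose Start/End come from the silero detector, which the wrapper configures for 16 kHz input. A hedged sketch of mapping a detected segment back onto sample offsets in the audio buffer; segmentToSamples is an illustrative name and assumes Start/End are times in seconds, as in the silero-vad-go API.

package main

import pb "github.com/mudler/LocalAI/pkg/grpc/proto"

// segmentToSamples converts a detected speech segment from seconds to sample
// indices, assuming the 16 kHz sample rate used by the detector config above.
func segmentToSamples(seg *pb.VADSegment) (start, end int) {
    const sampleRate = 16000
    start = int(seg.Start * sampleRate)
    end = int(seg.End * sampleRate)
    return start, end
}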
@@ -1,2 +1,2 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.4.1+cu118
+torch
@@ -1 +1 @@
-torch==2.4.1
+torch
@@ -1,2 +1,2 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch==2.4.1+rocm6.0
+torch
@@ -1,6 +1,5 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
+intel-extension-for-pytorch
-torch==2.3.1+cxx11.abi
+torch
-oneccl_bind_pt==2.3.100+xpu
 optimum[openvino]
-setuptools
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,6 +1,6 @@
 accelerate
 auto-gptq==0.7.1
-grpcio==1.69.0
+grpcio==1.66.2
 protobuf
 certifi
 transformers
@@ -1,4 +1,4 @@
 transformers
 accelerate
-torch==2.4.1
+torch
-torchaudio==2.4.1
+torchaudio
@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.4.1+cu118
+torch
-torchaudio==2.4.1+cu118
+torchaudio
 transformers
 accelerate
@@ -1,4 +1,4 @@
-torch==2.4.1
+torch
-torchaudio==2.4.1
+torchaudio
 transformers
 accelerate
@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch==2.4.1+rocm6.0
+torch
-torchaudio==2.4.1+rocm6.0
+torchaudio
 transformers
 accelerate
@@ -1,9 +1,8 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
+intel-extension-for-pytorch
-torch==2.3.1+cxx11.abi
+torch
-torchaudio==2.3.1+cxx11.abi
+torchaudio
-oneccl_bind_pt==2.3.100+xpu
 optimum[openvino]
-setuptools
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
 transformers
 accelerate
@@ -1,4 +1,4 @@
 bark==0.1.5
-grpcio==1.69.0
+grpcio==1.66.2
 protobuf
 certifi
@@ -17,9 +17,6 @@
 # LIMIT_TARGETS="cublas12"
 # source $(dirname $0)/../common/libbackend.sh
 #
-
-PYTHON_VERSION="3.10"
-
 function init() {
     # Name of the backend (directory name)
     BACKEND_NAME=${PWD##*/}
@@ -91,7 +88,7 @@ function getBuildProfile() {
 # always result in an activated virtual environment
 function ensureVenv() {
     if [ ! -d "${EDIR}/venv" ]; then
-        uv venv --python ${PYTHON_VERSION} ${EDIR}/venv
+        uv venv ${EDIR}/venv
         echo "virtualenv created"
     fi

@@ -1,9 +1,8 @@
 .DEFAULT_GOAL := install

 .PHONY: install
-install:
+install: protogen
 	bash install.sh
-	$(MAKE) protogen

 .PHONY: protogen
 protogen: backend_pb2_grpc.py backend_pb2.py
@@ -13,7 +12,7 @@ protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py

 backend_pb2_grpc.py backend_pb2.py:
-	bash protogen.sh
+	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto

 .PHONY: clean
 clean: protogen-clean
@@ -1,6 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
@@ -1,5 +1,4 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
+intel-extension-for-pytorch
-torch==2.3.1+cxx11.abi
+torch
-oneccl_bind_pt==2.3.100+xpu
 optimum[openvino]
@@ -1,3 +1,2 @@
-grpcio==1.69.0
+grpcio==1.66.2
 protobuf
-grpcio-tools
@@ -1,4 +1,3 @@
 transformers
 accelerate
-torch==2.4.1
+torch
-coqui-tts
@@ -1,6 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.4.1+cu118
+torch
-torchaudio==2.4.1+cu118
+torchaudio
 transformers
 accelerate
-coqui-tts
@@ -1,5 +1,4 @@
-torch==2.4.1
+torch
-torchaudio==2.4.1
+torchaudio
 transformers
 accelerate
-coqui-tts
@@ -1,6 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch==2.4.1+rocm6.0
+torch
-torchaudio==2.4.1+rocm6.0
+torchaudio
 transformers
 accelerate
-coqui-tts
@@ -1,10 +1,8 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
+intel-extension-for-pytorch
-torch==2.3.1+cxx11.abi
+torch
-torchaudio==2.3.1+cxx11.abi
+torchaudio
-oneccl_bind_pt==2.3.100+xpu
 optimum[openvino]
-setuptools
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
 transformers
 accelerate
-coqui-tts
@@ -1,4 +1,4 @@
-grpcio==1.69.0
+coqui-tts
+grpcio==1.66.2
 protobuf
 certifi
-packaging==24.1
@@ -19,7 +19,7 @@ class TestBackendServicer(unittest.TestCase):
         This method sets up the gRPC service by starting the server
         """
         self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
-        time.sleep(30)
+        time.sleep(10)

     def tearDown(self) -> None:
         """
@@ -17,7 +17,7 @@ import backend_pb2_grpc

 import grpc

-from diffusers import SanaPipeline, StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
+from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
     EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
 from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
 from diffusers.pipelines.stable_diffusion import safety_checker
@@ -247,16 +247,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                 use_safetensors=True,
                 variant=variant)
         elif request.PipelineType == "FluxPipeline":
-            if fromSingleFile:
-                self.pipe = FluxPipeline.from_single_file(modelFile,
-                    torch_dtype=torchType,
-                    use_safetensors=True)
-            else:
             self.pipe = FluxPipeline.from_pretrained(
                 request.Model,
                 torch_dtype=torch.bfloat16)
             if request.LowVRAM:
                 self.pipe.enable_model_cpu_offload()
         elif request.PipelineType == "FluxTransformer2DModel":
             dtype = torch.bfloat16
             # specify from environment or default to "ChuckMcSneed/FLUX.1-dev"
@@ -275,13 +270,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):

             if request.LowVRAM:
                 self.pipe.enable_model_cpu_offload()
-        elif request.PipelineType == "SanaPipeline":
-            self.pipe = SanaPipeline.from_pretrained(
-                request.Model,
-                variant="bf16",
-                torch_dtype=torch.bfloat16)
-            self.pipe.vae.to(torch.bfloat16)
-            self.pipe.text_encoder.to(torch.bfloat16)

         if CLIPSKIP and request.CLIPSkip != 0:
             self.clip_skip = request.CLIPSkip
@@ -308,34 +296,22 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
             self.pipe.controlnet = self.controlnet
         else:
             self.controlnet = None
+        # Assume directory from request.ModelFile.
-        if request.LoraAdapter and not os.path.isabs(request.LoraAdapter):
+        # Only if request.LoraAdapter it's not an absolute path
+        if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter:
+            # get base path of modelFile
+            modelFileBase = os.path.dirname(request.ModelFile)
             # modify LoraAdapter to be relative to modelFileBase
-            request.LoraAdapter = os.path.join(request.ModelPath, request.LoraAdapter)
+            request.LoraAdapter = os.path.join(modelFileBase, request.LoraAdapter)

         device = "cpu" if not request.CUDA else "cuda"
         self.device = device
         if request.LoraAdapter:
             # Check if its a local file and not a directory ( we load lora differently for a safetensor file )
             if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter):
+                # self.load_lora_weights(request.LoraAdapter, 1, device, torchType)
                 self.pipe.load_lora_weights(request.LoraAdapter)
             else:
                 self.pipe.unet.load_attn_procs(request.LoraAdapter)
-        if len(request.LoraAdapters) > 0:
-            i = 0
-            adapters_name = []
-            adapters_weights = []
-            for adapter in request.LoraAdapters:
-                if not os.path.isabs(adapter):
-                    adapter = os.path.join(request.ModelPath, adapter)
-                self.pipe.load_lora_weights(adapter, adapter_name=f"adapter_{i}")
-                adapters_name.append(f"adapter_{i}")
-                i += 1
-
-            for adapters_weight in request.LoraScales:
-                adapters_weights.append(adapters_weight)
-
-            self.pipe.set_adapters(adapters_name, adapter_weights=adapters_weights)
-
         if request.CUDA:
             self.pipe.to('cuda')
@@ -416,6 +392,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         # create a dictionary of values for the parameters
         options = {
             "negative_prompt": request.negative_prompt,
+            "width": request.width,
+            "height": request.height,
             "num_inference_steps": steps,
         }

@@ -433,13 +411,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         keys = options.keys()

         if request.EnableParameters != "":
-            keys = [key.strip() for key in request.EnableParameters.split(",")]
+            keys = request.EnableParameters.split(",")

         if request.EnableParameters == "none":
             keys = []

         # create a dictionary of parameters by using the keys from EnableParameters and the values from defaults
-        kwargs = {key: options.get(key) for key in keys if key in options}
+        kwargs = {key: options[key] for key in keys}

         # Set seed
         if request.seed > 0:
@@ -450,12 +428,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         if self.PipelineType == "FluxPipeline":
             kwargs["max_sequence_length"] = 256

-        if request.width:
-            kwargs["width"] = request.width
-
-        if request.height:
-            kwargs["height"] = request.height
-
         if self.PipelineType == "FluxTransformer2DModel":
             kwargs["output_type"] = "pil"
             kwargs["generator"] = torch.Generator("cpu").manual_seed(0)
@@ -475,7 +447,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
             export_to_video(video_frames, request.dst)
             return backend_pb2.Result(message="Media generated successfully", success=True)

-        print(f"Generating image with {kwargs=}", file=sys.stderr)
         image = {}
         if COMPEL:
             conditioning, pooled = self.compel.build_conditioning_tensor(prompt)
@@ -5,5 +5,5 @@ accelerate
 compel
 peft
 sentencepiece
-torch==2.4.1
+torch
 optimum-quanto
@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.4.1+cu118
+torch
 diffusers
 opencv-python
 transformers
@@ -1,4 +1,4 @@
-torch==2.4.1
+torch
 diffusers
 opencv-python
 transformers
@@ -1,10 +1,9 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
+intel-extension-for-pytorch
-torch==2.3.1+cxx11.abi
+torch
-torchvision==0.18.1+cxx11.abi
+torchvision
-oneccl_bind_pt==2.3.100+xpu
 optimum[openvino]
-setuptools
+setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
 diffusers
 opencv-python
 transformers
@@ -1,5 +1,5 @@
 setuptools
-grpcio==1.69.0
+grpcio==1.66.2
 pillow
 protobuf
 certifi
@@ -1,3 +1,3 @@
 transformers
 accelerate
-torch==2.4.1
+torch
@@ -1,4 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.4.1+cu118
+torch
 transformers
 accelerate
@@ -1,3 +1,3 @@
-torch==2.4.1
+torch
 transformers
 accelerate
@@ -1,4 +1,4 @@
-grpcio==1.69.0
+grpcio==1.66.2
 protobuf
 certifi
 wheel
@@ -1,20 +0,0 @@
-.DEFAULT_GOAL := install
-
-.PHONY: install
-install:
-	bash install.sh
-	$(MAKE) protogen
-
-.PHONY: protogen
-protogen: backend_pb2_grpc.py backend_pb2.py
-
-.PHONY: protogen-clean
-protogen-clean:
-	$(RM) backend_pb2_grpc.py backend_pb2.py
-
-backend_pb2_grpc.py backend_pb2.py:
-	bash protogen.sh
-
-.PHONY: clean
-clean: protogen-clean
-	rm -rf venv __pycache__
@@ -1,6 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
@@ -1,8 +0,0 @@
-faster-whisper
-opencv-python
-accelerate
-compel
-peft
-sentencepiece
-torch==2.4.1
-optimum-quanto
@@ -1,9 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.4.1+cu118
-faster-whisper
-opencv-python
-accelerate
-compel
-peft
-sentencepiece
-optimum-quanto
@@ -1,8 +0,0 @@
-torch==2.4.1
-faster-whisper
-opencv-python
-accelerate
-compel
-peft
-sentencepiece
-optimum-quanto
@@ -1,6 +0,0 @@
---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
-torch==2.3.1+cxx11.abi
-oneccl_bind_pt==2.3.100+xpu
-optimum[openvino]
-faster-whisper
@@ -1,3 +0,0 @@
-grpcio==1.69.0
-protobuf
-grpcio-tools
@@ -1,524 +0,0 @@
-# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/istftnet.py
-# https://github.com/yl4579/StyleTTS2/blob/main/Modules/istftnet.py
-from scipy.signal import get_window
-from torch.nn import Conv1d, ConvTranspose1d
-from torch.nn.utils import weight_norm, remove_weight_norm
-import numpy as np
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-# https://github.com/yl4579/StyleTTS2/blob/main/Modules/utils.py
-def init_weights(m, mean=0.0, std=0.01):
-    classname = m.__class__.__name__
-    if classname.find("Conv") != -1:
-        m.weight.data.normal_(mean, std)
-
-def get_padding(kernel_size, dilation=1):
-    return int((kernel_size*dilation - dilation)/2)
-
-LRELU_SLOPE = 0.1
-
-class AdaIN1d(nn.Module):
-    def __init__(self, style_dim, num_features):
-        super().__init__()
-        self.norm = nn.InstanceNorm1d(num_features, affine=False)
-        self.fc = nn.Linear(style_dim, num_features*2)
-
-    def forward(self, x, s):
-        h = self.fc(s)
-        h = h.view(h.size(0), h.size(1), 1)
-        gamma, beta = torch.chunk(h, chunks=2, dim=1)
-        return (1 + gamma) * self.norm(x) + beta
-
-class AdaINResBlock1(torch.nn.Module):
-    def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5), style_dim=64):
-        super(AdaINResBlock1, self).__init__()
-        self.convs1 = nn.ModuleList([
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
-                padding=get_padding(kernel_size, dilation[0]))),
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1],
-                padding=get_padding(kernel_size, dilation[1]))),
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[2],
-                padding=get_padding(kernel_size, dilation[2])))
-        ])
-        self.convs1.apply(init_weights)
-
-        self.convs2 = nn.ModuleList([
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
-                padding=get_padding(kernel_size, 1))),
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
-                padding=get_padding(kernel_size, 1))),
-            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
-                padding=get_padding(kernel_size, 1)))
-        ])
-        self.convs2.apply(init_weights)
-
-        self.adain1 = nn.ModuleList([
-            AdaIN1d(style_dim, channels),
-            AdaIN1d(style_dim, channels),
-            AdaIN1d(style_dim, channels),
-        ])
-
-        self.adain2 = nn.ModuleList([
-            AdaIN1d(style_dim, channels),
-            AdaIN1d(style_dim, channels),
-            AdaIN1d(style_dim, channels),
-        ])
-
-        self.alpha1 = nn.ParameterList([nn.Parameter(torch.ones(1, channels, 1)) for i in range(len(self.convs1))])
-        self.alpha2 = nn.ParameterList([nn.Parameter(torch.ones(1, channels, 1)) for i in range(len(self.convs2))])
-
-
-    def forward(self, x, s):
-        for c1, c2, n1, n2, a1, a2 in zip(self.convs1, self.convs2, self.adain1, self.adain2, self.alpha1, self.alpha2):
-            xt = n1(x, s)
-            xt = xt + (1 / a1) * (torch.sin(a1 * xt) ** 2)  # Snake1D
-            xt = c1(xt)
-            xt = n2(xt, s)
-            xt = xt + (1 / a2) * (torch.sin(a2 * xt) ** 2)  # Snake1D
-            xt = c2(xt)
-            x = xt + x
-        return x
-
-    def remove_weight_norm(self):
-        for l in self.convs1:
-            remove_weight_norm(l)
|
|
||||||
for l in self.convs2:
|
|
||||||
remove_weight_norm(l)
|
|
||||||
|
|
||||||
class TorchSTFT(torch.nn.Module):
|
|
||||||
def __init__(self, filter_length=800, hop_length=200, win_length=800, window='hann'):
|
|
||||||
super().__init__()
|
|
||||||
self.filter_length = filter_length
|
|
||||||
self.hop_length = hop_length
|
|
||||||
self.win_length = win_length
|
|
||||||
self.window = torch.from_numpy(get_window(window, win_length, fftbins=True).astype(np.float32))
|
|
||||||
|
|
||||||
def transform(self, input_data):
|
|
||||||
forward_transform = torch.stft(
|
|
||||||
input_data,
|
|
||||||
self.filter_length, self.hop_length, self.win_length, window=self.window.to(input_data.device),
|
|
||||||
return_complex=True)
|
|
||||||
|
|
||||||
return torch.abs(forward_transform), torch.angle(forward_transform)
|
|
||||||
|
|
||||||
def inverse(self, magnitude, phase):
|
|
||||||
inverse_transform = torch.istft(
|
|
||||||
magnitude * torch.exp(phase * 1j),
|
|
||||||
self.filter_length, self.hop_length, self.win_length, window=self.window.to(magnitude.device))
|
|
||||||
|
|
||||||
return inverse_transform.unsqueeze(-2) # unsqueeze to stay consistent with conv_transpose1d implementation
|
|
||||||
|
|
||||||
def forward(self, input_data):
|
|
||||||
self.magnitude, self.phase = self.transform(input_data)
|
|
||||||
reconstruction = self.inverse(self.magnitude, self.phase)
|
|
||||||
return reconstruction
|
|
||||||
|
|
||||||
class SineGen(torch.nn.Module):
|
|
||||||
""" Definition of sine generator
|
|
||||||
SineGen(samp_rate, harmonic_num = 0,
|
|
||||||
sine_amp = 0.1, noise_std = 0.003,
|
|
||||||
voiced_threshold = 0,
|
|
||||||
flag_for_pulse=False)
|
|
||||||
samp_rate: sampling rate in Hz
|
|
||||||
harmonic_num: number of harmonic overtones (default 0)
|
|
||||||
sine_amp: amplitude of sine-wavefrom (default 0.1)
|
|
||||||
noise_std: std of Gaussian noise (default 0.003)
|
|
||||||
voiced_thoreshold: F0 threshold for U/V classification (default 0)
|
|
||||||
flag_for_pulse: this SinGen is used inside PulseGen (default False)
|
|
||||||
Note: when flag_for_pulse is True, the first time step of a voiced
|
|
||||||
segment is always sin(np.pi) or cos(0)
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, samp_rate, upsample_scale, harmonic_num=0,
|
|
||||||
sine_amp=0.1, noise_std=0.003,
|
|
||||||
voiced_threshold=0,
|
|
||||||
flag_for_pulse=False):
|
|
||||||
super(SineGen, self).__init__()
|
|
||||||
self.sine_amp = sine_amp
|
|
||||||
self.noise_std = noise_std
|
|
||||||
self.harmonic_num = harmonic_num
|
|
||||||
self.dim = self.harmonic_num + 1
|
|
||||||
self.sampling_rate = samp_rate
|
|
||||||
self.voiced_threshold = voiced_threshold
|
|
||||||
self.flag_for_pulse = flag_for_pulse
|
|
||||||
self.upsample_scale = upsample_scale
|
|
||||||
|
|
||||||
def _f02uv(self, f0):
|
|
||||||
# generate uv signal
|
|
||||||
uv = (f0 > self.voiced_threshold).type(torch.float32)
|
|
||||||
return uv
|
|
||||||
|
|
||||||
def _f02sine(self, f0_values):
|
|
||||||
""" f0_values: (batchsize, length, dim)
|
|
||||||
where dim indicates fundamental tone and overtones
|
|
||||||
"""
|
|
||||||
# convert to F0 in rad. The interger part n can be ignored
|
|
||||||
# because 2 * np.pi * n doesn't affect phase
|
|
||||||
rad_values = (f0_values / self.sampling_rate) % 1
|
|
||||||
|
|
||||||
# initial phase noise (no noise for fundamental component)
|
|
||||||
rand_ini = torch.rand(f0_values.shape[0], f0_values.shape[2], \
|
|
||||||
device=f0_values.device)
|
|
||||||
rand_ini[:, 0] = 0
|
|
||||||
rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini
|
|
||||||
|
|
||||||
# instantanouse phase sine[t] = sin(2*pi \sum_i=1 ^{t} rad)
|
|
||||||
if not self.flag_for_pulse:
|
|
||||||
# # for normal case
|
|
||||||
|
|
||||||
# # To prevent torch.cumsum numerical overflow,
|
|
||||||
# # it is necessary to add -1 whenever \sum_k=1^n rad_value_k > 1.
|
|
||||||
# # Buffer tmp_over_one_idx indicates the time step to add -1.
|
|
||||||
# # This will not change F0 of sine because (x-1) * 2*pi = x * 2*pi
|
|
||||||
# tmp_over_one = torch.cumsum(rad_values, 1) % 1
|
|
||||||
# tmp_over_one_idx = (padDiff(tmp_over_one)) < 0
|
|
||||||
# cumsum_shift = torch.zeros_like(rad_values)
|
|
||||||
# cumsum_shift[:, 1:, :] = tmp_over_one_idx * -1.0
|
|
||||||
|
|
||||||
# phase = torch.cumsum(rad_values, dim=1) * 2 * np.pi
|
|
||||||
rad_values = torch.nn.functional.interpolate(rad_values.transpose(1, 2),
|
|
||||||
scale_factor=1/self.upsample_scale,
|
|
||||||
mode="linear").transpose(1, 2)
|
|
||||||
|
|
||||||
# tmp_over_one = torch.cumsum(rad_values, 1) % 1
|
|
||||||
# tmp_over_one_idx = (padDiff(tmp_over_one)) < 0
|
|
||||||
# cumsum_shift = torch.zeros_like(rad_values)
|
|
||||||
# cumsum_shift[:, 1:, :] = tmp_over_one_idx * -1.0
|
|
||||||
|
|
||||||
phase = torch.cumsum(rad_values, dim=1) * 2 * np.pi
|
|
||||||
phase = torch.nn.functional.interpolate(phase.transpose(1, 2) * self.upsample_scale,
|
|
||||||
scale_factor=self.upsample_scale, mode="linear").transpose(1, 2)
|
|
||||||
sines = torch.sin(phase)
|
|
||||||
|
|
||||||
else:
|
|
||||||
# If necessary, make sure that the first time step of every
|
|
||||||
# voiced segments is sin(pi) or cos(0)
|
|
||||||
# This is used for pulse-train generation
|
|
||||||
|
|
||||||
# identify the last time step in unvoiced segments
|
|
||||||
uv = self._f02uv(f0_values)
|
|
||||||
uv_1 = torch.roll(uv, shifts=-1, dims=1)
|
|
||||||
uv_1[:, -1, :] = 1
|
|
||||||
u_loc = (uv < 1) * (uv_1 > 0)
|
|
||||||
|
|
||||||
# get the instantanouse phase
|
|
||||||
tmp_cumsum = torch.cumsum(rad_values, dim=1)
|
|
||||||
# different batch needs to be processed differently
|
|
||||||
for idx in range(f0_values.shape[0]):
|
|
||||||
temp_sum = tmp_cumsum[idx, u_loc[idx, :, 0], :]
|
|
||||||
temp_sum[1:, :] = temp_sum[1:, :] - temp_sum[0:-1, :]
|
|
||||||
# stores the accumulation of i.phase within
|
|
||||||
# each voiced segments
|
|
||||||
tmp_cumsum[idx, :, :] = 0
|
|
||||||
tmp_cumsum[idx, u_loc[idx, :, 0], :] = temp_sum
|
|
||||||
|
|
||||||
# rad_values - tmp_cumsum: remove the accumulation of i.phase
|
|
||||||
# within the previous voiced segment.
|
|
||||||
i_phase = torch.cumsum(rad_values - tmp_cumsum, dim=1)
|
|
||||||
|
|
||||||
# get the sines
|
|
||||||
sines = torch.cos(i_phase * 2 * np.pi)
|
|
||||||
return sines
|
|
||||||
|
|
||||||
def forward(self, f0):
|
|
||||||
""" sine_tensor, uv = forward(f0)
|
|
||||||
input F0: tensor(batchsize=1, length, dim=1)
|
|
||||||
f0 for unvoiced steps should be 0
|
|
||||||
output sine_tensor: tensor(batchsize=1, length, dim)
|
|
||||||
output uv: tensor(batchsize=1, length, 1)
|
|
||||||
"""
|
|
||||||
f0_buf = torch.zeros(f0.shape[0], f0.shape[1], self.dim,
|
|
||||||
device=f0.device)
|
|
||||||
# fundamental component
|
|
||||||
fn = torch.multiply(f0, torch.FloatTensor([[range(1, self.harmonic_num + 2)]]).to(f0.device))
|
|
||||||
|
|
||||||
# generate sine waveforms
|
|
||||||
sine_waves = self._f02sine(fn) * self.sine_amp
|
|
||||||
|
|
||||||
# generate uv signal
|
|
||||||
# uv = torch.ones(f0.shape)
|
|
||||||
# uv = uv * (f0 > self.voiced_threshold)
|
|
||||||
uv = self._f02uv(f0)
|
|
||||||
|
|
||||||
# noise: for unvoiced should be similar to sine_amp
|
|
||||||
# std = self.sine_amp/3 -> max value ~ self.sine_amp
|
|
||||||
# . for voiced regions is self.noise_std
|
|
||||||
noise_amp = uv * self.noise_std + (1 - uv) * self.sine_amp / 3
|
|
||||||
noise = noise_amp * torch.randn_like(sine_waves)
|
|
||||||
|
|
||||||
# first: set the unvoiced part to 0 by uv
|
|
||||||
# then: additive noise
|
|
||||||
sine_waves = sine_waves * uv + noise
|
|
||||||
return sine_waves, uv, noise
|
|
||||||
|
|
||||||
|
|
||||||
class SourceModuleHnNSF(torch.nn.Module):
|
|
||||||
""" SourceModule for hn-nsf
|
|
||||||
SourceModule(sampling_rate, harmonic_num=0, sine_amp=0.1,
|
|
||||||
add_noise_std=0.003, voiced_threshod=0)
|
|
||||||
sampling_rate: sampling_rate in Hz
|
|
||||||
harmonic_num: number of harmonic above F0 (default: 0)
|
|
||||||
sine_amp: amplitude of sine source signal (default: 0.1)
|
|
||||||
add_noise_std: std of additive Gaussian noise (default: 0.003)
|
|
||||||
note that amplitude of noise in unvoiced is decided
|
|
||||||
by sine_amp
|
|
||||||
voiced_threshold: threhold to set U/V given F0 (default: 0)
|
|
||||||
Sine_source, noise_source = SourceModuleHnNSF(F0_sampled)
|
|
||||||
F0_sampled (batchsize, length, 1)
|
|
||||||
Sine_source (batchsize, length, 1)
|
|
||||||
noise_source (batchsize, length 1)
|
|
||||||
uv (batchsize, length, 1)
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, sampling_rate, upsample_scale, harmonic_num=0, sine_amp=0.1,
|
|
||||||
add_noise_std=0.003, voiced_threshod=0):
|
|
||||||
super(SourceModuleHnNSF, self).__init__()
|
|
||||||
|
|
||||||
self.sine_amp = sine_amp
|
|
||||||
self.noise_std = add_noise_std
|
|
||||||
|
|
||||||
# to produce sine waveforms
|
|
||||||
self.l_sin_gen = SineGen(sampling_rate, upsample_scale, harmonic_num,
|
|
||||||
sine_amp, add_noise_std, voiced_threshod)
|
|
||||||
|
|
||||||
# to merge source harmonics into a single excitation
|
|
||||||
self.l_linear = torch.nn.Linear(harmonic_num + 1, 1)
|
|
||||||
self.l_tanh = torch.nn.Tanh()
|
|
||||||
|
|
||||||
def forward(self, x):
|
|
||||||
"""
|
|
||||||
Sine_source, noise_source = SourceModuleHnNSF(F0_sampled)
|
|
||||||
F0_sampled (batchsize, length, 1)
|
|
||||||
Sine_source (batchsize, length, 1)
|
|
||||||
noise_source (batchsize, length 1)
|
|
||||||
"""
|
|
||||||
# source for harmonic branch
|
|
||||||
with torch.no_grad():
|
|
||||||
sine_wavs, uv, _ = self.l_sin_gen(x)
|
|
||||||
sine_merge = self.l_tanh(self.l_linear(sine_wavs))
|
|
||||||
|
|
||||||
# source for noise branch, in the same shape as uv
|
|
||||||
noise = torch.randn_like(uv) * self.sine_amp / 3
|
|
||||||
return sine_merge, noise, uv
|
|
||||||
def padDiff(x):
|
|
||||||
return F.pad(F.pad(x, (0,0,-1,1), 'constant', 0) - x, (0,0,0,-1), 'constant', 0)
|
|
||||||
|
|
||||||
|
|
||||||
class Generator(torch.nn.Module):
|
|
||||||
def __init__(self, style_dim, resblock_kernel_sizes, upsample_rates, upsample_initial_channel, resblock_dilation_sizes, upsample_kernel_sizes, gen_istft_n_fft, gen_istft_hop_size):
|
|
||||||
super(Generator, self).__init__()
|
|
||||||
|
|
||||||
self.num_kernels = len(resblock_kernel_sizes)
|
|
||||||
self.num_upsamples = len(upsample_rates)
|
|
||||||
resblock = AdaINResBlock1
|
|
||||||
|
|
||||||
self.m_source = SourceModuleHnNSF(
|
|
||||||
sampling_rate=24000,
|
|
||||||
upsample_scale=np.prod(upsample_rates) * gen_istft_hop_size,
|
|
||||||
harmonic_num=8, voiced_threshod=10)
|
|
||||||
self.f0_upsamp = torch.nn.Upsample(scale_factor=np.prod(upsample_rates) * gen_istft_hop_size)
|
|
||||||
self.noise_convs = nn.ModuleList()
|
|
||||||
self.noise_res = nn.ModuleList()
|
|
||||||
|
|
||||||
self.ups = nn.ModuleList()
|
|
||||||
for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
|
|
||||||
self.ups.append(weight_norm(
|
|
||||||
ConvTranspose1d(upsample_initial_channel//(2**i), upsample_initial_channel//(2**(i+1)),
|
|
||||||
k, u, padding=(k-u)//2)))
|
|
||||||
|
|
||||||
self.resblocks = nn.ModuleList()
|
|
||||||
for i in range(len(self.ups)):
|
|
||||||
ch = upsample_initial_channel//(2**(i+1))
|
|
||||||
for j, (k, d) in enumerate(zip(resblock_kernel_sizes,resblock_dilation_sizes)):
|
|
||||||
self.resblocks.append(resblock(ch, k, d, style_dim))
|
|
||||||
|
|
||||||
c_cur = upsample_initial_channel // (2 ** (i + 1))
|
|
||||||
|
|
||||||
if i + 1 < len(upsample_rates): #
|
|
||||||
stride_f0 = np.prod(upsample_rates[i + 1:])
|
|
||||||
self.noise_convs.append(Conv1d(
|
|
||||||
gen_istft_n_fft + 2, c_cur, kernel_size=stride_f0 * 2, stride=stride_f0, padding=(stride_f0+1) // 2))
|
|
||||||
self.noise_res.append(resblock(c_cur, 7, [1,3,5], style_dim))
|
|
||||||
else:
|
|
||||||
self.noise_convs.append(Conv1d(gen_istft_n_fft + 2, c_cur, kernel_size=1))
|
|
||||||
self.noise_res.append(resblock(c_cur, 11, [1,3,5], style_dim))
|
|
||||||
|
|
||||||
|
|
||||||
self.post_n_fft = gen_istft_n_fft
|
|
||||||
self.conv_post = weight_norm(Conv1d(ch, self.post_n_fft + 2, 7, 1, padding=3))
|
|
||||||
self.ups.apply(init_weights)
|
|
||||||
self.conv_post.apply(init_weights)
|
|
||||||
self.reflection_pad = torch.nn.ReflectionPad1d((1, 0))
|
|
||||||
self.stft = TorchSTFT(filter_length=gen_istft_n_fft, hop_length=gen_istft_hop_size, win_length=gen_istft_n_fft)
|
|
||||||
|
|
||||||
|
|
||||||
def forward(self, x, s, f0):
|
|
||||||
with torch.no_grad():
|
|
||||||
f0 = self.f0_upsamp(f0[:, None]).transpose(1, 2) # bs,n,t
|
|
||||||
|
|
||||||
har_source, noi_source, uv = self.m_source(f0)
|
|
||||||
har_source = har_source.transpose(1, 2).squeeze(1)
|
|
||||||
har_spec, har_phase = self.stft.transform(har_source)
|
|
||||||
har = torch.cat([har_spec, har_phase], dim=1)
|
|
||||||
|
|
||||||
for i in range(self.num_upsamples):
|
|
||||||
x = F.leaky_relu(x, LRELU_SLOPE)
|
|
||||||
x_source = self.noise_convs[i](har)
|
|
||||||
x_source = self.noise_res[i](x_source, s)
|
|
||||||
|
|
||||||
x = self.ups[i](x)
|
|
||||||
if i == self.num_upsamples - 1:
|
|
||||||
x = self.reflection_pad(x)
|
|
||||||
|
|
||||||
x = x + x_source
|
|
||||||
xs = None
|
|
||||||
for j in range(self.num_kernels):
|
|
||||||
if xs is None:
|
|
||||||
xs = self.resblocks[i*self.num_kernels+j](x, s)
|
|
||||||
else:
|
|
||||||
xs += self.resblocks[i*self.num_kernels+j](x, s)
|
|
||||||
x = xs / self.num_kernels
|
|
||||||
x = F.leaky_relu(x)
|
|
||||||
x = self.conv_post(x)
|
|
||||||
spec = torch.exp(x[:,:self.post_n_fft // 2 + 1, :])
|
|
||||||
phase = torch.sin(x[:, self.post_n_fft // 2 + 1:, :])
|
|
||||||
return self.stft.inverse(spec, phase)
|
|
||||||
|
|
||||||
def fw_phase(self, x, s):
|
|
||||||
for i in range(self.num_upsamples):
|
|
||||||
x = F.leaky_relu(x, LRELU_SLOPE)
|
|
||||||
x = self.ups[i](x)
|
|
||||||
xs = None
|
|
||||||
for j in range(self.num_kernels):
|
|
||||||
if xs is None:
|
|
||||||
xs = self.resblocks[i*self.num_kernels+j](x, s)
|
|
||||||
else:
|
|
||||||
xs += self.resblocks[i*self.num_kernels+j](x, s)
|
|
||||||
x = xs / self.num_kernels
|
|
||||||
x = F.leaky_relu(x)
|
|
||||||
x = self.reflection_pad(x)
|
|
||||||
x = self.conv_post(x)
|
|
||||||
spec = torch.exp(x[:,:self.post_n_fft // 2 + 1, :])
|
|
||||||
phase = torch.sin(x[:, self.post_n_fft // 2 + 1:, :])
|
|
||||||
return spec, phase
|
|
||||||
|
|
||||||
def remove_weight_norm(self):
|
|
||||||
print('Removing weight norm...')
|
|
||||||
for l in self.ups:
|
|
||||||
remove_weight_norm(l)
|
|
||||||
for l in self.resblocks:
|
|
||||||
l.remove_weight_norm()
|
|
||||||
remove_weight_norm(self.conv_pre)
|
|
||||||
remove_weight_norm(self.conv_post)
|
|
||||||
|
|
||||||
|
|
||||||
class AdainResBlk1d(nn.Module):
|
|
||||||
def __init__(self, dim_in, dim_out, style_dim=64, actv=nn.LeakyReLU(0.2),
|
|
||||||
upsample='none', dropout_p=0.0):
|
|
||||||
super().__init__()
|
|
||||||
self.actv = actv
|
|
||||||
self.upsample_type = upsample
|
|
||||||
self.upsample = UpSample1d(upsample)
|
|
||||||
self.learned_sc = dim_in != dim_out
|
|
||||||
self._build_weights(dim_in, dim_out, style_dim)
|
|
||||||
self.dropout = nn.Dropout(dropout_p)
|
|
||||||
|
|
||||||
if upsample == 'none':
|
|
||||||
self.pool = nn.Identity()
|
|
||||||
else:
|
|
||||||
self.pool = weight_norm(nn.ConvTranspose1d(dim_in, dim_in, kernel_size=3, stride=2, groups=dim_in, padding=1, output_padding=1))
|
|
||||||
|
|
||||||
|
|
||||||
def _build_weights(self, dim_in, dim_out, style_dim):
|
|
||||||
self.conv1 = weight_norm(nn.Conv1d(dim_in, dim_out, 3, 1, 1))
|
|
||||||
self.conv2 = weight_norm(nn.Conv1d(dim_out, dim_out, 3, 1, 1))
|
|
||||||
self.norm1 = AdaIN1d(style_dim, dim_in)
|
|
||||||
self.norm2 = AdaIN1d(style_dim, dim_out)
|
|
||||||
if self.learned_sc:
|
|
||||||
self.conv1x1 = weight_norm(nn.Conv1d(dim_in, dim_out, 1, 1, 0, bias=False))
|
|
||||||
|
|
||||||
def _shortcut(self, x):
|
|
||||||
x = self.upsample(x)
|
|
||||||
if self.learned_sc:
|
|
||||||
x = self.conv1x1(x)
|
|
||||||
return x
|
|
||||||
|
|
||||||
def _residual(self, x, s):
|
|
||||||
x = self.norm1(x, s)
|
|
||||||
x = self.actv(x)
|
|
||||||
x = self.pool(x)
|
|
||||||
x = self.conv1(self.dropout(x))
|
|
||||||
x = self.norm2(x, s)
|
|
||||||
x = self.actv(x)
|
|
||||||
x = self.conv2(self.dropout(x))
|
|
||||||
return x
|
|
||||||
|
|
||||||
def forward(self, x, s):
|
|
||||||
out = self._residual(x, s)
|
|
||||||
out = (out + self._shortcut(x)) / np.sqrt(2)
|
|
||||||
return out
|
|
||||||
|
|
||||||
class UpSample1d(nn.Module):
|
|
||||||
def __init__(self, layer_type):
|
|
||||||
super().__init__()
|
|
||||||
self.layer_type = layer_type
|
|
||||||
|
|
||||||
def forward(self, x):
|
|
||||||
if self.layer_type == 'none':
|
|
||||||
return x
|
|
||||||
else:
|
|
||||||
return F.interpolate(x, scale_factor=2, mode='nearest')
|
|
||||||
|
|
||||||
class Decoder(nn.Module):
|
|
||||||
def __init__(self, dim_in=512, F0_channel=512, style_dim=64, dim_out=80,
|
|
||||||
resblock_kernel_sizes = [3,7,11],
|
|
||||||
upsample_rates = [10, 6],
|
|
||||||
upsample_initial_channel=512,
|
|
||||||
resblock_dilation_sizes=[[1,3,5], [1,3,5], [1,3,5]],
|
|
||||||
upsample_kernel_sizes=[20, 12],
|
|
||||||
gen_istft_n_fft=20, gen_istft_hop_size=5):
|
|
||||||
super().__init__()
|
|
||||||
|
|
||||||
self.decode = nn.ModuleList()
|
|
||||||
|
|
||||||
self.encode = AdainResBlk1d(dim_in + 2, 1024, style_dim)
|
|
||||||
|
|
||||||
self.decode.append(AdainResBlk1d(1024 + 2 + 64, 1024, style_dim))
|
|
||||||
self.decode.append(AdainResBlk1d(1024 + 2 + 64, 1024, style_dim))
|
|
||||||
self.decode.append(AdainResBlk1d(1024 + 2 + 64, 1024, style_dim))
|
|
||||||
self.decode.append(AdainResBlk1d(1024 + 2 + 64, 512, style_dim, upsample=True))
|
|
||||||
|
|
||||||
self.F0_conv = weight_norm(nn.Conv1d(1, 1, kernel_size=3, stride=2, groups=1, padding=1))
|
|
||||||
|
|
||||||
self.N_conv = weight_norm(nn.Conv1d(1, 1, kernel_size=3, stride=2, groups=1, padding=1))
|
|
||||||
|
|
||||||
self.asr_res = nn.Sequential(
|
|
||||||
weight_norm(nn.Conv1d(512, 64, kernel_size=1)),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
self.generator = Generator(style_dim, resblock_kernel_sizes, upsample_rates,
|
|
||||||
upsample_initial_channel, resblock_dilation_sizes,
|
|
||||||
upsample_kernel_sizes, gen_istft_n_fft, gen_istft_hop_size)
|
|
||||||
|
|
||||||
def forward(self, asr, F0_curve, N, s):
|
|
||||||
F0 = self.F0_conv(F0_curve.unsqueeze(1))
|
|
||||||
N = self.N_conv(N.unsqueeze(1))
|
|
||||||
|
|
||||||
x = torch.cat([asr, F0, N], axis=1)
|
|
||||||
x = self.encode(x, s)
|
|
||||||
|
|
||||||
asr_res = self.asr_res(asr)
|
|
||||||
|
|
||||||
res = True
|
|
||||||
for block in self.decode:
|
|
||||||
if res:
|
|
||||||
x = torch.cat([x, asr_res, F0, N], axis=1)
|
|
||||||
x = block(x, s)
|
|
||||||
if block.upsample_type != "none":
|
|
||||||
res = False
|
|
||||||
|
|
||||||
x = self.generator(x, s, F0_curve)
|
|
||||||
return x
|
|
||||||
@@ -1,166 +0,0 @@
|
|||||||
# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/kokoro.py
|
|
||||||
import phonemizer
|
|
||||||
import re
|
|
||||||
import torch
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
def split_num(num):
|
|
||||||
num = num.group()
|
|
||||||
if '.' in num:
|
|
||||||
return num
|
|
||||||
elif ':' in num:
|
|
||||||
h, m = [int(n) for n in num.split(':')]
|
|
||||||
if m == 0:
|
|
||||||
return f"{h} o'clock"
|
|
||||||
elif m < 10:
|
|
||||||
return f'{h} oh {m}'
|
|
||||||
return f'{h} {m}'
|
|
||||||
year = int(num[:4])
|
|
||||||
if year < 1100 or year % 1000 < 10:
|
|
||||||
return num
|
|
||||||
left, right = num[:2], int(num[2:4])
|
|
||||||
s = 's' if num.endswith('s') else ''
|
|
||||||
if 100 <= year % 1000 <= 999:
|
|
||||||
if right == 0:
|
|
||||||
return f'{left} hundred{s}'
|
|
||||||
elif right < 10:
|
|
||||||
return f'{left} oh {right}{s}'
|
|
||||||
return f'{left} {right}{s}'
|
|
||||||
|
|
||||||
def flip_money(m):
|
|
||||||
m = m.group()
|
|
||||||
bill = 'dollar' if m[0] == '$' else 'pound'
|
|
||||||
if m[-1].isalpha():
|
|
||||||
return f'{m[1:]} {bill}s'
|
|
||||||
elif '.' not in m:
|
|
||||||
s = '' if m[1:] == '1' else 's'
|
|
||||||
return f'{m[1:]} {bill}{s}'
|
|
||||||
b, c = m[1:].split('.')
|
|
||||||
s = '' if b == '1' else 's'
|
|
||||||
c = int(c.ljust(2, '0'))
|
|
||||||
coins = f"cent{'' if c == 1 else 's'}" if m[0] == '$' else ('penny' if c == 1 else 'pence')
|
|
||||||
return f'{b} {bill}{s} and {c} {coins}'
|
|
||||||
|
|
||||||
def point_num(num):
|
|
||||||
a, b = num.group().split('.')
|
|
||||||
return ' point '.join([a, ' '.join(b)])
|
|
||||||
|
|
||||||
def normalize_text(text):
|
|
||||||
text = text.replace(chr(8216), "'").replace(chr(8217), "'")
|
|
||||||
text = text.replace('«', chr(8220)).replace('»', chr(8221))
|
|
||||||
text = text.replace(chr(8220), '"').replace(chr(8221), '"')
|
|
||||||
text = text.replace('(', '«').replace(')', '»')
|
|
||||||
for a, b in zip('、。!,:;?', ',.!,:;?'):
|
|
||||||
text = text.replace(a, b+' ')
|
|
||||||
text = re.sub(r'[^\S \n]', ' ', text)
|
|
||||||
text = re.sub(r' +', ' ', text)
|
|
||||||
text = re.sub(r'(?<=\n) +(?=\n)', '', text)
|
|
||||||
text = re.sub(r'\bD[Rr]\.(?= [A-Z])', 'Doctor', text)
|
|
||||||
text = re.sub(r'\b(?:Mr\.|MR\.(?= [A-Z]))', 'Mister', text)
|
|
||||||
text = re.sub(r'\b(?:Ms\.|MS\.(?= [A-Z]))', 'Miss', text)
|
|
||||||
text = re.sub(r'\b(?:Mrs\.|MRS\.(?= [A-Z]))', 'Mrs', text)
|
|
||||||
text = re.sub(r'\betc\.(?! [A-Z])', 'etc', text)
|
|
||||||
text = re.sub(r'(?i)\b(y)eah?\b', r"\1e'a", text)
|
|
||||||
text = re.sub(r'\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)', split_num, text)
|
|
||||||
text = re.sub(r'(?<=\d),(?=\d)', '', text)
|
|
||||||
text = re.sub(r'(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b', flip_money, text)
|
|
||||||
text = re.sub(r'\d*\.\d+', point_num, text)
|
|
||||||
text = re.sub(r'(?<=\d)-(?=\d)', ' to ', text)
|
|
||||||
text = re.sub(r'(?<=\d)S', ' S', text)
|
|
||||||
text = re.sub(r"(?<=[BCDFGHJ-NP-TV-Z])'?s\b", "'S", text)
|
|
||||||
text = re.sub(r"(?<=X')S\b", 's', text)
|
|
||||||
text = re.sub(r'(?:[A-Za-z]\.){2,} [a-z]', lambda m: m.group().replace('.', '-'), text)
|
|
||||||
text = re.sub(r'(?i)(?<=[A-Z])\.(?=[A-Z])', '-', text)
|
|
||||||
return text.strip()
|
|
||||||
|
|
||||||
def get_vocab():
|
|
||||||
_pad = "$"
|
|
||||||
_punctuation = ';:,.!?¡¿—…"«»“” '
|
|
||||||
_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
|
|
||||||
_letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"
|
|
||||||
symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa)
|
|
||||||
dicts = {}
|
|
||||||
for i in range(len((symbols))):
|
|
||||||
dicts[symbols[i]] = i
|
|
||||||
return dicts
|
|
||||||
|
|
||||||
VOCAB = get_vocab()
|
|
||||||
def tokenize(ps):
|
|
||||||
return [i for i in map(VOCAB.get, ps) if i is not None]
|
|
||||||
|
|
||||||
phonemizers = dict(
|
|
||||||
a=phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True),
|
|
||||||
b=phonemizer.backend.EspeakBackend(language='en-gb', preserve_punctuation=True, with_stress=True),
|
|
||||||
)
|
|
||||||
def phonemize(text, lang, norm=True):
|
|
||||||
if norm:
|
|
||||||
text = normalize_text(text)
|
|
||||||
ps = phonemizers[lang].phonemize([text])
|
|
||||||
ps = ps[0] if ps else ''
|
|
||||||
# https://en.wiktionary.org/wiki/kokoro#English
|
|
||||||
ps = ps.replace('kəkˈoːɹoʊ', 'kˈoʊkəɹoʊ').replace('kəkˈɔːɹəʊ', 'kˈəʊkəɹəʊ')
|
|
||||||
ps = ps.replace('ʲ', 'j').replace('r', 'ɹ').replace('x', 'k').replace('ɬ', 'l')
|
|
||||||
ps = re.sub(r'(?<=[a-zɹː])(?=hˈʌndɹɪd)', ' ', ps)
|
|
||||||
ps = re.sub(r' z(?=[;:,.!?¡¿—…"«»“” ]|$)', 'z', ps)
|
|
||||||
if lang == 'a':
|
|
||||||
ps = re.sub(r'(?<=nˈaɪn)ti(?!ː)', 'di', ps)
|
|
||||||
ps = ''.join(filter(lambda p: p in VOCAB, ps))
|
|
||||||
return ps.strip()
|
|
||||||
|
|
||||||
def length_to_mask(lengths):
|
|
||||||
mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)
|
|
||||||
mask = torch.gt(mask+1, lengths.unsqueeze(1))
|
|
||||||
return mask
|
|
||||||
|
|
||||||
@torch.no_grad()
|
|
||||||
def forward(model, tokens, ref_s, speed):
|
|
||||||
device = ref_s.device
|
|
||||||
tokens = torch.LongTensor([[0, *tokens, 0]]).to(device)
|
|
||||||
input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)
|
|
||||||
text_mask = length_to_mask(input_lengths).to(device)
|
|
||||||
bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())
|
|
||||||
d_en = model.bert_encoder(bert_dur).transpose(-1, -2)
|
|
||||||
s = ref_s[:, 128:]
|
|
||||||
d = model.predictor.text_encoder(d_en, s, input_lengths, text_mask)
|
|
||||||
x, _ = model.predictor.lstm(d)
|
|
||||||
duration = model.predictor.duration_proj(x)
|
|
||||||
duration = torch.sigmoid(duration).sum(axis=-1) / speed
|
|
||||||
pred_dur = torch.round(duration).clamp(min=1).long()
|
|
||||||
pred_aln_trg = torch.zeros(input_lengths, pred_dur.sum().item())
|
|
||||||
c_frame = 0
|
|
||||||
for i in range(pred_aln_trg.size(0)):
|
|
||||||
pred_aln_trg[i, c_frame:c_frame + pred_dur[0,i].item()] = 1
|
|
||||||
c_frame += pred_dur[0,i].item()
|
|
||||||
en = d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device)
|
|
||||||
F0_pred, N_pred = model.predictor.F0Ntrain(en, s)
|
|
||||||
t_en = model.text_encoder(tokens, input_lengths, text_mask)
|
|
||||||
asr = t_en @ pred_aln_trg.unsqueeze(0).to(device)
|
|
||||||
return model.decoder(asr, F0_pred, N_pred, ref_s[:, :128]).squeeze().cpu().numpy()
|
|
||||||
|
|
||||||
def generate(model, text, voicepack, lang='a', speed=1, ps=None):
|
|
||||||
ps = ps or phonemize(text, lang)
|
|
||||||
tokens = tokenize(ps)
|
|
||||||
if not tokens:
|
|
||||||
return None
|
|
||||||
elif len(tokens) > 510:
|
|
||||||
tokens = tokens[:510]
|
|
||||||
print('Truncated to 510 tokens')
|
|
||||||
ref_s = voicepack[len(tokens)]
|
|
||||||
out = forward(model, tokens, ref_s, speed)
|
|
||||||
ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens)
|
|
||||||
return out, ps
|
|
||||||
|
|
||||||
def generate_full(model, text, voicepack, lang='a', speed=1, ps=None):
|
|
||||||
ps = ps or phonemize(text, lang)
|
|
||||||
tokens = tokenize(ps)
|
|
||||||
if not tokens:
|
|
||||||
return None
|
|
||||||
outs = []
|
|
||||||
loop_count = len(tokens)//510 + (1 if len(tokens) % 510 != 0 else 0)
|
|
||||||
for i in range(loop_count):
|
|
||||||
ref_s = voicepack[len(tokens[i*510:(i+1)*510])]
|
|
||||||
out = forward(model, tokens[i*510:(i+1)*510], ref_s, speed)
|
|
||||||
outs.append(out)
|
|
||||||
outs = np.concatenate(outs)
|
|
||||||
ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens)
|
|
||||||
return outs, ps
|
|
||||||
@@ -1,373 +0,0 @@
|
|||||||
# https://github.com/yl4579/StyleTTS2/blob/main/models.py
|
|
||||||
# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/models.py
|
|
||||||
from istftnet import AdaIN1d, Decoder
|
|
||||||
from munch import Munch
|
|
||||||
from pathlib import Path
|
|
||||||
from plbert import load_plbert
|
|
||||||
from torch.nn.utils import weight_norm, spectral_norm
|
|
||||||
import json
|
|
||||||
import numpy as np
|
|
||||||
import os
|
|
||||||
import os.path as osp
|
|
||||||
import torch
|
|
||||||
import torch.nn as nn
|
|
||||||
import torch.nn.functional as F
|
|
||||||
|
|
||||||
class LinearNorm(torch.nn.Module):
|
|
||||||
def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'):
|
|
||||||
super(LinearNorm, self).__init__()
|
|
||||||
self.linear_layer = torch.nn.Linear(in_dim, out_dim, bias=bias)
|
|
||||||
|
|
||||||
torch.nn.init.xavier_uniform_(
|
|
||||||
self.linear_layer.weight,
|
|
||||||
gain=torch.nn.init.calculate_gain(w_init_gain))
|
|
||||||
|
|
||||||
def forward(self, x):
|
|
||||||
return self.linear_layer(x)
|
|
||||||
|
|
||||||
class LayerNorm(nn.Module):
|
|
||||||
def __init__(self, channels, eps=1e-5):
|
|
||||||
super().__init__()
|
|
||||||
self.channels = channels
|
|
||||||
self.eps = eps
|
|
||||||
|
|
||||||
self.gamma = nn.Parameter(torch.ones(channels))
|
|
||||||
self.beta = nn.Parameter(torch.zeros(channels))
|
|
||||||
|
|
||||||
def forward(self, x):
|
|
||||||
x = x.transpose(1, -1)
|
|
||||||
x = F.layer_norm(x, (self.channels,), self.gamma, self.beta, self.eps)
|
|
||||||
return x.transpose(1, -1)
|
|
||||||
|
|
||||||
class TextEncoder(nn.Module):
|
|
||||||
def __init__(self, channels, kernel_size, depth, n_symbols, actv=nn.LeakyReLU(0.2)):
|
|
||||||
super().__init__()
|
|
||||||
self.embedding = nn.Embedding(n_symbols, channels)
|
|
||||||
|
|
||||||
padding = (kernel_size - 1) // 2
|
|
||||||
self.cnn = nn.ModuleList()
|
|
||||||
for _ in range(depth):
|
|
||||||
self.cnn.append(nn.Sequential(
|
|
||||||
weight_norm(nn.Conv1d(channels, channels, kernel_size=kernel_size, padding=padding)),
|
|
||||||
LayerNorm(channels),
|
|
||||||
actv,
|
|
||||||
nn.Dropout(0.2),
|
|
||||||
))
|
|
||||||
# self.cnn = nn.Sequential(*self.cnn)
|
|
||||||
|
|
||||||
self.lstm = nn.LSTM(channels, channels//2, 1, batch_first=True, bidirectional=True)
|
|
||||||
|
|
||||||
def forward(self, x, input_lengths, m):
|
|
||||||
x = self.embedding(x) # [B, T, emb]
|
|
||||||
x = x.transpose(1, 2) # [B, emb, T]
|
|
||||||
m = m.to(input_lengths.device).unsqueeze(1)
|
|
||||||
x.masked_fill_(m, 0.0)
|
|
||||||
|
|
||||||
for c in self.cnn:
|
|
||||||
x = c(x)
|
|
||||||
x.masked_fill_(m, 0.0)
|
|
||||||
|
|
||||||
x = x.transpose(1, 2) # [B, T, chn]
|
|
||||||
|
|
||||||
input_lengths = input_lengths.cpu().numpy()
|
|
||||||
x = nn.utils.rnn.pack_padded_sequence(
|
|
||||||
x, input_lengths, batch_first=True, enforce_sorted=False)
|
|
||||||
|
|
||||||
self.lstm.flatten_parameters()
|
|
||||||
x, _ = self.lstm(x)
|
|
||||||
x, _ = nn.utils.rnn.pad_packed_sequence(
|
|
||||||
x, batch_first=True)
|
|
||||||
|
|
||||||
x = x.transpose(-1, -2)
|
|
||||||
x_pad = torch.zeros([x.shape[0], x.shape[1], m.shape[-1]])
|
|
||||||
|
|
||||||
x_pad[:, :, :x.shape[-1]] = x
|
|
||||||
x = x_pad.to(x.device)
|
|
||||||
|
|
||||||
x.masked_fill_(m, 0.0)
|
|
||||||
|
|
||||||
return x
|
|
||||||
|
|
||||||
def inference(self, x):
|
|
||||||
x = self.embedding(x)
|
|
||||||
x = x.transpose(1, 2)
|
|
||||||
x = self.cnn(x)
|
|
||||||
x = x.transpose(1, 2)
|
|
||||||
self.lstm.flatten_parameters()
|
|
||||||
x, _ = self.lstm(x)
|
|
||||||
return x
|
|
||||||
|
|
||||||
def length_to_mask(self, lengths):
|
|
||||||
mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)
|
|
||||||
mask = torch.gt(mask+1, lengths.unsqueeze(1))
|
|
||||||
return mask
|
|
||||||
|
|
||||||
|
|
||||||
class UpSample1d(nn.Module):
|
|
||||||
def __init__(self, layer_type):
|
|
||||||
super().__init__()
|
|
||||||
self.layer_type = layer_type
|
|
||||||
|
|
||||||
def forward(self, x):
|
|
||||||
if self.layer_type == 'none':
|
|
||||||
return x
|
|
||||||
else:
|
|
||||||
return F.interpolate(x, scale_factor=2, mode='nearest')
|
|
||||||
|
|
||||||
class AdainResBlk1d(nn.Module):
|
|
||||||
def __init__(self, dim_in, dim_out, style_dim=64, actv=nn.LeakyReLU(0.2),
|
|
||||||
upsample='none', dropout_p=0.0):
|
|
||||||
super().__init__()
|
|
||||||
self.actv = actv
|
|
||||||
self.upsample_type = upsample
|
|
||||||
self.upsample = UpSample1d(upsample)
|
|
||||||
self.learned_sc = dim_in != dim_out
|
|
||||||
self._build_weights(dim_in, dim_out, style_dim)
|
|
||||||
self.dropout = nn.Dropout(dropout_p)
|
|
||||||
|
|
||||||
if upsample == 'none':
|
|
||||||
self.pool = nn.Identity()
|
|
||||||
else:
|
|
||||||
self.pool = weight_norm(nn.ConvTranspose1d(dim_in, dim_in, kernel_size=3, stride=2, groups=dim_in, padding=1, output_padding=1))
|
|
||||||
|
|
||||||
|
|
||||||
def _build_weights(self, dim_in, dim_out, style_dim):
|
|
||||||
self.conv1 = weight_norm(nn.Conv1d(dim_in, dim_out, 3, 1, 1))
|
|
||||||
self.conv2 = weight_norm(nn.Conv1d(dim_out, dim_out, 3, 1, 1))
|
|
||||||
self.norm1 = AdaIN1d(style_dim, dim_in)
|
|
||||||
self.norm2 = AdaIN1d(style_dim, dim_out)
|
|
||||||
if self.learned_sc:
|
|
||||||
self.conv1x1 = weight_norm(nn.Conv1d(dim_in, dim_out, 1, 1, 0, bias=False))
|
|
||||||
|
|
||||||
def _shortcut(self, x):
|
|
||||||
x = self.upsample(x)
|
|
||||||
if self.learned_sc:
|
|
||||||
x = self.conv1x1(x)
|
|
||||||
return x
|
|
||||||
|
|
||||||
def _residual(self, x, s):
|
|
||||||
x = self.norm1(x, s)
|
|
||||||
x = self.actv(x)
|
|
||||||
x = self.pool(x)
|
|
||||||
x = self.conv1(self.dropout(x))
|
|
||||||
x = self.norm2(x, s)
|
|
||||||
x = self.actv(x)
|
|
||||||
x = self.conv2(self.dropout(x))
|
|
||||||
return x
|
|
||||||
|
|
||||||
def forward(self, x, s):
|
|
||||||
out = self._residual(x, s)
|
|
||||||
out = (out + self._shortcut(x)) / np.sqrt(2)
|
|
||||||
return out
|
|
||||||
|
|
||||||
class AdaLayerNorm(nn.Module):
|
|
||||||
def __init__(self, style_dim, channels, eps=1e-5):
|
|
||||||
super().__init__()
|
|
||||||
self.channels = channels
|
|
||||||
self.eps = eps
|
|
||||||
|
|
||||||
self.fc = nn.Linear(style_dim, channels*2)
|
|
||||||
|
|
||||||
def forward(self, x, s):
|
|
||||||
x = x.transpose(-1, -2)
|
|
||||||
x = x.transpose(1, -1)
|
|
||||||
|
|
||||||
h = self.fc(s)
|
|
||||||
h = h.view(h.size(0), h.size(1), 1)
|
|
||||||
gamma, beta = torch.chunk(h, chunks=2, dim=1)
|
|
||||||
gamma, beta = gamma.transpose(1, -1), beta.transpose(1, -1)
|
|
||||||
|
|
||||||
|
|
||||||
x = F.layer_norm(x, (self.channels,), eps=self.eps)
|
|
||||||
x = (1 + gamma) * x + beta
|
|
||||||
return x.transpose(1, -1).transpose(-1, -2)
|
|
||||||
|
|
||||||
class ProsodyPredictor(nn.Module):
|
|
||||||
|
|
||||||
def __init__(self, style_dim, d_hid, nlayers, max_dur=50, dropout=0.1):
|
|
||||||
super().__init__()
|
|
||||||
|
|
||||||
self.text_encoder = DurationEncoder(sty_dim=style_dim,
|
|
||||||
d_model=d_hid,
|
|
||||||
nlayers=nlayers,
|
|
||||||
dropout=dropout)
|
|
||||||
|
|
||||||
self.lstm = nn.LSTM(d_hid + style_dim, d_hid // 2, 1, batch_first=True, bidirectional=True)
|
|
||||||
self.duration_proj = LinearNorm(d_hid, max_dur)
|
|
||||||
|
|
||||||
self.shared = nn.LSTM(d_hid + style_dim, d_hid // 2, 1, batch_first=True, bidirectional=True)
|
|
||||||
self.F0 = nn.ModuleList()
|
|
||||||
self.F0.append(AdainResBlk1d(d_hid, d_hid, style_dim, dropout_p=dropout))
|
|
||||||
self.F0.append(AdainResBlk1d(d_hid, d_hid // 2, style_dim, upsample=True, dropout_p=dropout))
|
|
||||||
self.F0.append(AdainResBlk1d(d_hid // 2, d_hid // 2, style_dim, dropout_p=dropout))
|
|
||||||
|
|
||||||
self.N = nn.ModuleList()
|
|
||||||
self.N.append(AdainResBlk1d(d_hid, d_hid, style_dim, dropout_p=dropout))
|
|
||||||
self.N.append(AdainResBlk1d(d_hid, d_hid // 2, style_dim, upsample=True, dropout_p=dropout))
|
|
||||||
self.N.append(AdainResBlk1d(d_hid // 2, d_hid // 2, style_dim, dropout_p=dropout))
|
|
||||||
|
|
||||||
self.F0_proj = nn.Conv1d(d_hid // 2, 1, 1, 1, 0)
|
|
||||||
self.N_proj = nn.Conv1d(d_hid // 2, 1, 1, 1, 0)
|
|
||||||
|
|
||||||
|
|
||||||
def forward(self, texts, style, text_lengths, alignment, m):
|
|
||||||
d = self.text_encoder(texts, style, text_lengths, m)
|
|
||||||
|
|
||||||
batch_size = d.shape[0]
|
|
||||||
text_size = d.shape[1]
|
|
||||||
|
|
||||||
# predict duration
|
|
||||||
input_lengths = text_lengths.cpu().numpy()
|
|
||||||
x = nn.utils.rnn.pack_padded_sequence(
|
|
||||||
d, input_lengths, batch_first=True, enforce_sorted=False)
|
|
||||||
|
|
||||||
m = m.to(text_lengths.device).unsqueeze(1)
|
|
||||||
|
|
||||||
self.lstm.flatten_parameters()
|
|
||||||
x, _ = self.lstm(x)
|
|
||||||
x, _ = nn.utils.rnn.pad_packed_sequence(
|
|
||||||
x, batch_first=True)
|
|
||||||
|
|
||||||
x_pad = torch.zeros([x.shape[0], m.shape[-1], x.shape[-1]])
|
|
||||||
|
|
||||||
x_pad[:, :x.shape[1], :] = x
|
|
||||||
x = x_pad.to(x.device)
|
|
||||||
|
|
||||||
duration = self.duration_proj(nn.functional.dropout(x, 0.5, training=self.training))
|
|
||||||
|
|
||||||
en = (d.transpose(-1, -2) @ alignment)
|
|
||||||
|
|
||||||
return duration.squeeze(-1), en
|
|
||||||
|
|
||||||
def F0Ntrain(self, x, s):
|
|
||||||
x, _ = self.shared(x.transpose(-1, -2))
|
|
||||||
|
|
||||||
F0 = x.transpose(-1, -2)
|
|
||||||
for block in self.F0:
|
|
||||||
F0 = block(F0, s)
|
|
||||||
F0 = self.F0_proj(F0)
|
|
||||||
|
|
||||||
N = x.transpose(-1, -2)
|
|
||||||
for block in self.N:
|
|
||||||
N = block(N, s)
|
|
||||||
N = self.N_proj(N)
|
|
||||||
|
|
||||||
return F0.squeeze(1), N.squeeze(1)
|
|
||||||
|
|
||||||
def length_to_mask(self, lengths):
|
|
||||||
mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)
|
|
||||||
mask = torch.gt(mask+1, lengths.unsqueeze(1))
|
|
||||||
return mask
|
|
||||||
|
|
||||||
class DurationEncoder(nn.Module):
|
|
||||||
|
|
||||||
def __init__(self, sty_dim, d_model, nlayers, dropout=0.1):
|
|
||||||
super().__init__()
|
|
||||||
self.lstms = nn.ModuleList()
|
|
||||||
for _ in range(nlayers):
|
|
||||||
self.lstms.append(nn.LSTM(d_model + sty_dim,
|
|
||||||
d_model // 2,
|
|
||||||
num_layers=1,
|
|
||||||
batch_first=True,
|
|
||||||
bidirectional=True,
|
|
||||||
dropout=dropout))
|
|
||||||
self.lstms.append(AdaLayerNorm(sty_dim, d_model))
|
|
||||||
|
|
||||||
|
|
||||||
self.dropout = dropout
|
|
||||||
self.d_model = d_model
|
|
||||||
self.sty_dim = sty_dim
|
|
||||||
|
|
||||||
def forward(self, x, style, text_lengths, m):
|
|
||||||
masks = m.to(text_lengths.device)
|
|
||||||
|
|
||||||
x = x.permute(2, 0, 1)
|
|
||||||
s = style.expand(x.shape[0], x.shape[1], -1)
|
|
||||||
x = torch.cat([x, s], axis=-1)
|
|
||||||
x.masked_fill_(masks.unsqueeze(-1).transpose(0, 1), 0.0)
|
|
||||||
|
|
||||||
x = x.transpose(0, 1)
|
|
||||||
input_lengths = text_lengths.cpu().numpy()
|
|
||||||
x = x.transpose(-1, -2)
|
|
||||||
|
|
||||||
for block in self.lstms:
|
|
||||||
if isinstance(block, AdaLayerNorm):
|
|
||||||
x = block(x.transpose(-1, -2), style).transpose(-1, -2)
|
|
||||||
x = torch.cat([x, s.permute(1, -1, 0)], axis=1)
|
|
||||||
x.masked_fill_(masks.unsqueeze(-1).transpose(-1, -2), 0.0)
|
|
||||||
else:
|
|
||||||
x = x.transpose(-1, -2)
|
|
||||||
x = nn.utils.rnn.pack_padded_sequence(
|
|
||||||
x, input_lengths, batch_first=True, enforce_sorted=False)
|
|
||||||
block.flatten_parameters()
|
|
||||||
x, _ = block(x)
|
|
||||||
x, _ = nn.utils.rnn.pad_packed_sequence(
|
|
||||||
x, batch_first=True)
|
|
||||||
x = F.dropout(x, p=self.dropout, training=self.training)
|
|
||||||
x = x.transpose(-1, -2)
|
|
||||||
|
|
||||||
x_pad = torch.zeros([x.shape[0], x.shape[1], m.shape[-1]])
|
|
||||||
|
|
||||||
x_pad[:, :, :x.shape[-1]] = x
|
|
||||||
x = x_pad.to(x.device)
|
|
||||||
|
|
||||||
return x.transpose(-1, -2)
|
|
||||||
|
|
||||||
def inference(self, x, style):
|
|
||||||
x = self.embedding(x.transpose(-1, -2)) * np.sqrt(self.d_model)
|
|
||||||
style = style.expand(x.shape[0], x.shape[1], -1)
|
|
||||||
x = torch.cat([x, style], axis=-1)
|
|
||||||
src = self.pos_encoder(x)
|
|
||||||
output = self.transformer_encoder(src).transpose(0, 1)
|
|
||||||
return output
|
|
||||||
|
|
||||||
def length_to_mask(self, lengths):
|
|
||||||
mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)
|
|
||||||
mask = torch.gt(mask+1, lengths.unsqueeze(1))
|
|
||||||
return mask
|
|
||||||
|
|
||||||
# https://github.com/yl4579/StyleTTS2/blob/main/utils.py
|
|
||||||
def recursive_munch(d):
|
|
||||||
if isinstance(d, dict):
|
|
||||||
return Munch((k, recursive_munch(v)) for k, v in d.items())
|
|
||||||
elif isinstance(d, list):
|
|
||||||
return [recursive_munch(v) for v in d]
|
|
||||||
else:
|
|
||||||
return d
|
|
||||||
|
|
||||||
def build_model(path, device):
|
|
||||||
config = Path(__file__).parent / 'config.json'
|
|
||||||
assert config.exists(), f'Config path incorrect: config.json not found at {config}'
|
|
||||||
with open(config, 'r') as r:
|
|
||||||
args = recursive_munch(json.load(r))
|
|
||||||
assert args.decoder.type == 'istftnet', f'Unknown decoder type: {args.decoder.type}'
|
|
||||||
decoder = Decoder(dim_in=args.hidden_dim, style_dim=args.style_dim, dim_out=args.n_mels,
|
|
||||||
resblock_kernel_sizes = args.decoder.resblock_kernel_sizes,
|
|
||||||
upsample_rates = args.decoder.upsample_rates,
|
|
||||||
upsample_initial_channel=args.decoder.upsample_initial_channel,
|
|
||||||
resblock_dilation_sizes=args.decoder.resblock_dilation_sizes,
|
|
||||||
upsample_kernel_sizes=args.decoder.upsample_kernel_sizes,
|
|
||||||
gen_istft_n_fft=args.decoder.gen_istft_n_fft, gen_istft_hop_size=args.decoder.gen_istft_hop_size)
|
|
||||||
text_encoder = TextEncoder(channels=args.hidden_dim, kernel_size=5, depth=args.n_layer, n_symbols=args.n_token)
|
|
||||||
predictor = ProsodyPredictor(style_dim=args.style_dim, d_hid=args.hidden_dim, nlayers=args.n_layer, max_dur=args.max_dur, dropout=args.dropout)
|
|
||||||
bert = load_plbert()
|
|
||||||
bert_encoder = nn.Linear(bert.config.hidden_size, args.hidden_dim)
|
|
||||||
for parent in [bert, bert_encoder, predictor, decoder, text_encoder]:
|
|
||||||
for child in parent.children():
|
|
||||||
if isinstance(child, nn.RNNBase):
|
|
||||||
child.flatten_parameters()
|
|
||||||
model = Munch(
|
|
||||||
bert=bert.to(device).eval(),
|
|
||||||
bert_encoder=bert_encoder.to(device).eval(),
|
|
||||||
predictor=predictor.to(device).eval(),
|
|
||||||
decoder=decoder.to(device).eval(),
|
|
||||||
text_encoder=text_encoder.to(device).eval(),
|
|
||||||
)
|
|
||||||
for key, state_dict in torch.load(path, map_location='cpu', weights_only=True)['net'].items():
|
|
||||||
assert key in model, key
|
|
||||||
try:
|
|
||||||
model[key].load_state_dict(state_dict)
|
|
||||||
except:
|
|
||||||
state_dict = {k[7:]: v for k, v in state_dict.items()}
|
|
||||||
model[key].load_state_dict(state_dict, strict=False)
|
|
||||||
return model
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/plbert.py
|
|
||||||
# https://github.com/yl4579/StyleTTS2/blob/main/Utils/PLBERT/util.py
|
|
||||||
from transformers import AlbertConfig, AlbertModel
|
|
||||||
|
|
||||||
class CustomAlbert(AlbertModel):
|
|
||||||
def forward(self, *args, **kwargs):
|
|
||||||
# Call the original forward method
|
|
||||||
outputs = super().forward(*args, **kwargs)
|
|
||||||
# Only return the last_hidden_state
|
|
||||||
return outputs.last_hidden_state
|
|
||||||
|
|
||||||
def load_plbert():
|
|
||||||
plbert_config = {'vocab_size': 178, 'hidden_size': 768, 'num_attention_heads': 12, 'intermediate_size': 2048, 'max_position_embeddings': 512, 'num_hidden_layers': 12, 'dropout': 0.1}
|
|
||||||
albert_base_configuration = AlbertConfig(**plbert_config)
|
|
||||||
bert = CustomAlbert(albert_base_configuration)
|
|
||||||
return bert
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -e
|
|
||||||
|
|
||||||
source $(dirname $0)/../common/libbackend.sh
|
|
||||||
|
|
||||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
torch==2.4.1
|
|
||||||
transformers
|
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
torch==2.4.1
|
|
||||||
transformers
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
|
||||||
intel-extension-for-pytorch==2.3.110+xpu
|
|
||||||
torch==2.3.1+cxx11.abi
|
|
||||||
oneccl_bind_pt==2.3.100+xpu
|
|
||||||
transformers
|
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
grpcio==1.69.0
|
|
||||||
protobuf
|
|
||||||
phonemizer
|
|
||||||
scipy
|
|
||||||
munch
|
|
||||||
setuptools
|
|
||||||
soundfile
|
|
||||||
29
backend/python/mamba/Makefile
Normal file
29
backend/python/mamba/Makefile
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
.PHONY: mamba
|
||||||
|
mamba: protogen
|
||||||
|
bash install.sh
|
||||||
|
|
||||||
|
.PHONY: run
|
||||||
|
run: protogen
|
||||||
|
@echo "Running mamba..."
|
||||||
|
bash run.sh
|
||||||
|
@echo "mamba run."
|
||||||
|
|
||||||
|
.PHONY: test
|
||||||
|
test: protogen
|
||||||
|
@echo "Testing mamba..."
|
||||||
|
bash test.sh
|
||||||
|
@echo "mamba tested."
|
||||||
|
|
||||||
|
.PHONY: protogen
|
||||||
|
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
.PHONY: protogen-clean
|
||||||
|
protogen-clean:
|
||||||
|
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
backend_pb2_grpc.py backend_pb2.py:
|
||||||
|
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||||
|
|
||||||
|
.PHONY: clean
|
||||||
|
clean: protogen-clean
|
||||||
|
$(RM) -r venv __pycache__
|
||||||
5
backend/python/mamba/README.md
Normal file
5
backend/python/mamba/README.md
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
# Creating a separate environment for the mamba project
|
||||||
|
|
||||||
|
```
|
||||||
|
make mamba
|
||||||
|
```
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user