mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-03 11:13:31 -05:00
Compare commits
41 Commits
feat/realt
...
chromem_st
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a1d5462ad0 | ||
|
|
2f09aa1b85 | ||
|
|
a396040886 | ||
|
|
aeb1dca52e | ||
|
|
83a8d90c52 | ||
|
|
adebd557ce | ||
|
|
0c0e015b38 | ||
|
|
390bb3f58b | ||
|
|
30739d94a4 | ||
|
|
83e2dd5dff | ||
|
|
f496d0113b | ||
|
|
a752183fb5 | ||
|
|
296b97925f | ||
|
|
d0cc3047dc | ||
|
|
032a33de49 | ||
|
|
1e9bf19c8d | ||
|
|
4bd8434ae0 | ||
|
|
958f6eb722 | ||
|
|
96306a39a0 | ||
|
|
895cd7c76a | ||
|
|
cbdbe59f16 | ||
|
|
ee7904f170 | ||
|
|
a761e01944 | ||
|
|
96f8ec0402 | ||
|
|
8027fdf1c7 | ||
|
|
212c8e1a6d | ||
|
|
78533d7230 | ||
|
|
b5eeb5c5ab | ||
|
|
b147ad0596 | ||
|
|
7d0ac1ea3f | ||
|
|
d08d97bebf | ||
|
|
acb2eb23c8 | ||
|
|
de4aa9fb1d | ||
|
|
560ba6f25e | ||
|
|
8131ddd878 | ||
|
|
26c3deb673 | ||
|
|
6d20497d45 | ||
|
|
482c6b8be4 | ||
|
|
5bba5edf45 | ||
|
|
792b866727 | ||
|
|
f053f7bde2 |
@@ -16,7 +16,7 @@ headers {
|
||||
|
||||
body:json {
|
||||
{
|
||||
"backend": "transformers-musicgen",
|
||||
"backend": "transformers",
|
||||
"model": "facebook/musicgen-small",
|
||||
"input": "80s Synths playing Jazz"
|
||||
}
|
||||
|
||||
8
.github/dependabot.yml
vendored
8
.github/dependabot.yml
vendored
@@ -81,14 +81,6 @@ updates:
|
||||
directory: "/backend/python/transformers"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/transformers-musicgen"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/vall-e-x"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/backend/python/vllm"
|
||||
schedule:
|
||||
|
||||
80
.github/workflows/image.yml
vendored
80
.github/workflows/image.yml
vendored
@@ -362,43 +362,43 @@ jobs:
|
||||
base-image: "ubuntu:22.04"
|
||||
skip-drivers: 'false'
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
# parallel-builds:
|
||||
# uses: ./.github/workflows/image_build.yml
|
||||
# with:
|
||||
# tag-latest: ${{ matrix.tag-latest }}
|
||||
# tag-suffix: ${{ matrix.tag-suffix }}
|
||||
# ffmpeg: ${{ matrix.ffmpeg }}
|
||||
# image-type: ${{ matrix.image-type }}
|
||||
# build-type: ${{ matrix.build-type }}
|
||||
# cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||
# cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||
# platforms: ${{ matrix.platforms }}
|
||||
# runs-on: ${{ matrix.runs-on }}
|
||||
# aio: ${{ matrix.aio }}
|
||||
# base-image: ${{ matrix.base-image }}
|
||||
# grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||
# makeflags: ${{ matrix.makeflags }}
|
||||
# latest-image: ${{ matrix.latest-image }}
|
||||
# latest-image-aio: ${{ matrix.latest-image-aio }}
|
||||
# skip-drivers: ${{ matrix.skip-drivers }}
|
||||
# secrets:
|
||||
# dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
# dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
# quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
# quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
# strategy:
|
||||
# matrix:
|
||||
# include:
|
||||
# - build-type: 'cublas'
|
||||
# cuda-major-version: "12"
|
||||
# cuda-minor-version: "0"
|
||||
# platforms: 'linux/arm64'
|
||||
# tag-latest: 'false'
|
||||
# tag-suffix: '-nvidia-l4t-arm64-core'
|
||||
# latest-image: 'latest-nvidia-l4t-arm64-core'
|
||||
# ffmpeg: 'true'
|
||||
# image-type: 'core'
|
||||
# base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||
# runs-on: 'self-hosted'
|
||||
# makeflags: "--jobs=4 --output-sync=target"
|
||||
# skip-drivers: 'true'
|
||||
gh-runner:
|
||||
uses: ./.github/workflows/image_build.yml
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
tag-suffix: ${{ matrix.tag-suffix }}
|
||||
ffmpeg: ${{ matrix.ffmpeg }}
|
||||
image-type: ${{ matrix.image-type }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||
platforms: ${{ matrix.platforms }}
|
||||
runs-on: ${{ matrix.runs-on }}
|
||||
aio: ${{ matrix.aio }}
|
||||
base-image: ${{ matrix.base-image }}
|
||||
grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||
makeflags: ${{ matrix.makeflags }}
|
||||
latest-image: ${{ matrix.latest-image }}
|
||||
latest-image-aio: ${{ matrix.latest-image-aio }}
|
||||
skip-drivers: ${{ matrix.skip-drivers }}
|
||||
secrets:
|
||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-nvidia-l4t-arm64-core'
|
||||
latest-image: 'latest-nvidia-l4t-arm64-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
makeflags: "--jobs=4 --output-sync=target"
|
||||
skip-drivers: 'true'
|
||||
84
.github/workflows/test-extra.yml
vendored
84
.github/workflows/test-extra.yml
vendored
@@ -35,30 +35,6 @@ jobs:
|
||||
run: |
|
||||
make --jobs=5 --output-sync=target -C backend/python/transformers
|
||||
make --jobs=5 --output-sync=target -C backend/python/transformers test
|
||||
|
||||
tests-sentencetransformers:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
|
||||
- name: Test sentencetransformers
|
||||
run: |
|
||||
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
|
||||
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
|
||||
|
||||
|
||||
tests-rerankers:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
@@ -153,27 +129,27 @@ jobs:
|
||||
make --jobs=5 --output-sync=target -C backend/python/openvoice
|
||||
make --jobs=5 --output-sync=target -C backend/python/openvoice test
|
||||
|
||||
tests-transformers-musicgen:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
# tests-transformers-musicgen:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# uses: actions/checkout@v4
|
||||
# with:
|
||||
# submodules: true
|
||||
# - name: Dependencies
|
||||
# run: |
|
||||
# sudo apt-get update
|
||||
# sudo apt-get install build-essential ffmpeg
|
||||
# # Install UV
|
||||
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
# sudo apt-get install -y libopencv-dev
|
||||
# pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
|
||||
- name: Test transformers-musicgen
|
||||
run: |
|
||||
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
|
||||
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
|
||||
# - name: Test transformers-musicgen
|
||||
# run: |
|
||||
# make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
|
||||
# make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
|
||||
|
||||
# tests-bark:
|
||||
# runs-on: ubuntu-latest
|
||||
@@ -260,26 +236,6 @@ jobs:
|
||||
# run: |
|
||||
# make --jobs=5 --output-sync=target -C backend/python/vllm
|
||||
# make --jobs=5 --output-sync=target -C backend/python/vllm test
|
||||
tests-vallex:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
- name: Test vall-e-x
|
||||
run: |
|
||||
make --jobs=5 --output-sync=target -C backend/python/vall-e-x
|
||||
make --jobs=5 --output-sync=target -C backend/python/vall-e-x test
|
||||
|
||||
tests-coqui:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
3
.github/workflows/test.yml
vendored
3
.github/workflows/test.yml
vendored
@@ -100,8 +100,7 @@ jobs:
|
||||
# The python3-grpc-tools package in 22.04 is too old
|
||||
pip install --user grpcio-tools
|
||||
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
|
||||
make -C backend/python/transformers
|
||||
|
||||
# Pre-build piper before we start tests in order to have shared libraries in place
|
||||
make sources/go-piper && \
|
||||
|
||||
16
Dockerfile
16
Dockerfile
@@ -15,7 +15,7 @@ ARG TARGETARCH
|
||||
ARG TARGETVARIANT
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
|
||||
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
|
||||
|
||||
|
||||
RUN apt-get update && \
|
||||
@@ -436,6 +436,10 @@ SHELL ["/bin/bash", "-c"]
|
||||
# Splitting the backends into more groups with fewer items results in a larger image, but a smaller size for the largest layer
|
||||
# Splitting the backends into fewer groups with more items results in a smaller image, but a larger size for the largest layer
|
||||
|
||||
RUN if [[ ( "${IMAGE_TYPE}" == "extras ")]]; then \
|
||||
apt-get -qq -y install espeak-ng \
|
||||
; fi
|
||||
|
||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/coqui \
|
||||
; fi && \
|
||||
@@ -444,20 +448,14 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/diffusers \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/transformers-musicgen \
|
||||
; fi
|
||||
|
||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/vall-e-x \
|
||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/kokoro \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/openvoice \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/sentencetransformers \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/exllama2 \
|
||||
; fi && \
|
||||
|
||||
76
Makefile
76
Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
|
||||
# llama.cpp versions
|
||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||
CPPLLAMA_VERSION?=504af20ee4eae72080a56d59d744f6774f7901ce
|
||||
CPPLLAMA_VERSION?=92bc493917d43b83e592349e138b54c90b1c3ea7
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||
@@ -22,17 +22,13 @@ PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
|
||||
STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
|
||||
STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
|
||||
|
||||
# tinydream version
|
||||
TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
|
||||
TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
|
||||
|
||||
# bark.cpp
|
||||
BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
|
||||
BARKCPP_VERSION?=v1.0.0
|
||||
|
||||
# stablediffusion.cpp (ggml)
|
||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
||||
STABLEDIFFUSION_GGML_VERSION?=dcf91f9e0f2cbf9da472ee2a556751ed4bab2d2a
|
||||
STABLEDIFFUSION_GGML_VERSION?=5eb15ef4d022bef4a391de4f5f6556e81fbb5024
|
||||
|
||||
ONNX_VERSION?=1.20.0
|
||||
ONNX_ARCH?=x64
|
||||
@@ -188,11 +184,6 @@ ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
|
||||
OPTIONAL_GRPC+=backend-assets/grpc/stablediffusion
|
||||
endif
|
||||
|
||||
ifeq ($(findstring tinydream,$(GO_TAGS)),tinydream)
|
||||
# OPTIONAL_TARGETS+=go-tiny-dream/libtinydream.a
|
||||
OPTIONAL_GRPC+=backend-assets/grpc/tinydream
|
||||
endif
|
||||
|
||||
ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
||||
# OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
|
||||
# OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
|
||||
@@ -327,19 +318,6 @@ else
|
||||
mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
|
||||
endif
|
||||
|
||||
## tiny-dream
|
||||
sources/go-tiny-dream:
|
||||
mkdir -p sources/go-tiny-dream
|
||||
cd sources/go-tiny-dream && \
|
||||
git init && \
|
||||
git remote add origin $(TINYDREAM_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout $(TINYDREAM_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
|
||||
$(MAKE) -C sources/go-tiny-dream libtinydream.a
|
||||
|
||||
## whisper
|
||||
sources/whisper.cpp:
|
||||
mkdir -p sources/whisper.cpp
|
||||
@@ -353,12 +331,11 @@ sources/whisper.cpp:
|
||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
||||
|
||||
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
|
||||
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion backend/cpp/llama/llama.cpp
|
||||
|
||||
replace:
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
||||
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
||||
@@ -366,7 +343,6 @@ replace:
|
||||
dropreplace:
|
||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
||||
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
|
||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
|
||||
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
|
||||
@@ -381,7 +357,6 @@ rebuild: ## Rebuilds the project
|
||||
$(MAKE) -C sources/whisper.cpp clean
|
||||
$(MAKE) -C sources/go-stable-diffusion clean
|
||||
$(MAKE) -C sources/go-piper clean
|
||||
$(MAKE) -C sources/go-tiny-dream clean
|
||||
$(MAKE) build
|
||||
|
||||
prepare: prepare-sources $(OPTIONAL_TARGETS)
|
||||
@@ -497,7 +472,7 @@ test: prepare test-models/testmodel.ggml grpcs
|
||||
@echo 'Running tests'
|
||||
export GO_TAGS="tts stablediffusion debug"
|
||||
$(MAKE) prepare-test
|
||||
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||
$(MAKE) test-llama
|
||||
$(MAKE) test-llama-gguf
|
||||
@@ -583,10 +558,10 @@ protogen-go-clean:
|
||||
$(RM) bin/*
|
||||
|
||||
.PHONY: protogen-python
|
||||
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
|
||||
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen
|
||||
|
||||
.PHONY: protogen-python-clean
|
||||
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
|
||||
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean
|
||||
|
||||
.PHONY: autogptq-protogen
|
||||
autogptq-protogen:
|
||||
@@ -644,14 +619,6 @@ rerankers-protogen:
|
||||
rerankers-protogen-clean:
|
||||
$(MAKE) -C backend/python/rerankers protogen-clean
|
||||
|
||||
.PHONY: sentencetransformers-protogen
|
||||
sentencetransformers-protogen:
|
||||
$(MAKE) -C backend/python/sentencetransformers protogen
|
||||
|
||||
.PHONY: sentencetransformers-protogen-clean
|
||||
sentencetransformers-protogen-clean:
|
||||
$(MAKE) -C backend/python/sentencetransformers protogen-clean
|
||||
|
||||
.PHONY: transformers-protogen
|
||||
transformers-protogen:
|
||||
$(MAKE) -C backend/python/transformers protogen
|
||||
@@ -668,21 +635,13 @@ parler-tts-protogen:
|
||||
parler-tts-protogen-clean:
|
||||
$(MAKE) -C backend/python/parler-tts protogen-clean
|
||||
|
||||
.PHONY: transformers-musicgen-protogen
|
||||
transformers-musicgen-protogen:
|
||||
$(MAKE) -C backend/python/transformers-musicgen protogen
|
||||
.PHONY: kokoro-protogen
|
||||
kokoro-protogen:
|
||||
$(MAKE) -C backend/python/kokoro protogen
|
||||
|
||||
.PHONY: transformers-musicgen-protogen-clean
|
||||
transformers-musicgen-protogen-clean:
|
||||
$(MAKE) -C backend/python/transformers-musicgen protogen-clean
|
||||
|
||||
.PHONY: vall-e-x-protogen
|
||||
vall-e-x-protogen:
|
||||
$(MAKE) -C backend/python/vall-e-x protogen
|
||||
|
||||
.PHONY: vall-e-x-protogen-clean
|
||||
vall-e-x-protogen-clean:
|
||||
$(MAKE) -C backend/python/vall-e-x protogen-clean
|
||||
.PHONY: kokoro-protogen-clean
|
||||
kokoro-protogen-clean:
|
||||
$(MAKE) -C backend/python/kokoro protogen-clean
|
||||
|
||||
.PHONY: openvoice-protogen
|
||||
openvoice-protogen:
|
||||
@@ -709,12 +668,10 @@ prepare-extra-conda-environments: protogen-python
|
||||
$(MAKE) -C backend/python/diffusers
|
||||
$(MAKE) -C backend/python/vllm
|
||||
$(MAKE) -C backend/python/mamba
|
||||
$(MAKE) -C backend/python/sentencetransformers
|
||||
$(MAKE) -C backend/python/rerankers
|
||||
$(MAKE) -C backend/python/transformers
|
||||
$(MAKE) -C backend/python/transformers-musicgen
|
||||
$(MAKE) -C backend/python/parler-tts
|
||||
$(MAKE) -C backend/python/vall-e-x
|
||||
$(MAKE) -C backend/python/kokoro
|
||||
$(MAKE) -C backend/python/openvoice
|
||||
$(MAKE) -C backend/python/exllama2
|
||||
|
||||
@@ -873,13 +830,6 @@ ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/silero-vad
|
||||
endif
|
||||
|
||||
backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/tinydream
|
||||
endif
|
||||
|
||||
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
|
||||
|
||||
10
README.md
10
README.md
@@ -92,19 +92,15 @@ local-ai run oci://localai/phi-2:latest
|
||||
|
||||
## 📰 Latest project news
|
||||
|
||||
- Jan 2025: LocalAI model release: https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.3, SANA support in diffusers: https://github.com/mudler/LocalAI/pull/4603
|
||||
- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 )
|
||||
- Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 )
|
||||
- Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204
|
||||
- Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples)
|
||||
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
|
||||
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
|
||||
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
|
||||
- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628
|
||||
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723. P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
|
||||
- May 2024: 🔥🔥 Decentralized P2P llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
|
||||
- May 2024: 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
|
||||
- May 2024: 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
|
||||
- May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
|
||||
- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
|
||||
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
|
||||
|
||||
Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||
@@ -113,12 +109,10 @@ Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3A
|
||||
|
||||
- Multimodal with vLLM and Video understanding: https://github.com/mudler/LocalAI/pull/3729
|
||||
- Realtime API https://github.com/mudler/LocalAI/issues/3714
|
||||
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
|
||||
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
|
||||
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
||||
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
|
||||
- Assistant API: https://github.com/mudler/LocalAI/issues/1273
|
||||
- Moderation endpoint: https://github.com/mudler/LocalAI/issues/999
|
||||
- Vulkan: https://github.com/mudler/LocalAI/issues/1647
|
||||
- Anthropic API: https://github.com/mudler/LocalAI/issues/1808
|
||||
|
||||
|
||||
@@ -21,8 +21,7 @@ service Backend {
|
||||
rpc Status(HealthMessage) returns (StatusResponse) {}
|
||||
|
||||
rpc StoresSet(StoresSetOptions) returns (Result) {}
|
||||
rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
|
||||
rpc StoresGet(StoresGetOptions) returns (StoresGetResult) {}
|
||||
rpc StoresReset(StoresResetOptions) returns (Result) {}
|
||||
rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
|
||||
|
||||
rpc Rerank(RerankRequest) returns (RerankResult) {}
|
||||
@@ -78,19 +77,10 @@ message StoresSetOptions {
|
||||
repeated StoresValue Values = 2;
|
||||
}
|
||||
|
||||
message StoresDeleteOptions {
|
||||
message StoresResetOptions {
|
||||
repeated StoresKey Keys = 1;
|
||||
}
|
||||
|
||||
message StoresGetOptions {
|
||||
repeated StoresKey Keys = 1;
|
||||
}
|
||||
|
||||
message StoresGetResult {
|
||||
repeated StoresKey Keys = 1;
|
||||
repeated StoresValue Values = 2;
|
||||
}
|
||||
|
||||
message StoresFindOptions {
|
||||
StoresKey Key = 1;
|
||||
int32 TopK = 2;
|
||||
@@ -159,6 +149,8 @@ message Reply {
|
||||
bytes message = 1;
|
||||
int32 tokens = 2;
|
||||
int32 prompt_tokens = 3;
|
||||
double timing_prompt_processing = 4;
|
||||
double timing_token_generation = 5;
|
||||
}
|
||||
|
||||
message ModelOptions {
|
||||
@@ -348,4 +340,4 @@ message StatusResponse {
|
||||
message Message {
|
||||
string role = 1;
|
||||
string content = 2;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -134,6 +134,32 @@ static std::string tokens_to_output_formatted_string(const llama_context *ctx, c
|
||||
return out;
|
||||
}
|
||||
|
||||
// Adds an RPC server
|
||||
// https://github.com/ggerganov/llama.cpp/compare/4dbc8b9cb71876e005724f4e8f73a3544646bcf5..3edfa7d3753c29e44b964c0ff424d2ea8d5fdee6
|
||||
static void add_rpc_devices(std::string servers) {
|
||||
auto rpc_servers = string_split<std::string>(servers, ',');
|
||||
if (rpc_servers.empty()) {
|
||||
throw std::invalid_argument("no RPC servers specified");
|
||||
}
|
||||
ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name("RPC");
|
||||
if (!rpc_reg) {
|
||||
throw std::invalid_argument("failed to find RPC backend");
|
||||
}
|
||||
typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t)(const char * endpoint);
|
||||
ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device");
|
||||
if (!ggml_backend_rpc_add_device_fn) {
|
||||
throw std::invalid_argument("failed to find RPC device add function");
|
||||
}
|
||||
for (const auto & server : rpc_servers) {
|
||||
ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str());
|
||||
if (dev) {
|
||||
ggml_backend_device_register(dev);
|
||||
} else {
|
||||
throw std::invalid_argument("failed to register RPC device");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// convert a vector of completion_token_output to json
|
||||
static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> &probs)
|
||||
{
|
||||
@@ -2282,7 +2308,7 @@ static void params_parse(const backend::ModelOptions* request,
|
||||
|
||||
const char *llama_grpc_servers = std::getenv("LLAMACPP_GRPC_SERVERS");
|
||||
if (llama_grpc_servers != NULL) {
|
||||
params.rpc_servers = std::string(llama_grpc_servers);
|
||||
add_rpc_devices(std::string(llama_grpc_servers));
|
||||
}
|
||||
|
||||
// TODO: Add yarn
|
||||
@@ -2408,6 +2434,13 @@ public:
|
||||
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
||||
reply.set_prompt_tokens(tokens_evaluated);
|
||||
|
||||
if (result.result_json.contains("timings")) {
|
||||
double timing_prompt_processing = result.result_json.at("timings").value("prompt_ms", 0.0);
|
||||
reply.set_timing_prompt_processing(timing_prompt_processing);
|
||||
double timing_token_generation = result.result_json.at("timings").value("predicted_ms", 0.0);
|
||||
reply.set_timing_token_generation(timing_token_generation);
|
||||
}
|
||||
|
||||
// Log Request Correlation Id
|
||||
LOG_VERBOSE("correlation:", {
|
||||
{ "id", data["correlation_id"] }
|
||||
@@ -2448,6 +2481,13 @@ public:
|
||||
reply->set_prompt_tokens(tokens_evaluated);
|
||||
reply->set_tokens(tokens_predicted);
|
||||
reply->set_message(completion_text);
|
||||
|
||||
if (result.result_json.contains("timings")) {
|
||||
double timing_prompt_processing = result.result_json.at("timings").value("prompt_ms", 0.0);
|
||||
reply->set_timing_prompt_processing(timing_prompt_processing);
|
||||
double timing_token_generation = result.result_json.at("timings").value("predicted_ms", 0.0);
|
||||
reply->set_timing_token_generation(timing_token_generation);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
package main
|
||||
|
||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
||||
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &Image{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
@@ -1,32 +0,0 @@
|
||||
package main
|
||||
|
||||
// This is a wrapper to statisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/tinydream"
|
||||
)
|
||||
|
||||
type Image struct {
|
||||
base.SingleThread
|
||||
tinydream *tinydream.TinyDream
|
||||
}
|
||||
|
||||
func (image *Image) Load(opts *pb.ModelOptions) error {
|
||||
var err error
|
||||
// Note: the Model here is a path to a directory containing the model files
|
||||
image.tinydream, err = tinydream.New(opts.ModelFile)
|
||||
return err
|
||||
}
|
||||
|
||||
func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
|
||||
return image.tinydream.GenerateImage(
|
||||
int(opts.Height),
|
||||
int(opts.Width),
|
||||
int(opts.Step),
|
||||
int(opts.Seed),
|
||||
opts.PositivePrompt,
|
||||
opts.NegativePrompt,
|
||||
opts.Dst)
|
||||
}
|
||||
@@ -4,101 +4,36 @@ package main
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"container/heap"
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"slices"
|
||||
"runtime"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
chromem "github.com/philippgille/chromem-go"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
type Store struct {
|
||||
base.SingleThread
|
||||
|
||||
// The sorted keys
|
||||
keys [][]float32
|
||||
// The sorted values
|
||||
values [][]byte
|
||||
|
||||
// If for every K it holds that ||k||^2 = 1, then we can use the normalized distance functions
|
||||
// TODO: Should we normalize incoming keys if they are not instead?
|
||||
keysAreNormalized bool
|
||||
// The first key decides the length of the keys
|
||||
keyLen int
|
||||
}
|
||||
|
||||
// TODO: Only used for sorting using Go's builtin implementation. The interfaces are columnar because
|
||||
// that's theoretically best for memory layout and cache locality, but this isn't optimized yet.
|
||||
type Pair struct {
|
||||
Key []float32
|
||||
Value []byte
|
||||
*chromem.DB
|
||||
*chromem.Collection
|
||||
}
|
||||
|
||||
func NewStore() *Store {
|
||||
return &Store{
|
||||
keys: make([][]float32, 0),
|
||||
values: make([][]byte, 0),
|
||||
keysAreNormalized: true,
|
||||
keyLen: -1,
|
||||
}
|
||||
}
|
||||
|
||||
func compareSlices(k1, k2 []float32) int {
|
||||
assert(len(k1) == len(k2), fmt.Sprintf("compareSlices: len(k1) = %d, len(k2) = %d", len(k1), len(k2)))
|
||||
|
||||
return slices.Compare(k1, k2)
|
||||
}
|
||||
|
||||
func hasKey(unsortedSlice [][]float32, target []float32) bool {
|
||||
return slices.ContainsFunc(unsortedSlice, func(k []float32) bool {
|
||||
return compareSlices(k, target) == 0
|
||||
})
|
||||
}
|
||||
|
||||
func findInSortedSlice(sortedSlice [][]float32, target []float32) (int, bool) {
|
||||
return slices.BinarySearchFunc(sortedSlice, target, func(k, t []float32) int {
|
||||
return compareSlices(k, t)
|
||||
})
|
||||
}
|
||||
|
||||
func isSortedPairs(kvs []Pair) bool {
|
||||
for i := 1; i < len(kvs); i++ {
|
||||
if compareSlices(kvs[i-1].Key, kvs[i].Key) > 0 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func isSortedKeys(keys [][]float32) bool {
|
||||
for i := 1; i < len(keys); i++ {
|
||||
if compareSlices(keys[i-1], keys[i]) > 0 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func sortIntoKeySlicese(keys []*pb.StoresKey) [][]float32 {
|
||||
ks := make([][]float32, len(keys))
|
||||
|
||||
for i, k := range keys {
|
||||
ks[i] = k.Floats
|
||||
}
|
||||
|
||||
slices.SortFunc(ks, compareSlices)
|
||||
|
||||
assert(len(ks) == len(keys), fmt.Sprintf("len(ks) = %d, len(keys) = %d", len(ks), len(keys)))
|
||||
assert(isSortedKeys(ks), "keys are not sorted")
|
||||
|
||||
return ks
|
||||
return &Store{}
|
||||
}
|
||||
|
||||
func (s *Store) Load(opts *pb.ModelOptions) error {
|
||||
db := chromem.NewDB()
|
||||
collection, err := db.CreateCollection("all-documents", nil, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
s.DB = db
|
||||
s.Collection = collection
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -111,156 +46,25 @@ func (s *Store) StoresSet(opts *pb.StoresSetOptions) error {
|
||||
if len(opts.Keys) != len(opts.Values) {
|
||||
return fmt.Errorf("len(keys) = %d, len(values) = %d", len(opts.Keys), len(opts.Values))
|
||||
}
|
||||
|
||||
if s.keyLen == -1 {
|
||||
s.keyLen = len(opts.Keys[0].Floats)
|
||||
} else {
|
||||
if len(opts.Keys[0].Floats) != s.keyLen {
|
||||
return fmt.Errorf("Try to add key with length %d when existing length is %d", len(opts.Keys[0].Floats), s.keyLen)
|
||||
}
|
||||
}
|
||||
|
||||
kvs := make([]Pair, len(opts.Keys))
|
||||
docs := []chromem.Document{}
|
||||
|
||||
for i, k := range opts.Keys {
|
||||
if s.keysAreNormalized && !isNormalized(k.Floats) {
|
||||
s.keysAreNormalized = false
|
||||
var sample []float32
|
||||
if len(s.keys) > 5 {
|
||||
sample = k.Floats[:5]
|
||||
} else {
|
||||
sample = k.Floats
|
||||
}
|
||||
log.Debug().Msgf("Key is not normalized: %v", sample)
|
||||
}
|
||||
|
||||
kvs[i] = Pair{
|
||||
Key: k.Floats,
|
||||
Value: opts.Values[i].Bytes,
|
||||
}
|
||||
docs = append(docs, chromem.Document{
|
||||
ID: k.String(),
|
||||
Content: opts.Values[i].String(),
|
||||
})
|
||||
}
|
||||
|
||||
slices.SortFunc(kvs, func(a, b Pair) int {
|
||||
return compareSlices(a.Key, b.Key)
|
||||
})
|
||||
|
||||
assert(len(kvs) == len(opts.Keys), fmt.Sprintf("len(kvs) = %d, len(opts.Keys) = %d", len(kvs), len(opts.Keys)))
|
||||
assert(isSortedPairs(kvs), "keys are not sorted")
|
||||
|
||||
l := len(kvs) + len(s.keys)
|
||||
merge_ks := make([][]float32, 0, l)
|
||||
merge_vs := make([][]byte, 0, l)
|
||||
|
||||
i, j := 0, 0
|
||||
for {
|
||||
if i+j >= l {
|
||||
break
|
||||
}
|
||||
|
||||
if i >= len(kvs) {
|
||||
merge_ks = append(merge_ks, s.keys[j])
|
||||
merge_vs = append(merge_vs, s.values[j])
|
||||
j++
|
||||
continue
|
||||
}
|
||||
|
||||
if j >= len(s.keys) {
|
||||
merge_ks = append(merge_ks, kvs[i].Key)
|
||||
merge_vs = append(merge_vs, kvs[i].Value)
|
||||
i++
|
||||
continue
|
||||
}
|
||||
|
||||
c := compareSlices(kvs[i].Key, s.keys[j])
|
||||
if c < 0 {
|
||||
merge_ks = append(merge_ks, kvs[i].Key)
|
||||
merge_vs = append(merge_vs, kvs[i].Value)
|
||||
i++
|
||||
} else if c > 0 {
|
||||
merge_ks = append(merge_ks, s.keys[j])
|
||||
merge_vs = append(merge_vs, s.values[j])
|
||||
j++
|
||||
} else {
|
||||
merge_ks = append(merge_ks, kvs[i].Key)
|
||||
merge_vs = append(merge_vs, kvs[i].Value)
|
||||
i++
|
||||
j++
|
||||
}
|
||||
}
|
||||
|
||||
assert(len(merge_ks) == l, fmt.Sprintf("len(merge_ks) = %d, l = %d", len(merge_ks), l))
|
||||
assert(isSortedKeys(merge_ks), "merge keys are not sorted")
|
||||
|
||||
s.keys = merge_ks
|
||||
s.values = merge_vs
|
||||
|
||||
return nil
|
||||
return s.Collection.AddDocuments(context.Background(), docs, runtime.NumCPU())
|
||||
}
|
||||
|
||||
func (s *Store) StoresDelete(opts *pb.StoresDeleteOptions) error {
|
||||
if len(opts.Keys) == 0 {
|
||||
return fmt.Errorf("no keys to delete")
|
||||
func (s *Store) StoresReset(opts *pb.StoresResetOptions) error {
|
||||
err := s.DB.DeleteCollection("all-documents")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(opts.Keys) == 0 {
|
||||
return fmt.Errorf("no keys to add")
|
||||
}
|
||||
|
||||
if s.keyLen == -1 {
|
||||
s.keyLen = len(opts.Keys[0].Floats)
|
||||
} else {
|
||||
if len(opts.Keys[0].Floats) != s.keyLen {
|
||||
return fmt.Errorf("Trying to delete key with length %d when existing length is %d", len(opts.Keys[0].Floats), s.keyLen)
|
||||
}
|
||||
}
|
||||
|
||||
ks := sortIntoKeySlicese(opts.Keys)
|
||||
|
||||
l := len(s.keys) - len(ks)
|
||||
merge_ks := make([][]float32, 0, l)
|
||||
merge_vs := make([][]byte, 0, l)
|
||||
|
||||
tail_ks := s.keys
|
||||
tail_vs := s.values
|
||||
for _, k := range ks {
|
||||
j, found := findInSortedSlice(tail_ks, k)
|
||||
|
||||
if found {
|
||||
merge_ks = append(merge_ks, tail_ks[:j]...)
|
||||
merge_vs = append(merge_vs, tail_vs[:j]...)
|
||||
tail_ks = tail_ks[j+1:]
|
||||
tail_vs = tail_vs[j+1:]
|
||||
} else {
|
||||
assert(!hasKey(s.keys, k), fmt.Sprintf("Key exists, but was not found: t=%d, %v", len(tail_ks), k))
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Delete: found = %v, t = %d, j = %d, len(merge_ks) = %d, len(merge_vs) = %d", found, len(tail_ks), j, len(merge_ks), len(merge_vs))
|
||||
}
|
||||
|
||||
merge_ks = append(merge_ks, tail_ks...)
|
||||
merge_vs = append(merge_vs, tail_vs...)
|
||||
|
||||
assert(len(merge_ks) <= len(s.keys), fmt.Sprintf("len(merge_ks) = %d, len(s.keys) = %d", len(merge_ks), len(s.keys)))
|
||||
|
||||
s.keys = merge_ks
|
||||
s.values = merge_vs
|
||||
|
||||
assert(len(s.keys) >= l, fmt.Sprintf("len(s.keys) = %d, l = %d", len(s.keys), l))
|
||||
assert(isSortedKeys(s.keys), "keys are not sorted")
|
||||
assert(func() bool {
|
||||
for _, k := range ks {
|
||||
if _, found := findInSortedSlice(s.keys, k); found {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}(), "Keys to delete still present")
|
||||
|
||||
if len(s.keys) != l {
|
||||
log.Debug().Msgf("Delete: Some keys not found: len(s.keys) = %d, l = %d", len(s.keys), l)
|
||||
}
|
||||
|
||||
return nil
|
||||
s.Collection, err = s.CreateCollection("all-documents", nil, nil)
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error) {
|
||||
|
||||
20
backend/python/kokoro/Makefile
Normal file
20
backend/python/kokoro/Makefile
Normal file
@@ -0,0 +1,20 @@
|
||||
.DEFAULT_GOAL := install
|
||||
|
||||
.PHONY: install
|
||||
install:
|
||||
bash install.sh
|
||||
$(MAKE) protogen
|
||||
|
||||
.PHONY: protogen
|
||||
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
.PHONY: protogen-clean
|
||||
protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
bash protogen.sh
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
rm -rf venv __pycache__
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Extra gRPC server for HuggingFace SentenceTransformer models.
|
||||
Extra gRPC server for Kokoro models.
|
||||
"""
|
||||
from concurrent import futures
|
||||
|
||||
@@ -8,15 +8,17 @@ import argparse
|
||||
import signal
|
||||
import sys
|
||||
import os
|
||||
|
||||
import time
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
import soundfile as sf
|
||||
import grpc
|
||||
|
||||
from sentence_transformers import SentenceTransformer
|
||||
from models import build_model
|
||||
from kokoro import generate
|
||||
import torch
|
||||
|
||||
SAMPLE_RATE = 22050
|
||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||
|
||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
||||
@@ -55,42 +57,57 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
"""
|
||||
model_name = request.Model
|
||||
try:
|
||||
self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
|
||||
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
||||
self.MODEL = build_model(request.ModelFile, device)
|
||||
options = request.Options
|
||||
# Find the voice from the options, options are a list of strings in this form optname:optvalue:
|
||||
VOICE_NAME = None
|
||||
for opt in options:
|
||||
if opt.startswith("voice:"):
|
||||
VOICE_NAME = opt.split(":")[1]
|
||||
break
|
||||
if VOICE_NAME is None:
|
||||
return backend_pb2.Result(success=False, message=f"No voice specified in options")
|
||||
MODELPATH = request.ModelPath
|
||||
# If voice name contains a plus, split it and load the two models and combine them
|
||||
if "+" in VOICE_NAME:
|
||||
voice1, voice2 = VOICE_NAME.split("+")
|
||||
voice1 = torch.load(f'{MODELPATH}/{voice1}.pt', weights_only=True).to(device)
|
||||
voice2 = torch.load(f'{MODELPATH}/{voice2}.pt', weights_only=True).to(device)
|
||||
self.VOICEPACK = torch.mean(torch.stack([voice1, voice2]), dim=0)
|
||||
else:
|
||||
self.VOICEPACK = torch.load(f'{MODELPATH}/{VOICE_NAME}.pt', weights_only=True).to(device)
|
||||
|
||||
self.VOICE_NAME = VOICE_NAME
|
||||
|
||||
print(f'Loaded voice: {VOICE_NAME}')
|
||||
except Exception as err:
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
|
||||
# Implement your logic here for the LoadModel service
|
||||
# Replace this with your desired response
|
||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
||||
|
||||
def Embedding(self, request, context):
|
||||
"""
|
||||
A gRPC method that calculates embeddings for a given sentence.
|
||||
|
||||
Args:
|
||||
request: An EmbeddingRequest object that contains the request parameters.
|
||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
||||
|
||||
Returns:
|
||||
An EmbeddingResult object that contains the calculated embeddings.
|
||||
"""
|
||||
# Implement your logic here for the Embedding service
|
||||
# Replace this with your desired response
|
||||
print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr)
|
||||
sentence_embeddings = self.model.encode(request.Embeddings)
|
||||
return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings)
|
||||
|
||||
def TTS(self, request, context):
|
||||
model_name = request.model
|
||||
if model_name == "":
|
||||
return backend_pb2.Result(success=False, message="request.model is required")
|
||||
try:
|
||||
audio, out_ps = generate(self.MODEL, request.text, self.VOICEPACK, lang=self.VOICE_NAME)
|
||||
print(out_ps)
|
||||
sf.write(request.dst, audio, SAMPLE_RATE)
|
||||
except Exception as err:
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
return backend_pb2.Result(success=True)
|
||||
|
||||
def serve(address):
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||
server.add_insecure_port(address)
|
||||
server.start()
|
||||
print("Server started. Listening on: " + address, file=sys.stderr)
|
||||
print("[Kokoro] Server started. Listening on: " + address, file=sys.stderr)
|
||||
|
||||
# Define the signal handler function
|
||||
def signal_handler(sig, frame):
|
||||
print("Received termination signal. Shutting down...")
|
||||
print("[Kokoro] Received termination signal. Shutting down...")
|
||||
server.stop(0)
|
||||
sys.exit(0)
|
||||
|
||||
@@ -110,5 +127,5 @@ if __name__ == "__main__":
|
||||
"--addr", default="localhost:50051", help="The address to bind the server to."
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
print(f"[Kokoro] startup: {args}", file=sys.stderr)
|
||||
serve(args.addr)
|
||||
524
backend/python/kokoro/istftnet.py
Normal file
524
backend/python/kokoro/istftnet.py
Normal file
@@ -0,0 +1,524 @@
|
||||
# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/istftnet.py
|
||||
# https://github.com/yl4579/StyleTTS2/blob/main/Modules/istftnet.py
|
||||
from scipy.signal import get_window
|
||||
from torch.nn import Conv1d, ConvTranspose1d
|
||||
from torch.nn.utils import weight_norm, remove_weight_norm
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
# https://github.com/yl4579/StyleTTS2/blob/main/Modules/utils.py
|
||||
def init_weights(m, mean=0.0, std=0.01):
|
||||
classname = m.__class__.__name__
|
||||
if classname.find("Conv") != -1:
|
||||
m.weight.data.normal_(mean, std)
|
||||
|
||||
def get_padding(kernel_size, dilation=1):
|
||||
return int((kernel_size*dilation - dilation)/2)
|
||||
|
||||
LRELU_SLOPE = 0.1
|
||||
|
||||
class AdaIN1d(nn.Module):
|
||||
def __init__(self, style_dim, num_features):
|
||||
super().__init__()
|
||||
self.norm = nn.InstanceNorm1d(num_features, affine=False)
|
||||
self.fc = nn.Linear(style_dim, num_features*2)
|
||||
|
||||
def forward(self, x, s):
|
||||
h = self.fc(s)
|
||||
h = h.view(h.size(0), h.size(1), 1)
|
||||
gamma, beta = torch.chunk(h, chunks=2, dim=1)
|
||||
return (1 + gamma) * self.norm(x) + beta
|
||||
|
||||
class AdaINResBlock1(torch.nn.Module):
|
||||
def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5), style_dim=64):
|
||||
super(AdaINResBlock1, self).__init__()
|
||||
self.convs1 = nn.ModuleList([
|
||||
weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
|
||||
padding=get_padding(kernel_size, dilation[0]))),
|
||||
weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1],
|
||||
padding=get_padding(kernel_size, dilation[1]))),
|
||||
weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[2],
|
||||
padding=get_padding(kernel_size, dilation[2])))
|
||||
])
|
||||
self.convs1.apply(init_weights)
|
||||
|
||||
self.convs2 = nn.ModuleList([
|
||||
weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
|
||||
padding=get_padding(kernel_size, 1))),
|
||||
weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
|
||||
padding=get_padding(kernel_size, 1))),
|
||||
weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
|
||||
padding=get_padding(kernel_size, 1)))
|
||||
])
|
||||
self.convs2.apply(init_weights)
|
||||
|
||||
self.adain1 = nn.ModuleList([
|
||||
AdaIN1d(style_dim, channels),
|
||||
AdaIN1d(style_dim, channels),
|
||||
AdaIN1d(style_dim, channels),
|
||||
])
|
||||
|
||||
self.adain2 = nn.ModuleList([
|
||||
AdaIN1d(style_dim, channels),
|
||||
AdaIN1d(style_dim, channels),
|
||||
AdaIN1d(style_dim, channels),
|
||||
])
|
||||
|
||||
self.alpha1 = nn.ParameterList([nn.Parameter(torch.ones(1, channels, 1)) for i in range(len(self.convs1))])
|
||||
self.alpha2 = nn.ParameterList([nn.Parameter(torch.ones(1, channels, 1)) for i in range(len(self.convs2))])
|
||||
|
||||
|
||||
def forward(self, x, s):
|
||||
for c1, c2, n1, n2, a1, a2 in zip(self.convs1, self.convs2, self.adain1, self.adain2, self.alpha1, self.alpha2):
|
||||
xt = n1(x, s)
|
||||
xt = xt + (1 / a1) * (torch.sin(a1 * xt) ** 2) # Snake1D
|
||||
xt = c1(xt)
|
||||
xt = n2(xt, s)
|
||||
xt = xt + (1 / a2) * (torch.sin(a2 * xt) ** 2) # Snake1D
|
||||
xt = c2(xt)
|
||||
x = xt + x
|
||||
return x
|
||||
|
||||
def remove_weight_norm(self):
|
||||
for l in self.convs1:
|
||||
remove_weight_norm(l)
|
||||
for l in self.convs2:
|
||||
remove_weight_norm(l)
|
||||
|
||||
class TorchSTFT(torch.nn.Module):
|
||||
def __init__(self, filter_length=800, hop_length=200, win_length=800, window='hann'):
|
||||
super().__init__()
|
||||
self.filter_length = filter_length
|
||||
self.hop_length = hop_length
|
||||
self.win_length = win_length
|
||||
self.window = torch.from_numpy(get_window(window, win_length, fftbins=True).astype(np.float32))
|
||||
|
||||
def transform(self, input_data):
|
||||
forward_transform = torch.stft(
|
||||
input_data,
|
||||
self.filter_length, self.hop_length, self.win_length, window=self.window.to(input_data.device),
|
||||
return_complex=True)
|
||||
|
||||
return torch.abs(forward_transform), torch.angle(forward_transform)
|
||||
|
||||
def inverse(self, magnitude, phase):
|
||||
inverse_transform = torch.istft(
|
||||
magnitude * torch.exp(phase * 1j),
|
||||
self.filter_length, self.hop_length, self.win_length, window=self.window.to(magnitude.device))
|
||||
|
||||
return inverse_transform.unsqueeze(-2) # unsqueeze to stay consistent with conv_transpose1d implementation
|
||||
|
||||
def forward(self, input_data):
|
||||
self.magnitude, self.phase = self.transform(input_data)
|
||||
reconstruction = self.inverse(self.magnitude, self.phase)
|
||||
return reconstruction
|
||||
|
||||
class SineGen(torch.nn.Module):
|
||||
""" Definition of sine generator
|
||||
SineGen(samp_rate, harmonic_num = 0,
|
||||
sine_amp = 0.1, noise_std = 0.003,
|
||||
voiced_threshold = 0,
|
||||
flag_for_pulse=False)
|
||||
samp_rate: sampling rate in Hz
|
||||
harmonic_num: number of harmonic overtones (default 0)
|
||||
sine_amp: amplitude of sine-wavefrom (default 0.1)
|
||||
noise_std: std of Gaussian noise (default 0.003)
|
||||
voiced_thoreshold: F0 threshold for U/V classification (default 0)
|
||||
flag_for_pulse: this SinGen is used inside PulseGen (default False)
|
||||
Note: when flag_for_pulse is True, the first time step of a voiced
|
||||
segment is always sin(np.pi) or cos(0)
|
||||
"""
|
||||
|
||||
def __init__(self, samp_rate, upsample_scale, harmonic_num=0,
|
||||
sine_amp=0.1, noise_std=0.003,
|
||||
voiced_threshold=0,
|
||||
flag_for_pulse=False):
|
||||
super(SineGen, self).__init__()
|
||||
self.sine_amp = sine_amp
|
||||
self.noise_std = noise_std
|
||||
self.harmonic_num = harmonic_num
|
||||
self.dim = self.harmonic_num + 1
|
||||
self.sampling_rate = samp_rate
|
||||
self.voiced_threshold = voiced_threshold
|
||||
self.flag_for_pulse = flag_for_pulse
|
||||
self.upsample_scale = upsample_scale
|
||||
|
||||
def _f02uv(self, f0):
|
||||
# generate uv signal
|
||||
uv = (f0 > self.voiced_threshold).type(torch.float32)
|
||||
return uv
|
||||
|
||||
def _f02sine(self, f0_values):
|
||||
""" f0_values: (batchsize, length, dim)
|
||||
where dim indicates fundamental tone and overtones
|
||||
"""
|
||||
# convert to F0 in rad. The interger part n can be ignored
|
||||
# because 2 * np.pi * n doesn't affect phase
|
||||
rad_values = (f0_values / self.sampling_rate) % 1
|
||||
|
||||
# initial phase noise (no noise for fundamental component)
|
||||
rand_ini = torch.rand(f0_values.shape[0], f0_values.shape[2], \
|
||||
device=f0_values.device)
|
||||
rand_ini[:, 0] = 0
|
||||
rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini
|
||||
|
||||
# instantanouse phase sine[t] = sin(2*pi \sum_i=1 ^{t} rad)
|
||||
if not self.flag_for_pulse:
|
||||
# # for normal case
|
||||
|
||||
# # To prevent torch.cumsum numerical overflow,
|
||||
# # it is necessary to add -1 whenever \sum_k=1^n rad_value_k > 1.
|
||||
# # Buffer tmp_over_one_idx indicates the time step to add -1.
|
||||
# # This will not change F0 of sine because (x-1) * 2*pi = x * 2*pi
|
||||
# tmp_over_one = torch.cumsum(rad_values, 1) % 1
|
||||
# tmp_over_one_idx = (padDiff(tmp_over_one)) < 0
|
||||
# cumsum_shift = torch.zeros_like(rad_values)
|
||||
# cumsum_shift[:, 1:, :] = tmp_over_one_idx * -1.0
|
||||
|
||||
# phase = torch.cumsum(rad_values, dim=1) * 2 * np.pi
|
||||
rad_values = torch.nn.functional.interpolate(rad_values.transpose(1, 2),
|
||||
scale_factor=1/self.upsample_scale,
|
||||
mode="linear").transpose(1, 2)
|
||||
|
||||
# tmp_over_one = torch.cumsum(rad_values, 1) % 1
|
||||
# tmp_over_one_idx = (padDiff(tmp_over_one)) < 0
|
||||
# cumsum_shift = torch.zeros_like(rad_values)
|
||||
# cumsum_shift[:, 1:, :] = tmp_over_one_idx * -1.0
|
||||
|
||||
phase = torch.cumsum(rad_values, dim=1) * 2 * np.pi
|
||||
phase = torch.nn.functional.interpolate(phase.transpose(1, 2) * self.upsample_scale,
|
||||
scale_factor=self.upsample_scale, mode="linear").transpose(1, 2)
|
||||
sines = torch.sin(phase)
|
||||
|
||||
else:
|
||||
# If necessary, make sure that the first time step of every
|
||||
# voiced segments is sin(pi) or cos(0)
|
||||
# This is used for pulse-train generation
|
||||
|
||||
# identify the last time step in unvoiced segments
|
||||
uv = self._f02uv(f0_values)
|
||||
uv_1 = torch.roll(uv, shifts=-1, dims=1)
|
||||
uv_1[:, -1, :] = 1
|
||||
u_loc = (uv < 1) * (uv_1 > 0)
|
||||
|
||||
# get the instantanouse phase
|
||||
tmp_cumsum = torch.cumsum(rad_values, dim=1)
|
||||
# different batch needs to be processed differently
|
||||
for idx in range(f0_values.shape[0]):
|
||||
temp_sum = tmp_cumsum[idx, u_loc[idx, :, 0], :]
|
||||
temp_sum[1:, :] = temp_sum[1:, :] - temp_sum[0:-1, :]
|
||||
# stores the accumulation of i.phase within
|
||||
# each voiced segments
|
||||
tmp_cumsum[idx, :, :] = 0
|
||||
tmp_cumsum[idx, u_loc[idx, :, 0], :] = temp_sum
|
||||
|
||||
# rad_values - tmp_cumsum: remove the accumulation of i.phase
|
||||
# within the previous voiced segment.
|
||||
i_phase = torch.cumsum(rad_values - tmp_cumsum, dim=1)
|
||||
|
||||
# get the sines
|
||||
sines = torch.cos(i_phase * 2 * np.pi)
|
||||
return sines
|
||||
|
||||
def forward(self, f0):
|
||||
""" sine_tensor, uv = forward(f0)
|
||||
input F0: tensor(batchsize=1, length, dim=1)
|
||||
f0 for unvoiced steps should be 0
|
||||
output sine_tensor: tensor(batchsize=1, length, dim)
|
||||
output uv: tensor(batchsize=1, length, 1)
|
||||
"""
|
||||
f0_buf = torch.zeros(f0.shape[0], f0.shape[1], self.dim,
|
||||
device=f0.device)
|
||||
# fundamental component
|
||||
fn = torch.multiply(f0, torch.FloatTensor([[range(1, self.harmonic_num + 2)]]).to(f0.device))
|
||||
|
||||
# generate sine waveforms
|
||||
sine_waves = self._f02sine(fn) * self.sine_amp
|
||||
|
||||
# generate uv signal
|
||||
# uv = torch.ones(f0.shape)
|
||||
# uv = uv * (f0 > self.voiced_threshold)
|
||||
uv = self._f02uv(f0)
|
||||
|
||||
# noise: for unvoiced should be similar to sine_amp
|
||||
# std = self.sine_amp/3 -> max value ~ self.sine_amp
|
||||
# . for voiced regions is self.noise_std
|
||||
noise_amp = uv * self.noise_std + (1 - uv) * self.sine_amp / 3
|
||||
noise = noise_amp * torch.randn_like(sine_waves)
|
||||
|
||||
# first: set the unvoiced part to 0 by uv
|
||||
# then: additive noise
|
||||
sine_waves = sine_waves * uv + noise
|
||||
return sine_waves, uv, noise
|
||||
|
||||
|
||||
class SourceModuleHnNSF(torch.nn.Module):
|
||||
""" SourceModule for hn-nsf
|
||||
SourceModule(sampling_rate, harmonic_num=0, sine_amp=0.1,
|
||||
add_noise_std=0.003, voiced_threshod=0)
|
||||
sampling_rate: sampling_rate in Hz
|
||||
harmonic_num: number of harmonic above F0 (default: 0)
|
||||
sine_amp: amplitude of sine source signal (default: 0.1)
|
||||
add_noise_std: std of additive Gaussian noise (default: 0.003)
|
||||
note that amplitude of noise in unvoiced is decided
|
||||
by sine_amp
|
||||
voiced_threshold: threhold to set U/V given F0 (default: 0)
|
||||
Sine_source, noise_source = SourceModuleHnNSF(F0_sampled)
|
||||
F0_sampled (batchsize, length, 1)
|
||||
Sine_source (batchsize, length, 1)
|
||||
noise_source (batchsize, length 1)
|
||||
uv (batchsize, length, 1)
|
||||
"""
|
||||
|
||||
def __init__(self, sampling_rate, upsample_scale, harmonic_num=0, sine_amp=0.1,
|
||||
add_noise_std=0.003, voiced_threshod=0):
|
||||
super(SourceModuleHnNSF, self).__init__()
|
||||
|
||||
self.sine_amp = sine_amp
|
||||
self.noise_std = add_noise_std
|
||||
|
||||
# to produce sine waveforms
|
||||
self.l_sin_gen = SineGen(sampling_rate, upsample_scale, harmonic_num,
|
||||
sine_amp, add_noise_std, voiced_threshod)
|
||||
|
||||
# to merge source harmonics into a single excitation
|
||||
self.l_linear = torch.nn.Linear(harmonic_num + 1, 1)
|
||||
self.l_tanh = torch.nn.Tanh()
|
||||
|
||||
def forward(self, x):
|
||||
"""
|
||||
Sine_source, noise_source = SourceModuleHnNSF(F0_sampled)
|
||||
F0_sampled (batchsize, length, 1)
|
||||
Sine_source (batchsize, length, 1)
|
||||
noise_source (batchsize, length 1)
|
||||
"""
|
||||
# source for harmonic branch
|
||||
with torch.no_grad():
|
||||
sine_wavs, uv, _ = self.l_sin_gen(x)
|
||||
sine_merge = self.l_tanh(self.l_linear(sine_wavs))
|
||||
|
||||
# source for noise branch, in the same shape as uv
|
||||
noise = torch.randn_like(uv) * self.sine_amp / 3
|
||||
return sine_merge, noise, uv
|
||||
def padDiff(x):
|
||||
return F.pad(F.pad(x, (0,0,-1,1), 'constant', 0) - x, (0,0,0,-1), 'constant', 0)
|
||||
|
||||
|
||||
class Generator(torch.nn.Module):
|
||||
def __init__(self, style_dim, resblock_kernel_sizes, upsample_rates, upsample_initial_channel, resblock_dilation_sizes, upsample_kernel_sizes, gen_istft_n_fft, gen_istft_hop_size):
|
||||
super(Generator, self).__init__()
|
||||
|
||||
self.num_kernels = len(resblock_kernel_sizes)
|
||||
self.num_upsamples = len(upsample_rates)
|
||||
resblock = AdaINResBlock1
|
||||
|
||||
self.m_source = SourceModuleHnNSF(
|
||||
sampling_rate=24000,
|
||||
upsample_scale=np.prod(upsample_rates) * gen_istft_hop_size,
|
||||
harmonic_num=8, voiced_threshod=10)
|
||||
self.f0_upsamp = torch.nn.Upsample(scale_factor=np.prod(upsample_rates) * gen_istft_hop_size)
|
||||
self.noise_convs = nn.ModuleList()
|
||||
self.noise_res = nn.ModuleList()
|
||||
|
||||
self.ups = nn.ModuleList()
|
||||
for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
|
||||
self.ups.append(weight_norm(
|
||||
ConvTranspose1d(upsample_initial_channel//(2**i), upsample_initial_channel//(2**(i+1)),
|
||||
k, u, padding=(k-u)//2)))
|
||||
|
||||
self.resblocks = nn.ModuleList()
|
||||
for i in range(len(self.ups)):
|
||||
ch = upsample_initial_channel//(2**(i+1))
|
||||
for j, (k, d) in enumerate(zip(resblock_kernel_sizes,resblock_dilation_sizes)):
|
||||
self.resblocks.append(resblock(ch, k, d, style_dim))
|
||||
|
||||
c_cur = upsample_initial_channel // (2 ** (i + 1))
|
||||
|
||||
if i + 1 < len(upsample_rates): #
|
||||
stride_f0 = np.prod(upsample_rates[i + 1:])
|
||||
self.noise_convs.append(Conv1d(
|
||||
gen_istft_n_fft + 2, c_cur, kernel_size=stride_f0 * 2, stride=stride_f0, padding=(stride_f0+1) // 2))
|
||||
self.noise_res.append(resblock(c_cur, 7, [1,3,5], style_dim))
|
||||
else:
|
||||
self.noise_convs.append(Conv1d(gen_istft_n_fft + 2, c_cur, kernel_size=1))
|
||||
self.noise_res.append(resblock(c_cur, 11, [1,3,5], style_dim))
|
||||
|
||||
|
||||
self.post_n_fft = gen_istft_n_fft
|
||||
self.conv_post = weight_norm(Conv1d(ch, self.post_n_fft + 2, 7, 1, padding=3))
|
||||
self.ups.apply(init_weights)
|
||||
self.conv_post.apply(init_weights)
|
||||
self.reflection_pad = torch.nn.ReflectionPad1d((1, 0))
|
||||
self.stft = TorchSTFT(filter_length=gen_istft_n_fft, hop_length=gen_istft_hop_size, win_length=gen_istft_n_fft)
|
||||
|
||||
|
||||
def forward(self, x, s, f0):
|
||||
with torch.no_grad():
|
||||
f0 = self.f0_upsamp(f0[:, None]).transpose(1, 2) # bs,n,t
|
||||
|
||||
har_source, noi_source, uv = self.m_source(f0)
|
||||
har_source = har_source.transpose(1, 2).squeeze(1)
|
||||
har_spec, har_phase = self.stft.transform(har_source)
|
||||
har = torch.cat([har_spec, har_phase], dim=1)
|
||||
|
||||
for i in range(self.num_upsamples):
|
||||
x = F.leaky_relu(x, LRELU_SLOPE)
|
||||
x_source = self.noise_convs[i](har)
|
||||
x_source = self.noise_res[i](x_source, s)
|
||||
|
||||
x = self.ups[i](x)
|
||||
if i == self.num_upsamples - 1:
|
||||
x = self.reflection_pad(x)
|
||||
|
||||
x = x + x_source
|
||||
xs = None
|
||||
for j in range(self.num_kernels):
|
||||
if xs is None:
|
||||
xs = self.resblocks[i*self.num_kernels+j](x, s)
|
||||
else:
|
||||
xs += self.resblocks[i*self.num_kernels+j](x, s)
|
||||
x = xs / self.num_kernels
|
||||
x = F.leaky_relu(x)
|
||||
x = self.conv_post(x)
|
||||
spec = torch.exp(x[:,:self.post_n_fft // 2 + 1, :])
|
||||
phase = torch.sin(x[:, self.post_n_fft // 2 + 1:, :])
|
||||
return self.stft.inverse(spec, phase)
|
||||
|
||||
def fw_phase(self, x, s):
|
||||
for i in range(self.num_upsamples):
|
||||
x = F.leaky_relu(x, LRELU_SLOPE)
|
||||
x = self.ups[i](x)
|
||||
xs = None
|
||||
for j in range(self.num_kernels):
|
||||
if xs is None:
|
||||
xs = self.resblocks[i*self.num_kernels+j](x, s)
|
||||
else:
|
||||
xs += self.resblocks[i*self.num_kernels+j](x, s)
|
||||
x = xs / self.num_kernels
|
||||
x = F.leaky_relu(x)
|
||||
x = self.reflection_pad(x)
|
||||
x = self.conv_post(x)
|
||||
spec = torch.exp(x[:,:self.post_n_fft // 2 + 1, :])
|
||||
phase = torch.sin(x[:, self.post_n_fft // 2 + 1:, :])
|
||||
return spec, phase
|
||||
|
||||
def remove_weight_norm(self):
|
||||
print('Removing weight norm...')
|
||||
for l in self.ups:
|
||||
remove_weight_norm(l)
|
||||
for l in self.resblocks:
|
||||
l.remove_weight_norm()
|
||||
remove_weight_norm(self.conv_pre)
|
||||
remove_weight_norm(self.conv_post)
|
||||
|
||||
|
||||
class AdainResBlk1d(nn.Module):
|
||||
def __init__(self, dim_in, dim_out, style_dim=64, actv=nn.LeakyReLU(0.2),
|
||||
upsample='none', dropout_p=0.0):
|
||||
super().__init__()
|
||||
self.actv = actv
|
||||
self.upsample_type = upsample
|
||||
self.upsample = UpSample1d(upsample)
|
||||
self.learned_sc = dim_in != dim_out
|
||||
self._build_weights(dim_in, dim_out, style_dim)
|
||||
self.dropout = nn.Dropout(dropout_p)
|
||||
|
||||
if upsample == 'none':
|
||||
self.pool = nn.Identity()
|
||||
else:
|
||||
self.pool = weight_norm(nn.ConvTranspose1d(dim_in, dim_in, kernel_size=3, stride=2, groups=dim_in, padding=1, output_padding=1))
|
||||
|
||||
|
||||
def _build_weights(self, dim_in, dim_out, style_dim):
|
||||
self.conv1 = weight_norm(nn.Conv1d(dim_in, dim_out, 3, 1, 1))
|
||||
self.conv2 = weight_norm(nn.Conv1d(dim_out, dim_out, 3, 1, 1))
|
||||
self.norm1 = AdaIN1d(style_dim, dim_in)
|
||||
self.norm2 = AdaIN1d(style_dim, dim_out)
|
||||
if self.learned_sc:
|
||||
self.conv1x1 = weight_norm(nn.Conv1d(dim_in, dim_out, 1, 1, 0, bias=False))
|
||||
|
||||
def _shortcut(self, x):
|
||||
x = self.upsample(x)
|
||||
if self.learned_sc:
|
||||
x = self.conv1x1(x)
|
||||
return x
|
||||
|
||||
def _residual(self, x, s):
|
||||
x = self.norm1(x, s)
|
||||
x = self.actv(x)
|
||||
x = self.pool(x)
|
||||
x = self.conv1(self.dropout(x))
|
||||
x = self.norm2(x, s)
|
||||
x = self.actv(x)
|
||||
x = self.conv2(self.dropout(x))
|
||||
return x
|
||||
|
||||
def forward(self, x, s):
|
||||
out = self._residual(x, s)
|
||||
out = (out + self._shortcut(x)) / np.sqrt(2)
|
||||
return out
|
||||
|
||||
class UpSample1d(nn.Module):
|
||||
def __init__(self, layer_type):
|
||||
super().__init__()
|
||||
self.layer_type = layer_type
|
||||
|
||||
def forward(self, x):
|
||||
if self.layer_type == 'none':
|
||||
return x
|
||||
else:
|
||||
return F.interpolate(x, scale_factor=2, mode='nearest')
|
||||
|
||||
class Decoder(nn.Module):
|
||||
def __init__(self, dim_in=512, F0_channel=512, style_dim=64, dim_out=80,
|
||||
resblock_kernel_sizes = [3,7,11],
|
||||
upsample_rates = [10, 6],
|
||||
upsample_initial_channel=512,
|
||||
resblock_dilation_sizes=[[1,3,5], [1,3,5], [1,3,5]],
|
||||
upsample_kernel_sizes=[20, 12],
|
||||
gen_istft_n_fft=20, gen_istft_hop_size=5):
|
||||
super().__init__()
|
||||
|
||||
self.decode = nn.ModuleList()
|
||||
|
||||
self.encode = AdainResBlk1d(dim_in + 2, 1024, style_dim)
|
||||
|
||||
self.decode.append(AdainResBlk1d(1024 + 2 + 64, 1024, style_dim))
|
||||
self.decode.append(AdainResBlk1d(1024 + 2 + 64, 1024, style_dim))
|
||||
self.decode.append(AdainResBlk1d(1024 + 2 + 64, 1024, style_dim))
|
||||
self.decode.append(AdainResBlk1d(1024 + 2 + 64, 512, style_dim, upsample=True))
|
||||
|
||||
self.F0_conv = weight_norm(nn.Conv1d(1, 1, kernel_size=3, stride=2, groups=1, padding=1))
|
||||
|
||||
self.N_conv = weight_norm(nn.Conv1d(1, 1, kernel_size=3, stride=2, groups=1, padding=1))
|
||||
|
||||
self.asr_res = nn.Sequential(
|
||||
weight_norm(nn.Conv1d(512, 64, kernel_size=1)),
|
||||
)
|
||||
|
||||
|
||||
self.generator = Generator(style_dim, resblock_kernel_sizes, upsample_rates,
|
||||
upsample_initial_channel, resblock_dilation_sizes,
|
||||
upsample_kernel_sizes, gen_istft_n_fft, gen_istft_hop_size)
|
||||
|
||||
def forward(self, asr, F0_curve, N, s):
|
||||
F0 = self.F0_conv(F0_curve.unsqueeze(1))
|
||||
N = self.N_conv(N.unsqueeze(1))
|
||||
|
||||
x = torch.cat([asr, F0, N], axis=1)
|
||||
x = self.encode(x, s)
|
||||
|
||||
asr_res = self.asr_res(asr)
|
||||
|
||||
res = True
|
||||
for block in self.decode:
|
||||
if res:
|
||||
x = torch.cat([x, asr_res, F0, N], axis=1)
|
||||
x = block(x, s)
|
||||
if block.upsample_type != "none":
|
||||
res = False
|
||||
|
||||
x = self.generator(x, s, F0_curve)
|
||||
return x
|
||||
166
backend/python/kokoro/kokoro.py
Normal file
166
backend/python/kokoro/kokoro.py
Normal file
@@ -0,0 +1,166 @@
|
||||
# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/kokoro.py
|
||||
import phonemizer
|
||||
import re
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
def split_num(num):
|
||||
num = num.group()
|
||||
if '.' in num:
|
||||
return num
|
||||
elif ':' in num:
|
||||
h, m = [int(n) for n in num.split(':')]
|
||||
if m == 0:
|
||||
return f"{h} o'clock"
|
||||
elif m < 10:
|
||||
return f'{h} oh {m}'
|
||||
return f'{h} {m}'
|
||||
year = int(num[:4])
|
||||
if year < 1100 or year % 1000 < 10:
|
||||
return num
|
||||
left, right = num[:2], int(num[2:4])
|
||||
s = 's' if num.endswith('s') else ''
|
||||
if 100 <= year % 1000 <= 999:
|
||||
if right == 0:
|
||||
return f'{left} hundred{s}'
|
||||
elif right < 10:
|
||||
return f'{left} oh {right}{s}'
|
||||
return f'{left} {right}{s}'
|
||||
|
||||
def flip_money(m):
|
||||
m = m.group()
|
||||
bill = 'dollar' if m[0] == '$' else 'pound'
|
||||
if m[-1].isalpha():
|
||||
return f'{m[1:]} {bill}s'
|
||||
elif '.' not in m:
|
||||
s = '' if m[1:] == '1' else 's'
|
||||
return f'{m[1:]} {bill}{s}'
|
||||
b, c = m[1:].split('.')
|
||||
s = '' if b == '1' else 's'
|
||||
c = int(c.ljust(2, '0'))
|
||||
coins = f"cent{'' if c == 1 else 's'}" if m[0] == '$' else ('penny' if c == 1 else 'pence')
|
||||
return f'{b} {bill}{s} and {c} {coins}'
|
||||
|
||||
def point_num(num):
|
||||
a, b = num.group().split('.')
|
||||
return ' point '.join([a, ' '.join(b)])
|
||||
|
||||
def normalize_text(text):
|
||||
text = text.replace(chr(8216), "'").replace(chr(8217), "'")
|
||||
text = text.replace('«', chr(8220)).replace('»', chr(8221))
|
||||
text = text.replace(chr(8220), '"').replace(chr(8221), '"')
|
||||
text = text.replace('(', '«').replace(')', '»')
|
||||
for a, b in zip('、。!,:;?', ',.!,:;?'):
|
||||
text = text.replace(a, b+' ')
|
||||
text = re.sub(r'[^\S \n]', ' ', text)
|
||||
text = re.sub(r' +', ' ', text)
|
||||
text = re.sub(r'(?<=\n) +(?=\n)', '', text)
|
||||
text = re.sub(r'\bD[Rr]\.(?= [A-Z])', 'Doctor', text)
|
||||
text = re.sub(r'\b(?:Mr\.|MR\.(?= [A-Z]))', 'Mister', text)
|
||||
text = re.sub(r'\b(?:Ms\.|MS\.(?= [A-Z]))', 'Miss', text)
|
||||
text = re.sub(r'\b(?:Mrs\.|MRS\.(?= [A-Z]))', 'Mrs', text)
|
||||
text = re.sub(r'\betc\.(?! [A-Z])', 'etc', text)
|
||||
text = re.sub(r'(?i)\b(y)eah?\b', r"\1e'a", text)
|
||||
text = re.sub(r'\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)', split_num, text)
|
||||
text = re.sub(r'(?<=\d),(?=\d)', '', text)
|
||||
text = re.sub(r'(?i)[$£]\d+(?:\.\d+)?(?: hundred| thousand| (?:[bm]|tr)illion)*\b|[$£]\d+\.\d\d?\b', flip_money, text)
|
||||
text = re.sub(r'\d*\.\d+', point_num, text)
|
||||
text = re.sub(r'(?<=\d)-(?=\d)', ' to ', text)
|
||||
text = re.sub(r'(?<=\d)S', ' S', text)
|
||||
text = re.sub(r"(?<=[BCDFGHJ-NP-TV-Z])'?s\b", "'S", text)
|
||||
text = re.sub(r"(?<=X')S\b", 's', text)
|
||||
text = re.sub(r'(?:[A-Za-z]\.){2,} [a-z]', lambda m: m.group().replace('.', '-'), text)
|
||||
text = re.sub(r'(?i)(?<=[A-Z])\.(?=[A-Z])', '-', text)
|
||||
return text.strip()
|
||||
|
||||
def get_vocab():
|
||||
_pad = "$"
|
||||
_punctuation = ';:,.!?¡¿—…"«»“” '
|
||||
_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
|
||||
_letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"
|
||||
symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa)
|
||||
dicts = {}
|
||||
for i in range(len((symbols))):
|
||||
dicts[symbols[i]] = i
|
||||
return dicts
|
||||
|
||||
VOCAB = get_vocab()
|
||||
def tokenize(ps):
|
||||
return [i for i in map(VOCAB.get, ps) if i is not None]
|
||||
|
||||
phonemizers = dict(
|
||||
a=phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True),
|
||||
b=phonemizer.backend.EspeakBackend(language='en-gb', preserve_punctuation=True, with_stress=True),
|
||||
)
|
||||
def phonemize(text, lang, norm=True):
|
||||
if norm:
|
||||
text = normalize_text(text)
|
||||
ps = phonemizers[lang].phonemize([text])
|
||||
ps = ps[0] if ps else ''
|
||||
# https://en.wiktionary.org/wiki/kokoro#English
|
||||
ps = ps.replace('kəkˈoːɹoʊ', 'kˈoʊkəɹoʊ').replace('kəkˈɔːɹəʊ', 'kˈəʊkəɹəʊ')
|
||||
ps = ps.replace('ʲ', 'j').replace('r', 'ɹ').replace('x', 'k').replace('ɬ', 'l')
|
||||
ps = re.sub(r'(?<=[a-zɹː])(?=hˈʌndɹɪd)', ' ', ps)
|
||||
ps = re.sub(r' z(?=[;:,.!?¡¿—…"«»“” ]|$)', 'z', ps)
|
||||
if lang == 'a':
|
||||
ps = re.sub(r'(?<=nˈaɪn)ti(?!ː)', 'di', ps)
|
||||
ps = ''.join(filter(lambda p: p in VOCAB, ps))
|
||||
return ps.strip()
|
||||
|
||||
def length_to_mask(lengths):
|
||||
mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)
|
||||
mask = torch.gt(mask+1, lengths.unsqueeze(1))
|
||||
return mask
|
||||
|
||||
@torch.no_grad()
|
||||
def forward(model, tokens, ref_s, speed):
|
||||
device = ref_s.device
|
||||
tokens = torch.LongTensor([[0, *tokens, 0]]).to(device)
|
||||
input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)
|
||||
text_mask = length_to_mask(input_lengths).to(device)
|
||||
bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())
|
||||
d_en = model.bert_encoder(bert_dur).transpose(-1, -2)
|
||||
s = ref_s[:, 128:]
|
||||
d = model.predictor.text_encoder(d_en, s, input_lengths, text_mask)
|
||||
x, _ = model.predictor.lstm(d)
|
||||
duration = model.predictor.duration_proj(x)
|
||||
duration = torch.sigmoid(duration).sum(axis=-1) / speed
|
||||
pred_dur = torch.round(duration).clamp(min=1).long()
|
||||
pred_aln_trg = torch.zeros(input_lengths, pred_dur.sum().item())
|
||||
c_frame = 0
|
||||
for i in range(pred_aln_trg.size(0)):
|
||||
pred_aln_trg[i, c_frame:c_frame + pred_dur[0,i].item()] = 1
|
||||
c_frame += pred_dur[0,i].item()
|
||||
en = d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device)
|
||||
F0_pred, N_pred = model.predictor.F0Ntrain(en, s)
|
||||
t_en = model.text_encoder(tokens, input_lengths, text_mask)
|
||||
asr = t_en @ pred_aln_trg.unsqueeze(0).to(device)
|
||||
return model.decoder(asr, F0_pred, N_pred, ref_s[:, :128]).squeeze().cpu().numpy()
|
||||
|
||||
def generate(model, text, voicepack, lang='a', speed=1, ps=None):
|
||||
ps = ps or phonemize(text, lang)
|
||||
tokens = tokenize(ps)
|
||||
if not tokens:
|
||||
return None
|
||||
elif len(tokens) > 510:
|
||||
tokens = tokens[:510]
|
||||
print('Truncated to 510 tokens')
|
||||
ref_s = voicepack[len(tokens)]
|
||||
out = forward(model, tokens, ref_s, speed)
|
||||
ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens)
|
||||
return out, ps
|
||||
|
||||
def generate_full(model, text, voicepack, lang='a', speed=1, ps=None):
|
||||
ps = ps or phonemize(text, lang)
|
||||
tokens = tokenize(ps)
|
||||
if not tokens:
|
||||
return None
|
||||
outs = []
|
||||
loop_count = len(tokens)//510 + (1 if len(tokens) % 510 != 0 else 0)
|
||||
for i in range(loop_count):
|
||||
ref_s = voicepack[len(tokens[i*510:(i+1)*510])]
|
||||
out = forward(model, tokens[i*510:(i+1)*510], ref_s, speed)
|
||||
outs.append(out)
|
||||
outs = np.concatenate(outs)
|
||||
ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens)
|
||||
return outs, ps
|
||||
373
backend/python/kokoro/models.py
Normal file
373
backend/python/kokoro/models.py
Normal file
@@ -0,0 +1,373 @@
|
||||
# https://github.com/yl4579/StyleTTS2/blob/main/models.py
|
||||
# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/models.py
|
||||
from istftnet import AdaIN1d, Decoder
|
||||
from munch import Munch
|
||||
from pathlib import Path
|
||||
from plbert import load_plbert
|
||||
from torch.nn.utils import weight_norm, spectral_norm
|
||||
import json
|
||||
import numpy as np
|
||||
import os
|
||||
import os.path as osp
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
class LinearNorm(torch.nn.Module):
|
||||
def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'):
|
||||
super(LinearNorm, self).__init__()
|
||||
self.linear_layer = torch.nn.Linear(in_dim, out_dim, bias=bias)
|
||||
|
||||
torch.nn.init.xavier_uniform_(
|
||||
self.linear_layer.weight,
|
||||
gain=torch.nn.init.calculate_gain(w_init_gain))
|
||||
|
||||
def forward(self, x):
|
||||
return self.linear_layer(x)
|
||||
|
||||
class LayerNorm(nn.Module):
|
||||
def __init__(self, channels, eps=1e-5):
|
||||
super().__init__()
|
||||
self.channels = channels
|
||||
self.eps = eps
|
||||
|
||||
self.gamma = nn.Parameter(torch.ones(channels))
|
||||
self.beta = nn.Parameter(torch.zeros(channels))
|
||||
|
||||
def forward(self, x):
|
||||
x = x.transpose(1, -1)
|
||||
x = F.layer_norm(x, (self.channels,), self.gamma, self.beta, self.eps)
|
||||
return x.transpose(1, -1)
|
||||
|
||||
class TextEncoder(nn.Module):
|
||||
def __init__(self, channels, kernel_size, depth, n_symbols, actv=nn.LeakyReLU(0.2)):
|
||||
super().__init__()
|
||||
self.embedding = nn.Embedding(n_symbols, channels)
|
||||
|
||||
padding = (kernel_size - 1) // 2
|
||||
self.cnn = nn.ModuleList()
|
||||
for _ in range(depth):
|
||||
self.cnn.append(nn.Sequential(
|
||||
weight_norm(nn.Conv1d(channels, channels, kernel_size=kernel_size, padding=padding)),
|
||||
LayerNorm(channels),
|
||||
actv,
|
||||
nn.Dropout(0.2),
|
||||
))
|
||||
# self.cnn = nn.Sequential(*self.cnn)
|
||||
|
||||
self.lstm = nn.LSTM(channels, channels//2, 1, batch_first=True, bidirectional=True)
|
||||
|
||||
def forward(self, x, input_lengths, m):
|
||||
x = self.embedding(x) # [B, T, emb]
|
||||
x = x.transpose(1, 2) # [B, emb, T]
|
||||
m = m.to(input_lengths.device).unsqueeze(1)
|
||||
x.masked_fill_(m, 0.0)
|
||||
|
||||
for c in self.cnn:
|
||||
x = c(x)
|
||||
x.masked_fill_(m, 0.0)
|
||||
|
||||
x = x.transpose(1, 2) # [B, T, chn]
|
||||
|
||||
input_lengths = input_lengths.cpu().numpy()
|
||||
x = nn.utils.rnn.pack_padded_sequence(
|
||||
x, input_lengths, batch_first=True, enforce_sorted=False)
|
||||
|
||||
self.lstm.flatten_parameters()
|
||||
x, _ = self.lstm(x)
|
||||
x, _ = nn.utils.rnn.pad_packed_sequence(
|
||||
x, batch_first=True)
|
||||
|
||||
x = x.transpose(-1, -2)
|
||||
x_pad = torch.zeros([x.shape[0], x.shape[1], m.shape[-1]])
|
||||
|
||||
x_pad[:, :, :x.shape[-1]] = x
|
||||
x = x_pad.to(x.device)
|
||||
|
||||
x.masked_fill_(m, 0.0)
|
||||
|
||||
return x
|
||||
|
||||
def inference(self, x):
|
||||
x = self.embedding(x)
|
||||
x = x.transpose(1, 2)
|
||||
x = self.cnn(x)
|
||||
x = x.transpose(1, 2)
|
||||
self.lstm.flatten_parameters()
|
||||
x, _ = self.lstm(x)
|
||||
return x
|
||||
|
||||
def length_to_mask(self, lengths):
|
||||
mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)
|
||||
mask = torch.gt(mask+1, lengths.unsqueeze(1))
|
||||
return mask
|
||||
|
||||
|
||||
class UpSample1d(nn.Module):
|
||||
def __init__(self, layer_type):
|
||||
super().__init__()
|
||||
self.layer_type = layer_type
|
||||
|
||||
def forward(self, x):
|
||||
if self.layer_type == 'none':
|
||||
return x
|
||||
else:
|
||||
return F.interpolate(x, scale_factor=2, mode='nearest')
|
||||
|
||||
class AdainResBlk1d(nn.Module):
|
||||
def __init__(self, dim_in, dim_out, style_dim=64, actv=nn.LeakyReLU(0.2),
|
||||
upsample='none', dropout_p=0.0):
|
||||
super().__init__()
|
||||
self.actv = actv
|
||||
self.upsample_type = upsample
|
||||
self.upsample = UpSample1d(upsample)
|
||||
self.learned_sc = dim_in != dim_out
|
||||
self._build_weights(dim_in, dim_out, style_dim)
|
||||
self.dropout = nn.Dropout(dropout_p)
|
||||
|
||||
if upsample == 'none':
|
||||
self.pool = nn.Identity()
|
||||
else:
|
||||
self.pool = weight_norm(nn.ConvTranspose1d(dim_in, dim_in, kernel_size=3, stride=2, groups=dim_in, padding=1, output_padding=1))
|
||||
|
||||
|
||||
def _build_weights(self, dim_in, dim_out, style_dim):
|
||||
self.conv1 = weight_norm(nn.Conv1d(dim_in, dim_out, 3, 1, 1))
|
||||
self.conv2 = weight_norm(nn.Conv1d(dim_out, dim_out, 3, 1, 1))
|
||||
self.norm1 = AdaIN1d(style_dim, dim_in)
|
||||
self.norm2 = AdaIN1d(style_dim, dim_out)
|
||||
if self.learned_sc:
|
||||
self.conv1x1 = weight_norm(nn.Conv1d(dim_in, dim_out, 1, 1, 0, bias=False))
|
||||
|
||||
def _shortcut(self, x):
|
||||
x = self.upsample(x)
|
||||
if self.learned_sc:
|
||||
x = self.conv1x1(x)
|
||||
return x
|
||||
|
||||
def _residual(self, x, s):
|
||||
x = self.norm1(x, s)
|
||||
x = self.actv(x)
|
||||
x = self.pool(x)
|
||||
x = self.conv1(self.dropout(x))
|
||||
x = self.norm2(x, s)
|
||||
x = self.actv(x)
|
||||
x = self.conv2(self.dropout(x))
|
||||
return x
|
||||
|
||||
def forward(self, x, s):
|
||||
out = self._residual(x, s)
|
||||
out = (out + self._shortcut(x)) / np.sqrt(2)
|
||||
return out
|
||||
|
||||
class AdaLayerNorm(nn.Module):
|
||||
def __init__(self, style_dim, channels, eps=1e-5):
|
||||
super().__init__()
|
||||
self.channels = channels
|
||||
self.eps = eps
|
||||
|
||||
self.fc = nn.Linear(style_dim, channels*2)
|
||||
|
||||
def forward(self, x, s):
|
||||
x = x.transpose(-1, -2)
|
||||
x = x.transpose(1, -1)
|
||||
|
||||
h = self.fc(s)
|
||||
h = h.view(h.size(0), h.size(1), 1)
|
||||
gamma, beta = torch.chunk(h, chunks=2, dim=1)
|
||||
gamma, beta = gamma.transpose(1, -1), beta.transpose(1, -1)
|
||||
|
||||
|
||||
x = F.layer_norm(x, (self.channels,), eps=self.eps)
|
||||
x = (1 + gamma) * x + beta
|
||||
return x.transpose(1, -1).transpose(-1, -2)
|
||||
|
||||
class ProsodyPredictor(nn.Module):
|
||||
|
||||
def __init__(self, style_dim, d_hid, nlayers, max_dur=50, dropout=0.1):
|
||||
super().__init__()
|
||||
|
||||
self.text_encoder = DurationEncoder(sty_dim=style_dim,
|
||||
d_model=d_hid,
|
||||
nlayers=nlayers,
|
||||
dropout=dropout)
|
||||
|
||||
self.lstm = nn.LSTM(d_hid + style_dim, d_hid // 2, 1, batch_first=True, bidirectional=True)
|
||||
self.duration_proj = LinearNorm(d_hid, max_dur)
|
||||
|
||||
self.shared = nn.LSTM(d_hid + style_dim, d_hid // 2, 1, batch_first=True, bidirectional=True)
|
||||
self.F0 = nn.ModuleList()
|
||||
self.F0.append(AdainResBlk1d(d_hid, d_hid, style_dim, dropout_p=dropout))
|
||||
self.F0.append(AdainResBlk1d(d_hid, d_hid // 2, style_dim, upsample=True, dropout_p=dropout))
|
||||
self.F0.append(AdainResBlk1d(d_hid // 2, d_hid // 2, style_dim, dropout_p=dropout))
|
||||
|
||||
self.N = nn.ModuleList()
|
||||
self.N.append(AdainResBlk1d(d_hid, d_hid, style_dim, dropout_p=dropout))
|
||||
self.N.append(AdainResBlk1d(d_hid, d_hid // 2, style_dim, upsample=True, dropout_p=dropout))
|
||||
self.N.append(AdainResBlk1d(d_hid // 2, d_hid // 2, style_dim, dropout_p=dropout))
|
||||
|
||||
self.F0_proj = nn.Conv1d(d_hid // 2, 1, 1, 1, 0)
|
||||
self.N_proj = nn.Conv1d(d_hid // 2, 1, 1, 1, 0)
|
||||
|
||||
|
||||
def forward(self, texts, style, text_lengths, alignment, m):
|
||||
d = self.text_encoder(texts, style, text_lengths, m)
|
||||
|
||||
batch_size = d.shape[0]
|
||||
text_size = d.shape[1]
|
||||
|
||||
# predict duration
|
||||
input_lengths = text_lengths.cpu().numpy()
|
||||
x = nn.utils.rnn.pack_padded_sequence(
|
||||
d, input_lengths, batch_first=True, enforce_sorted=False)
|
||||
|
||||
m = m.to(text_lengths.device).unsqueeze(1)
|
||||
|
||||
self.lstm.flatten_parameters()
|
||||
x, _ = self.lstm(x)
|
||||
x, _ = nn.utils.rnn.pad_packed_sequence(
|
||||
x, batch_first=True)
|
||||
|
||||
x_pad = torch.zeros([x.shape[0], m.shape[-1], x.shape[-1]])
|
||||
|
||||
x_pad[:, :x.shape[1], :] = x
|
||||
x = x_pad.to(x.device)
|
||||
|
||||
duration = self.duration_proj(nn.functional.dropout(x, 0.5, training=self.training))
|
||||
|
||||
en = (d.transpose(-1, -2) @ alignment)
|
||||
|
||||
return duration.squeeze(-1), en
|
||||
|
||||
def F0Ntrain(self, x, s):
|
||||
x, _ = self.shared(x.transpose(-1, -2))
|
||||
|
||||
F0 = x.transpose(-1, -2)
|
||||
for block in self.F0:
|
||||
F0 = block(F0, s)
|
||||
F0 = self.F0_proj(F0)
|
||||
|
||||
N = x.transpose(-1, -2)
|
||||
for block in self.N:
|
||||
N = block(N, s)
|
||||
N = self.N_proj(N)
|
||||
|
||||
return F0.squeeze(1), N.squeeze(1)
|
||||
|
||||
def length_to_mask(self, lengths):
|
||||
mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)
|
||||
mask = torch.gt(mask+1, lengths.unsqueeze(1))
|
||||
return mask
|
||||
|
||||
class DurationEncoder(nn.Module):
|
||||
|
||||
def __init__(self, sty_dim, d_model, nlayers, dropout=0.1):
|
||||
super().__init__()
|
||||
self.lstms = nn.ModuleList()
|
||||
for _ in range(nlayers):
|
||||
self.lstms.append(nn.LSTM(d_model + sty_dim,
|
||||
d_model // 2,
|
||||
num_layers=1,
|
||||
batch_first=True,
|
||||
bidirectional=True,
|
||||
dropout=dropout))
|
||||
self.lstms.append(AdaLayerNorm(sty_dim, d_model))
|
||||
|
||||
|
||||
self.dropout = dropout
|
||||
self.d_model = d_model
|
||||
self.sty_dim = sty_dim
|
||||
|
||||
def forward(self, x, style, text_lengths, m):
|
||||
masks = m.to(text_lengths.device)
|
||||
|
||||
x = x.permute(2, 0, 1)
|
||||
s = style.expand(x.shape[0], x.shape[1], -1)
|
||||
x = torch.cat([x, s], axis=-1)
|
||||
x.masked_fill_(masks.unsqueeze(-1).transpose(0, 1), 0.0)
|
||||
|
||||
x = x.transpose(0, 1)
|
||||
input_lengths = text_lengths.cpu().numpy()
|
||||
x = x.transpose(-1, -2)
|
||||
|
||||
for block in self.lstms:
|
||||
if isinstance(block, AdaLayerNorm):
|
||||
x = block(x.transpose(-1, -2), style).transpose(-1, -2)
|
||||
x = torch.cat([x, s.permute(1, -1, 0)], axis=1)
|
||||
x.masked_fill_(masks.unsqueeze(-1).transpose(-1, -2), 0.0)
|
||||
else:
|
||||
x = x.transpose(-1, -2)
|
||||
x = nn.utils.rnn.pack_padded_sequence(
|
||||
x, input_lengths, batch_first=True, enforce_sorted=False)
|
||||
block.flatten_parameters()
|
||||
x, _ = block(x)
|
||||
x, _ = nn.utils.rnn.pad_packed_sequence(
|
||||
x, batch_first=True)
|
||||
x = F.dropout(x, p=self.dropout, training=self.training)
|
||||
x = x.transpose(-1, -2)
|
||||
|
||||
x_pad = torch.zeros([x.shape[0], x.shape[1], m.shape[-1]])
|
||||
|
||||
x_pad[:, :, :x.shape[-1]] = x
|
||||
x = x_pad.to(x.device)
|
||||
|
||||
return x.transpose(-1, -2)
|
||||
|
||||
def inference(self, x, style):
|
||||
x = self.embedding(x.transpose(-1, -2)) * np.sqrt(self.d_model)
|
||||
style = style.expand(x.shape[0], x.shape[1], -1)
|
||||
x = torch.cat([x, style], axis=-1)
|
||||
src = self.pos_encoder(x)
|
||||
output = self.transformer_encoder(src).transpose(0, 1)
|
||||
return output
|
||||
|
||||
def length_to_mask(self, lengths):
|
||||
mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)
|
||||
mask = torch.gt(mask+1, lengths.unsqueeze(1))
|
||||
return mask
|
||||
|
||||
# https://github.com/yl4579/StyleTTS2/blob/main/utils.py
|
||||
def recursive_munch(d):
|
||||
if isinstance(d, dict):
|
||||
return Munch((k, recursive_munch(v)) for k, v in d.items())
|
||||
elif isinstance(d, list):
|
||||
return [recursive_munch(v) for v in d]
|
||||
else:
|
||||
return d
|
||||
|
||||
def build_model(path, device):
|
||||
config = Path(__file__).parent / 'config.json'
|
||||
assert config.exists(), f'Config path incorrect: config.json not found at {config}'
|
||||
with open(config, 'r') as r:
|
||||
args = recursive_munch(json.load(r))
|
||||
assert args.decoder.type == 'istftnet', f'Unknown decoder type: {args.decoder.type}'
|
||||
decoder = Decoder(dim_in=args.hidden_dim, style_dim=args.style_dim, dim_out=args.n_mels,
|
||||
resblock_kernel_sizes = args.decoder.resblock_kernel_sizes,
|
||||
upsample_rates = args.decoder.upsample_rates,
|
||||
upsample_initial_channel=args.decoder.upsample_initial_channel,
|
||||
resblock_dilation_sizes=args.decoder.resblock_dilation_sizes,
|
||||
upsample_kernel_sizes=args.decoder.upsample_kernel_sizes,
|
||||
gen_istft_n_fft=args.decoder.gen_istft_n_fft, gen_istft_hop_size=args.decoder.gen_istft_hop_size)
|
||||
text_encoder = TextEncoder(channels=args.hidden_dim, kernel_size=5, depth=args.n_layer, n_symbols=args.n_token)
|
||||
predictor = ProsodyPredictor(style_dim=args.style_dim, d_hid=args.hidden_dim, nlayers=args.n_layer, max_dur=args.max_dur, dropout=args.dropout)
|
||||
bert = load_plbert()
|
||||
bert_encoder = nn.Linear(bert.config.hidden_size, args.hidden_dim)
|
||||
for parent in [bert, bert_encoder, predictor, decoder, text_encoder]:
|
||||
for child in parent.children():
|
||||
if isinstance(child, nn.RNNBase):
|
||||
child.flatten_parameters()
|
||||
model = Munch(
|
||||
bert=bert.to(device).eval(),
|
||||
bert_encoder=bert_encoder.to(device).eval(),
|
||||
predictor=predictor.to(device).eval(),
|
||||
decoder=decoder.to(device).eval(),
|
||||
text_encoder=text_encoder.to(device).eval(),
|
||||
)
|
||||
for key, state_dict in torch.load(path, map_location='cpu', weights_only=True)['net'].items():
|
||||
assert key in model, key
|
||||
try:
|
||||
model[key].load_state_dict(state_dict)
|
||||
except:
|
||||
state_dict = {k[7:]: v for k, v in state_dict.items()}
|
||||
model[key].load_state_dict(state_dict, strict=False)
|
||||
return model
|
||||
16
backend/python/kokoro/plbert.py
Normal file
16
backend/python/kokoro/plbert.py
Normal file
@@ -0,0 +1,16 @@
|
||||
# https://huggingface.co/hexgrad/Kokoro-82M/blob/main/plbert.py
|
||||
# https://github.com/yl4579/StyleTTS2/blob/main/Utils/PLBERT/util.py
|
||||
from transformers import AlbertConfig, AlbertModel
|
||||
|
||||
class CustomAlbert(AlbertModel):
|
||||
def forward(self, *args, **kwargs):
|
||||
# Call the original forward method
|
||||
outputs = super().forward(*args, **kwargs)
|
||||
# Only return the last_hidden_state
|
||||
return outputs.last_hidden_state
|
||||
|
||||
def load_plbert():
|
||||
plbert_config = {'vocab_size': 178, 'hidden_size': 768, 'num_attention_heads': 12, 'intermediate_size': 2048, 'max_position_embeddings': 512, 'num_hidden_layers': 12, 'dropout': 0.1}
|
||||
albert_base_configuration = AlbertConfig(**plbert_config)
|
||||
bert = CustomAlbert(albert_base_configuration)
|
||||
return bert
|
||||
6
backend/python/kokoro/protogen.sh
Normal file
6
backend/python/kokoro/protogen.sh
Normal file
@@ -0,0 +1,6 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
2
backend/python/kokoro/requirements-cpu.txt
Normal file
2
backend/python/kokoro/requirements-cpu.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
torch==2.4.1
|
||||
transformers
|
||||
@@ -1,4 +1,3 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||
accelerate
|
||||
torch==2.4.1+cu118
|
||||
torchaudio==2.4.1+cu118
|
||||
transformers
|
||||
2
backend/python/kokoro/requirements-cublas12.txt
Normal file
2
backend/python/kokoro/requirements-cublas12.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
torch==2.4.1
|
||||
transformers
|
||||
@@ -1,5 +1,3 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||
torch==2.4.1+rocm6.0
|
||||
accelerate
|
||||
sentence-transformers==3.3.1
|
||||
transformers
|
||||
@@ -1,8 +1,5 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch==2.3.110+xpu
|
||||
transformers
|
||||
oneccl_bind_pt==2.3.100+xpu
|
||||
accelerate
|
||||
torch==2.3.1+cxx11.abi
|
||||
optimum[openvino]
|
||||
setuptools
|
||||
oneccl_bind_pt==2.3.100+xpu
|
||||
transformers
|
||||
7
backend/python/kokoro/requirements.txt
Normal file
7
backend/python/kokoro/requirements.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
grpcio==1.69.0
|
||||
protobuf
|
||||
phonemizer
|
||||
scipy
|
||||
munch
|
||||
setuptools
|
||||
soundfile
|
||||
@@ -1,31 +0,0 @@
|
||||
.PHONY: sentencetransformers
|
||||
sentencetransformers: protogen
|
||||
bash ./install.sh
|
||||
|
||||
|
||||
.PHONY: run
|
||||
run: protogen
|
||||
@echo "Running sentencetransformers..."
|
||||
bash run.sh
|
||||
@echo "sentencetransformers run."
|
||||
|
||||
# It is not working well by using command line. It only6 works with IDE like VSCode.
|
||||
.PHONY: test
|
||||
test: protogen
|
||||
@echo "Testing sentencetransformers..."
|
||||
bash test.sh
|
||||
@echo "sentencetransformers tested."
|
||||
|
||||
.PHONY: protogen
|
||||
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
.PHONY: protogen-clean
|
||||
protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
rm -rf venv __pycache__
|
||||
@@ -1,5 +0,0 @@
|
||||
# Creating a separate environment for the sentencetransformers project
|
||||
|
||||
```
|
||||
make sentencetransformers
|
||||
```
|
||||
@@ -1,6 +0,0 @@
|
||||
torch==2.4.1
|
||||
accelerate
|
||||
transformers
|
||||
bitsandbytes
|
||||
sentence-transformers==3.3.1
|
||||
transformers
|
||||
@@ -1,5 +0,0 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||
torch==2.4.1+cu118
|
||||
accelerate
|
||||
sentence-transformers==3.3.1
|
||||
transformers
|
||||
@@ -1,4 +0,0 @@
|
||||
torch==2.4.1
|
||||
accelerate
|
||||
sentence-transformers==3.3.1
|
||||
transformers
|
||||
@@ -1,9 +0,0 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch==2.3.110+xpu
|
||||
torch==2.3.1+cxx11.abi
|
||||
oneccl_bind_pt==2.3.100+xpu
|
||||
optimum[openvino]
|
||||
setuptools
|
||||
accelerate
|
||||
sentence-transformers==3.3.1
|
||||
transformers
|
||||
@@ -1,5 +0,0 @@
|
||||
grpcio==1.69.0
|
||||
protobuf
|
||||
certifi
|
||||
datasets
|
||||
einops
|
||||
@@ -1,81 +0,0 @@
|
||||
"""
|
||||
A test script to test the gRPC service
|
||||
"""
|
||||
import unittest
|
||||
import subprocess
|
||||
import time
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
|
||||
|
||||
class TestBackendServicer(unittest.TestCase):
|
||||
"""
|
||||
TestBackendServicer is the class that tests the gRPC service
|
||||
"""
|
||||
def setUp(self):
|
||||
"""
|
||||
This method sets up the gRPC service by starting the server
|
||||
"""
|
||||
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
|
||||
time.sleep(10)
|
||||
|
||||
def tearDown(self) -> None:
|
||||
"""
|
||||
This method tears down the gRPC service by terminating the server
|
||||
"""
|
||||
self.service.kill()
|
||||
self.service.wait()
|
||||
|
||||
def test_server_startup(self):
|
||||
"""
|
||||
This method tests if the server starts up successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.Health(backend_pb2.HealthMessage())
|
||||
self.assertEqual(response.message, b'OK')
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("Server failed to start")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_load_model(self):
|
||||
"""
|
||||
This method tests if the model is loaded successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens"))
|
||||
self.assertTrue(response.success)
|
||||
self.assertEqual(response.message, "Model loaded successfully")
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("LoadModel service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_embedding(self):
|
||||
"""
|
||||
This method tests if the embeddings are generated successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens"))
|
||||
self.assertTrue(response.success)
|
||||
embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.")
|
||||
embedding_response = stub.Embedding(embedding_request)
|
||||
self.assertIsNotNone(embedding_response.embeddings)
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("Embedding service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
@@ -1,29 +0,0 @@
|
||||
.PHONY: transformers-musicgen
|
||||
transformers-musicgen: protogen
|
||||
bash install.sh
|
||||
|
||||
.PHONY: run
|
||||
run: protogen
|
||||
@echo "Running transformers..."
|
||||
bash run.sh
|
||||
@echo "transformers run."
|
||||
|
||||
.PHONY: test
|
||||
test: protogen
|
||||
@echo "Testing transformers..."
|
||||
bash test.sh
|
||||
@echo "transformers tested."
|
||||
|
||||
.PHONY: protogen
|
||||
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
.PHONY: protogen-clean
|
||||
protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
rm -rf venv __pycache__
|
||||
@@ -1,5 +0,0 @@
|
||||
# Creating a separate environment for the transformers project
|
||||
|
||||
```
|
||||
make transformers-musicgen
|
||||
```
|
||||
@@ -1,176 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Extra gRPC server for MusicgenForConditionalGeneration models.
|
||||
"""
|
||||
from concurrent import futures
|
||||
|
||||
import argparse
|
||||
import signal
|
||||
import sys
|
||||
import os
|
||||
|
||||
import time
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
|
||||
from scipy.io import wavfile
|
||||
from transformers import AutoProcessor, MusicgenForConditionalGeneration
|
||||
|
||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||
|
||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||
|
||||
# Implement the BackendServicer class with the service methods
|
||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
"""
|
||||
A gRPC servicer for the backend service.
|
||||
|
||||
This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
|
||||
"""
|
||||
def Health(self, request, context):
|
||||
"""
|
||||
A gRPC method that returns the health status of the backend service.
|
||||
|
||||
Args:
|
||||
request: A HealthRequest object that contains the request parameters.
|
||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
||||
|
||||
Returns:
|
||||
A Reply object that contains the health status of the backend service.
|
||||
"""
|
||||
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
||||
|
||||
def LoadModel(self, request, context):
|
||||
"""
|
||||
A gRPC method that loads a model into memory.
|
||||
|
||||
Args:
|
||||
request: A LoadModelRequest object that contains the request parameters.
|
||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
||||
|
||||
Returns:
|
||||
A Result object that contains the result of the LoadModel operation.
|
||||
"""
|
||||
model_name = request.Model
|
||||
try:
|
||||
self.processor = AutoProcessor.from_pretrained(model_name)
|
||||
self.model = MusicgenForConditionalGeneration.from_pretrained(model_name)
|
||||
except Exception as err:
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
|
||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
||||
|
||||
def SoundGeneration(self, request, context):
|
||||
model_name = request.model
|
||||
if model_name == "":
|
||||
return backend_pb2.Result(success=False, message="request.model is required")
|
||||
try:
|
||||
self.processor = AutoProcessor.from_pretrained(model_name)
|
||||
self.model = MusicgenForConditionalGeneration.from_pretrained(model_name)
|
||||
inputs = None
|
||||
if request.text == "":
|
||||
inputs = self.model.get_unconditional_inputs(num_samples=1)
|
||||
elif request.HasField('src'):
|
||||
# TODO SECURITY CODE GOES HERE LOL
|
||||
# WHO KNOWS IF THIS WORKS???
|
||||
sample_rate, wsamples = wavfile.read('path_to_your_file.wav')
|
||||
|
||||
if request.HasField('src_divisor'):
|
||||
wsamples = wsamples[: len(wsamples) // request.src_divisor]
|
||||
|
||||
inputs = self.processor(
|
||||
audio=wsamples,
|
||||
sampling_rate=sample_rate,
|
||||
text=[request.text],
|
||||
padding=True,
|
||||
return_tensors="pt",
|
||||
)
|
||||
else:
|
||||
inputs = self.processor(
|
||||
text=[request.text],
|
||||
padding=True,
|
||||
return_tensors="pt",
|
||||
)
|
||||
|
||||
tokens = 256
|
||||
if request.HasField('duration'):
|
||||
tokens = int(request.duration * 51.2) # 256 tokens = 5 seconds, therefore 51.2 tokens is one second
|
||||
guidance = 3.0
|
||||
if request.HasField('temperature'):
|
||||
guidance = request.temperature
|
||||
dosample = True
|
||||
if request.HasField('sample'):
|
||||
dosample = request.sample
|
||||
audio_values = self.model.generate(**inputs, do_sample=dosample, guidance_scale=guidance, max_new_tokens=tokens)
|
||||
print("[transformers-musicgen] SoundGeneration generated!", file=sys.stderr)
|
||||
sampling_rate = self.model.config.audio_encoder.sampling_rate
|
||||
wavfile.write(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy())
|
||||
print("[transformers-musicgen] SoundGeneration saved to", request.dst, file=sys.stderr)
|
||||
print("[transformers-musicgen] SoundGeneration for", file=sys.stderr)
|
||||
print("[transformers-musicgen] SoundGeneration requested tokens", tokens, file=sys.stderr)
|
||||
print(request, file=sys.stderr)
|
||||
except Exception as err:
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
return backend_pb2.Result(success=True)
|
||||
|
||||
|
||||
# The TTS endpoint is older, and provides fewer features, but exists for compatibility reasons
|
||||
def TTS(self, request, context):
|
||||
model_name = request.model
|
||||
if model_name == "":
|
||||
return backend_pb2.Result(success=False, message="request.model is required")
|
||||
try:
|
||||
self.processor = AutoProcessor.from_pretrained(model_name)
|
||||
self.model = MusicgenForConditionalGeneration.from_pretrained(model_name)
|
||||
inputs = self.processor(
|
||||
text=[request.text],
|
||||
padding=True,
|
||||
return_tensors="pt",
|
||||
)
|
||||
tokens = 512 # No good place to set the "length" in TTS, so use 10s as a sane default
|
||||
audio_values = self.model.generate(**inputs, max_new_tokens=tokens)
|
||||
print("[transformers-musicgen] TTS generated!", file=sys.stderr)
|
||||
sampling_rate = self.model.config.audio_encoder.sampling_rate
|
||||
write_wav(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy())
|
||||
print("[transformers-musicgen] TTS saved to", request.dst, file=sys.stderr)
|
||||
print("[transformers-musicgen] TTS for", file=sys.stderr)
|
||||
print(request, file=sys.stderr)
|
||||
except Exception as err:
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
return backend_pb2.Result(success=True)
|
||||
|
||||
|
||||
def serve(address):
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||
server.add_insecure_port(address)
|
||||
server.start()
|
||||
print("[transformers-musicgen] Server started. Listening on: " + address, file=sys.stderr)
|
||||
|
||||
# Define the signal handler function
|
||||
def signal_handler(sig, frame):
|
||||
print("[transformers-musicgen] Received termination signal. Shutting down...")
|
||||
server.stop(0)
|
||||
sys.exit(0)
|
||||
|
||||
# Set the signal handlers for SIGINT and SIGTERM
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
signal.signal(signal.SIGTERM, signal_handler)
|
||||
|
||||
try:
|
||||
while True:
|
||||
time.sleep(_ONE_DAY_IN_SECONDS)
|
||||
except KeyboardInterrupt:
|
||||
server.stop(0)
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Run the gRPC server.")
|
||||
parser.add_argument(
|
||||
"--addr", default="localhost:50051", help="The address to bind the server to."
|
||||
)
|
||||
args = parser.parse_args()
|
||||
print(f"[transformers-musicgen] startup: {args}", file=sys.stderr)
|
||||
serve(args.addr)
|
||||
@@ -1,14 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
|
||||
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
|
||||
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
|
||||
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
|
||||
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
|
||||
installRequirements
|
||||
@@ -1,3 +0,0 @@
|
||||
transformers
|
||||
accelerate
|
||||
torch==2.4.1
|
||||
@@ -1,4 +0,0 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||
transformers
|
||||
accelerate
|
||||
torch==2.4.1+cu118
|
||||
@@ -1,3 +0,0 @@
|
||||
transformers
|
||||
accelerate
|
||||
torch==2.4.1
|
||||
@@ -1,4 +0,0 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||
transformers
|
||||
accelerate
|
||||
torch==2.4.1+rocm6.0
|
||||
@@ -1,4 +0,0 @@
|
||||
grpcio==1.69.0
|
||||
protobuf
|
||||
scipy==1.14.0
|
||||
certifi
|
||||
@@ -1,4 +0,0 @@
|
||||
#!/bin/bash
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
startBackend $@
|
||||
@@ -1,100 +0,0 @@
|
||||
"""
|
||||
A test script to test the gRPC service
|
||||
"""
|
||||
import unittest
|
||||
import subprocess
|
||||
import time
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
|
||||
|
||||
class TestBackendServicer(unittest.TestCase):
|
||||
"""
|
||||
TestBackendServicer is the class that tests the gRPC service
|
||||
"""
|
||||
def setUp(self):
|
||||
"""
|
||||
This method sets up the gRPC service by starting the server
|
||||
"""
|
||||
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
|
||||
time.sleep(10)
|
||||
|
||||
def tearDown(self) -> None:
|
||||
"""
|
||||
This method tears down the gRPC service by terminating the server
|
||||
"""
|
||||
self.service.terminate()
|
||||
self.service.wait()
|
||||
|
||||
def test_server_startup(self):
|
||||
"""
|
||||
This method tests if the server starts up successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.Health(backend_pb2.HealthMessage())
|
||||
self.assertEqual(response.message, b'OK')
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("Server failed to start")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_load_model(self):
|
||||
"""
|
||||
This method tests if the model is loaded successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small"))
|
||||
self.assertTrue(response.success)
|
||||
self.assertEqual(response.message, "Model loaded successfully")
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("LoadModel service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_tts(self):
|
||||
"""
|
||||
This method tests if TTS is generated successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small"))
|
||||
self.assertTrue(response.success)
|
||||
tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story")
|
||||
tts_response = stub.TTS(tts_request)
|
||||
self.assertIsNotNone(tts_response)
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("TTS service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_sound_generation(self):
|
||||
"""
|
||||
This method tests if SoundGeneration is generated successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small"))
|
||||
self.assertTrue(response.success)
|
||||
sg_request = backend_pb2.SoundGenerationRequest(text="80s TV news production music hit for tonight's biggest story")
|
||||
sg_response = stub.SoundGeneration(sg_request)
|
||||
self.assertIsNotNone(sg_response)
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("SoundGeneration service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
@@ -1,6 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
runUnittests
|
||||
@@ -22,6 +22,10 @@ import torch.cuda
|
||||
|
||||
XPU=os.environ.get("XPU", "0") == "1"
|
||||
from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria
|
||||
from transformers import AutoProcessor, MusicgenForConditionalGeneration
|
||||
from scipy.io import wavfile
|
||||
import outetts
|
||||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
|
||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||
@@ -85,10 +89,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
|
||||
self.CUDA = torch.cuda.is_available()
|
||||
self.OV=False
|
||||
self.OuteTTS=False
|
||||
self.SentenceTransformer = False
|
||||
|
||||
device_map="cpu"
|
||||
|
||||
quantization = None
|
||||
autoTokenizer = True
|
||||
|
||||
if self.CUDA:
|
||||
from transformers import BitsAndBytesConfig, AutoModelForCausalLM
|
||||
@@ -191,6 +198,53 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
export=True,
|
||||
device=device_map)
|
||||
self.OV = True
|
||||
elif request.Type == "MusicgenForConditionalGeneration":
|
||||
autoTokenizer = False
|
||||
self.processor = AutoProcessor.from_pretrained(model_name)
|
||||
self.model = MusicgenForConditionalGeneration.from_pretrained(model_name)
|
||||
elif request.Type == "OuteTTS":
|
||||
autoTokenizer = False
|
||||
options = request.Options
|
||||
MODELNAME = "OuteAI/OuteTTS-0.3-1B"
|
||||
TOKENIZER = "OuteAI/OuteTTS-0.3-1B"
|
||||
VERSION = "0.3"
|
||||
SPEAKER = "en_male_1"
|
||||
for opt in options:
|
||||
if opt.startswith("tokenizer:"):
|
||||
TOKENIZER = opt.split(":")[1]
|
||||
break
|
||||
if opt.startswith("version:"):
|
||||
VERSION = opt.split(":")[1]
|
||||
break
|
||||
if opt.startswith("speaker:"):
|
||||
SPEAKER = opt.split(":")[1]
|
||||
break
|
||||
|
||||
if model_name != "":
|
||||
MODELNAME = model_name
|
||||
|
||||
# Configure the model
|
||||
model_config = outetts.HFModelConfig_v2(
|
||||
model_path=MODELNAME,
|
||||
tokenizer_path=TOKENIZER
|
||||
)
|
||||
# Initialize the interface
|
||||
self.interface = outetts.InterfaceHF(model_version=VERSION, cfg=model_config)
|
||||
self.OuteTTS = True
|
||||
|
||||
self.interface.print_default_speakers()
|
||||
if request.AudioPath:
|
||||
if os.path.isabs(request.AudioPath):
|
||||
self.AudioPath = request.AudioPath
|
||||
else:
|
||||
self.AudioPath = os.path.join(request.ModelPath, request.AudioPath)
|
||||
self.speaker = self.interface.create_speaker(audio_path=self.AudioPath)
|
||||
else:
|
||||
self.speaker = self.interface.load_default_speaker(name=SPEAKER)
|
||||
elif request.Type == "SentenceTransformer":
|
||||
autoTokenizer = False
|
||||
self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
|
||||
self.SentenceTransformer = True
|
||||
else:
|
||||
print("Automodel", file=sys.stderr)
|
||||
self.model = AutoModel.from_pretrained(model_name,
|
||||
@@ -201,19 +255,22 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
torch_dtype=compute)
|
||||
if request.ContextSize > 0:
|
||||
self.max_tokens = request.ContextSize
|
||||
else:
|
||||
elif hasattr(self.model, 'config') and hasattr(self.model.config, 'max_position_embeddings'):
|
||||
self.max_tokens = self.model.config.max_position_embeddings
|
||||
else:
|
||||
self.max_tokens = 512
|
||||
|
||||
self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
|
||||
self.XPU = False
|
||||
if autoTokenizer:
|
||||
self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
|
||||
self.XPU = False
|
||||
|
||||
if XPU and self.OV == False:
|
||||
self.XPU = True
|
||||
try:
|
||||
print("Optimizing model", model_name, "to XPU.", file=sys.stderr)
|
||||
self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, device="xpu")
|
||||
except Exception as err:
|
||||
print("Not using XPU:", err, file=sys.stderr)
|
||||
if XPU and self.OV == False:
|
||||
self.XPU = True
|
||||
try:
|
||||
print("Optimizing model", model_name, "to XPU.", file=sys.stderr)
|
||||
self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, device="xpu")
|
||||
except Exception as err:
|
||||
print("Not using XPU:", err, file=sys.stderr)
|
||||
|
||||
except Exception as err:
|
||||
print("Error:", err, file=sys.stderr)
|
||||
@@ -239,18 +296,26 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
max_length = 512
|
||||
if request.Tokens != 0:
|
||||
max_length = request.Tokens
|
||||
encoded_input = self.tokenizer(request.Embeddings, padding=True, truncation=True, max_length=max_length, return_tensors="pt")
|
||||
|
||||
# Create word embeddings
|
||||
if self.CUDA:
|
||||
encoded_input = encoded_input.to("cuda")
|
||||
embeds = None
|
||||
|
||||
with torch.no_grad():
|
||||
model_output = self.model(**encoded_input)
|
||||
if self.SentenceTransformer:
|
||||
print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr)
|
||||
embeds = self.model.encode(request.Embeddings)
|
||||
else:
|
||||
encoded_input = self.tokenizer(request.Embeddings, padding=True, truncation=True, max_length=max_length, return_tensors="pt")
|
||||
|
||||
# Pool to get sentence embeddings; i.e. generate one 1024 vector for the entire sentence
|
||||
sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
|
||||
return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings[0])
|
||||
# Create word embeddings
|
||||
if self.CUDA:
|
||||
encoded_input = encoded_input.to("cuda")
|
||||
|
||||
with torch.no_grad():
|
||||
model_output = self.model(**encoded_input)
|
||||
|
||||
# Pool to get sentence embeddings; i.e. generate one 1024 vector for the entire sentence
|
||||
sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
|
||||
embeds = sentence_embeddings[0]
|
||||
return backend_pb2.EmbeddingResult(embeddings=embeds)
|
||||
|
||||
async def _predict(self, request, context, streaming=False):
|
||||
set_seed(request.Seed)
|
||||
@@ -380,6 +445,114 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
finally:
|
||||
await iterations.aclose()
|
||||
|
||||
def SoundGeneration(self, request, context):
|
||||
model_name = request.model
|
||||
try:
|
||||
if self.processor is None:
|
||||
if model_name == "":
|
||||
return backend_pb2.Result(success=False, message="request.model is required")
|
||||
self.processor = AutoProcessor.from_pretrained(model_name)
|
||||
if self.model is None:
|
||||
if model_name == "":
|
||||
return backend_pb2.Result(success=False, message="request.model is required")
|
||||
self.model = MusicgenForConditionalGeneration.from_pretrained(model_name)
|
||||
inputs = None
|
||||
if request.text == "":
|
||||
inputs = self.model.get_unconditional_inputs(num_samples=1)
|
||||
elif request.HasField('src'):
|
||||
# TODO SECURITY CODE GOES HERE LOL
|
||||
# WHO KNOWS IF THIS WORKS???
|
||||
sample_rate, wsamples = wavfile.read('path_to_your_file.wav')
|
||||
|
||||
if request.HasField('src_divisor'):
|
||||
wsamples = wsamples[: len(wsamples) // request.src_divisor]
|
||||
|
||||
inputs = self.processor(
|
||||
audio=wsamples,
|
||||
sampling_rate=sample_rate,
|
||||
text=[request.text],
|
||||
padding=True,
|
||||
return_tensors="pt",
|
||||
)
|
||||
else:
|
||||
inputs = self.processor(
|
||||
text=[request.text],
|
||||
padding=True,
|
||||
return_tensors="pt",
|
||||
)
|
||||
|
||||
tokens = 256
|
||||
if request.HasField('duration'):
|
||||
tokens = int(request.duration * 51.2) # 256 tokens = 5 seconds, therefore 51.2 tokens is one second
|
||||
guidance = 3.0
|
||||
if request.HasField('temperature'):
|
||||
guidance = request.temperature
|
||||
dosample = True
|
||||
if request.HasField('sample'):
|
||||
dosample = request.sample
|
||||
audio_values = self.model.generate(**inputs, do_sample=dosample, guidance_scale=guidance, max_new_tokens=tokens)
|
||||
print("[transformers-musicgen] SoundGeneration generated!", file=sys.stderr)
|
||||
sampling_rate = self.model.config.audio_encoder.sampling_rate
|
||||
wavfile.write(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy())
|
||||
print("[transformers-musicgen] SoundGeneration saved to", request.dst, file=sys.stderr)
|
||||
print("[transformers-musicgen] SoundGeneration for", file=sys.stderr)
|
||||
print("[transformers-musicgen] SoundGeneration requested tokens", tokens, file=sys.stderr)
|
||||
print(request, file=sys.stderr)
|
||||
except Exception as err:
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
return backend_pb2.Result(success=True)
|
||||
|
||||
def OuteTTS(self, request, context):
|
||||
try:
|
||||
print("[OuteTTS] generating TTS", file=sys.stderr)
|
||||
gen_cfg = outetts.GenerationConfig(
|
||||
text="Speech synthesis is the artificial production of human speech.",
|
||||
temperature=0.1,
|
||||
repetition_penalty=1.1,
|
||||
max_length=self.max_tokens,
|
||||
speaker=self.speaker,
|
||||
# voice_characteristics="upbeat enthusiasm, friendliness, clarity, professionalism, and trustworthiness"
|
||||
)
|
||||
output = self.interface.generate(config=gen_cfg)
|
||||
print("[OuteTTS] Generated TTS", file=sys.stderr)
|
||||
output.save(request.dst)
|
||||
print("[OuteTTS] TTS done", file=sys.stderr)
|
||||
except Exception as err:
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
return backend_pb2.Result(success=True)
|
||||
|
||||
# The TTS endpoint is older, and provides fewer features, but exists for compatibility reasons
|
||||
def TTS(self, request, context):
|
||||
if self.OuteTTS:
|
||||
return self.OuteTTS(request, context)
|
||||
|
||||
model_name = request.model
|
||||
try:
|
||||
if self.processor is None:
|
||||
if model_name == "":
|
||||
return backend_pb2.Result(success=False, message="request.model is required")
|
||||
self.processor = AutoProcessor.from_pretrained(model_name)
|
||||
if self.model is None:
|
||||
if model_name == "":
|
||||
return backend_pb2.Result(success=False, message="request.model is required")
|
||||
self.model = MusicgenForConditionalGeneration.from_pretrained(model_name)
|
||||
inputs = self.processor(
|
||||
text=[request.text],
|
||||
padding=True,
|
||||
return_tensors="pt",
|
||||
)
|
||||
tokens = self.max_tokens # No good place to set the "length" in TTS, so use 10s as a sane default
|
||||
audio_values = self.model.generate(**inputs, max_new_tokens=tokens)
|
||||
print("[transformers-musicgen] TTS generated!", file=sys.stderr)
|
||||
sampling_rate = self.model.config.audio_encoder.sampling_rate
|
||||
wavfile.write(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy())
|
||||
print("[transformers-musicgen] TTS saved to", request.dst, file=sys.stderr)
|
||||
print("[transformers-musicgen] TTS for", file=sys.stderr)
|
||||
print(request, file=sys.stderr)
|
||||
except Exception as err:
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
return backend_pb2.Result(success=True)
|
||||
|
||||
async def serve(address):
|
||||
# Start asyncio gRPC server
|
||||
server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
torch==2.4.1
|
||||
llvmlite==0.43.0
|
||||
numba==0.60.0
|
||||
accelerate
|
||||
transformers
|
||||
bitsandbytes
|
||||
bitsandbytes
|
||||
outetts
|
||||
sentence-transformers==3.3.1
|
||||
@@ -1,5 +1,9 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||
torch==2.4.1+cu118
|
||||
llvmlite==0.43.0
|
||||
numba==0.60.0
|
||||
accelerate
|
||||
transformers
|
||||
bitsandbytes
|
||||
bitsandbytes
|
||||
outetts
|
||||
sentence-transformers==3.3.1
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
torch==2.4.1
|
||||
accelerate
|
||||
llvmlite==0.43.0
|
||||
numba==0.60.0
|
||||
transformers
|
||||
bitsandbytes
|
||||
bitsandbytes
|
||||
outetts
|
||||
sentence-transformers==3.3.1
|
||||
|
||||
@@ -2,4 +2,9 @@
|
||||
torch==2.4.1+rocm6.0
|
||||
accelerate
|
||||
transformers
|
||||
bitsandbytes
|
||||
llvmlite==0.43.0
|
||||
numba==0.60.0
|
||||
bitsandbytes
|
||||
outetts
|
||||
bitsandbytes
|
||||
sentence-transformers==3.3.1
|
||||
|
||||
@@ -3,5 +3,9 @@ intel-extension-for-pytorch==2.3.110+xpu
|
||||
torch==2.3.1+cxx11.abi
|
||||
oneccl_bind_pt==2.3.100+xpu
|
||||
optimum[openvino]
|
||||
llvmlite==0.43.0
|
||||
numba==0.60.0
|
||||
intel-extension-for-transformers
|
||||
bitsandbytes
|
||||
bitsandbytes
|
||||
outetts
|
||||
sentence-transformers==3.3.1
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
grpcio==1.69.0
|
||||
protobuf
|
||||
certifi
|
||||
setuptools
|
||||
setuptools
|
||||
scipy==1.15.1
|
||||
numpy>=2.0.0
|
||||
@@ -19,6 +19,7 @@ class TestBackendServicer(unittest.TestCase):
|
||||
This method sets up the gRPC service by starting the server
|
||||
"""
|
||||
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
|
||||
time.sleep(10)
|
||||
|
||||
def tearDown(self) -> None:
|
||||
"""
|
||||
@@ -31,7 +32,6 @@ class TestBackendServicer(unittest.TestCase):
|
||||
"""
|
||||
This method tests if the server starts up successfully
|
||||
"""
|
||||
time.sleep(10)
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
@@ -48,7 +48,6 @@ class TestBackendServicer(unittest.TestCase):
|
||||
"""
|
||||
This method tests if the model is loaded successfully
|
||||
"""
|
||||
time.sleep(10)
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
@@ -66,7 +65,6 @@ class TestBackendServicer(unittest.TestCase):
|
||||
"""
|
||||
This method tests if the embeddings are generated successfully
|
||||
"""
|
||||
time.sleep(10)
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
@@ -80,5 +78,96 @@ class TestBackendServicer(unittest.TestCase):
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("Embedding service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_audio_load_model(self):
|
||||
"""
|
||||
This method tests if the model is loaded successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small",Type="MusicgenForConditionalGeneration"))
|
||||
self.assertTrue(response.success)
|
||||
self.assertEqual(response.message, "Model loaded successfully")
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("LoadModel service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_tts(self):
|
||||
"""
|
||||
This method tests if TTS is generated successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small",Type="MusicgenForConditionalGeneration"))
|
||||
self.assertTrue(response.success)
|
||||
tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story")
|
||||
tts_response = stub.TTS(tts_request)
|
||||
self.assertIsNotNone(tts_response)
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("TTS service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_sound_generation(self):
|
||||
"""
|
||||
This method tests if SoundGeneration is generated successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small",Type="MusicgenForConditionalGeneration"))
|
||||
self.assertTrue(response.success)
|
||||
sg_request = backend_pb2.SoundGenerationRequest(text="80s TV news production music hit for tonight's biggest story")
|
||||
sg_response = stub.SoundGeneration(sg_request)
|
||||
self.assertIsNotNone(sg_response)
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("SoundGeneration service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_embed_load_model(self):
|
||||
"""
|
||||
This method tests if the model is loaded successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens",Type="SentenceTransformer"))
|
||||
self.assertTrue(response.success)
|
||||
self.assertEqual(response.message, "Model loaded successfully")
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("LoadModel service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_sentencetransformers_embedding(self):
|
||||
"""
|
||||
This method tests if the embeddings are generated successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens",Type="SentenceTransformer"))
|
||||
self.assertTrue(response.success)
|
||||
embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.")
|
||||
embedding_response = stub.Embedding(embedding_request)
|
||||
self.assertIsNotNone(embedding_response.embeddings)
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("Embedding service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
1
backend/python/vall-e-x/.gitignore
vendored
1
backend/python/vall-e-x/.gitignore
vendored
@@ -1 +0,0 @@
|
||||
source
|
||||
@@ -1,33 +0,0 @@
|
||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||
export SKIP_CONDA=1
|
||||
endif
|
||||
|
||||
.PHONY: ttsvalle
|
||||
ttsvalle: protogen
|
||||
bash install.sh
|
||||
|
||||
.PHONY: run
|
||||
run: protogen
|
||||
@echo "Running ttsvalle..."
|
||||
bash run.sh
|
||||
@echo "ttsvalle run."
|
||||
|
||||
.PHONY: test
|
||||
test: protogen
|
||||
@echo "Testing valle..."
|
||||
bash test.sh
|
||||
@echo "valle tested."
|
||||
|
||||
.PHONY: protogen
|
||||
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
.PHONY: protogen-clean
|
||||
protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
rm -rf source venv __pycache__
|
||||
@@ -1,5 +0,0 @@
|
||||
# Creating a separate environment for the ttsvalle project
|
||||
|
||||
```
|
||||
make ttsvalle
|
||||
```
|
||||
@@ -1,141 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from concurrent import futures
|
||||
import argparse
|
||||
import signal
|
||||
import sys
|
||||
import os
|
||||
import time
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
|
||||
from utils.generation import SAMPLE_RATE, generate_audio, preload_models
|
||||
from scipy.io.wavfile import write as write_wav
|
||||
from utils.prompt_making import make_prompt
|
||||
|
||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||
|
||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||
|
||||
# Implement the BackendServicer class with the service methods
|
||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
"""
|
||||
gRPC servicer for backend services.
|
||||
"""
|
||||
def Health(self, request, context):
|
||||
"""
|
||||
Health check service.
|
||||
|
||||
Args:
|
||||
request: A backend_pb2.HealthRequest instance.
|
||||
context: A grpc.ServicerContext instance.
|
||||
|
||||
Returns:
|
||||
A backend_pb2.Reply instance with message "OK".
|
||||
"""
|
||||
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
||||
|
||||
def LoadModel(self, request, context):
|
||||
"""
|
||||
Load model service.
|
||||
|
||||
Args:
|
||||
request: A backend_pb2.LoadModelRequest instance.
|
||||
context: A grpc.ServicerContext instance.
|
||||
|
||||
Returns:
|
||||
A backend_pb2.Result instance with message "Model loaded successfully" and success=True if successful.
|
||||
A backend_pb2.Result instance with success=False and error message if unsuccessful.
|
||||
"""
|
||||
model_name = request.Model
|
||||
try:
|
||||
print("Preparing models, please wait", file=sys.stderr)
|
||||
# download and load all models
|
||||
preload_models()
|
||||
self.clonedVoice = False
|
||||
# Assume directory from request.ModelFile.
|
||||
# Only if request.LoraAdapter it's not an absolute path
|
||||
if request.AudioPath and request.ModelFile != "" and not os.path.isabs(request.AudioPath):
|
||||
# get base path of modelFile
|
||||
modelFileBase = os.path.dirname(request.ModelFile)
|
||||
# modify LoraAdapter to be relative to modelFileBase
|
||||
request.AudioPath = os.path.join(modelFileBase, request.AudioPath)
|
||||
if request.AudioPath != "":
|
||||
print("Generating model", file=sys.stderr)
|
||||
make_prompt(name=model_name, audio_prompt_path=request.AudioPath)
|
||||
self.clonedVoice = True
|
||||
### Use given transcript
|
||||
##make_prompt(name=model_name, audio_prompt_path="paimon_prompt.wav",
|
||||
## transcript="Just, what was that? Paimon thought we were gonna get eaten.")
|
||||
except Exception as err:
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
# Implement your logic here for the LoadModel service
|
||||
# Replace this with your desired response
|
||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
||||
|
||||
def TTS(self, request, context):
|
||||
"""
|
||||
Text-to-speech service.
|
||||
|
||||
Args:
|
||||
request: A backend_pb2.TTSRequest instance.
|
||||
context: A grpc.ServicerContext instance.
|
||||
|
||||
Returns:
|
||||
A backend_pb2.Result instance with success=True if successful.
|
||||
A backend_pb2.Result instance with success=False and error message if unsuccessful.
|
||||
"""
|
||||
model = request.model
|
||||
print(request, file=sys.stderr)
|
||||
try:
|
||||
audio_array = None
|
||||
if model != "":
|
||||
if self.clonedVoice:
|
||||
model = os.path.basename(request.model)
|
||||
audio_array = generate_audio(request.text, prompt=model)
|
||||
else:
|
||||
audio_array = generate_audio(request.text)
|
||||
print("saving to", request.dst, file=sys.stderr)
|
||||
# save audio to disk
|
||||
write_wav(request.dst, SAMPLE_RATE, audio_array)
|
||||
print("saved to", request.dst, file=sys.stderr)
|
||||
print("tts for", file=sys.stderr)
|
||||
print(request, file=sys.stderr)
|
||||
except Exception as err:
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
return backend_pb2.Result(success=True)
|
||||
|
||||
def serve(address):
|
||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||
server.add_insecure_port(address)
|
||||
server.start()
|
||||
print("Server started. Listening on: " + address, file=sys.stderr)
|
||||
|
||||
# Define the signal handler function
|
||||
def signal_handler(sig, frame):
|
||||
print("Received termination signal. Shutting down...")
|
||||
server.stop(0)
|
||||
sys.exit(0)
|
||||
|
||||
# Set the signal handlers for SIGINT and SIGTERM
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
signal.signal(signal.SIGTERM, signal_handler)
|
||||
|
||||
try:
|
||||
while True:
|
||||
time.sleep(_ONE_DAY_IN_SECONDS)
|
||||
except KeyboardInterrupt:
|
||||
server.stop(0)
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Run the gRPC server.")
|
||||
parser.add_argument(
|
||||
"--addr", default="localhost:50051", help="The address to bind the server to."
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
serve(args.addr)
|
||||
@@ -1,22 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
VALL_E_X_VERSION=3faaf8ccadb154d63b38070caf518ce9309ea0f4
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
|
||||
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
|
||||
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
|
||||
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
|
||||
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
|
||||
installRequirements
|
||||
|
||||
git clone https://github.com/Plachtaa/VALL-E-X.git ${MY_DIR}/source
|
||||
pushd ${MY_DIR}/source && git checkout -b build ${VALL_E_X_VERSION} && popd
|
||||
uv pip install ${BUILD_ISOLATION_FLAG} --requirement ${MY_DIR}/source/requirements.txt
|
||||
|
||||
cp -v ./*py $MY_DIR/source/
|
||||
@@ -1,3 +0,0 @@
|
||||
accelerate
|
||||
torch==2.4.1
|
||||
torchaudio==2.4.1
|
||||
@@ -1,3 +0,0 @@
|
||||
accelerate
|
||||
torch==2.4.1
|
||||
torchaudio==2.4.1
|
||||
@@ -1,4 +0,0 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||
accelerate
|
||||
torch==2.3.0+rocm6.0
|
||||
torchaudio==2.3.0+rocm6.0
|
||||
@@ -1,7 +0,0 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch==2.3.110+xpu
|
||||
accelerate
|
||||
torch==2.3.1+cxx11.abi
|
||||
torchaudio==2.3.1+cxx11.abi
|
||||
optimum[openvino]
|
||||
oneccl_bind_pt==2.3.100+xpu
|
||||
@@ -1,4 +0,0 @@
|
||||
grpcio==1.69.0
|
||||
protobuf
|
||||
certifi
|
||||
setuptools
|
||||
@@ -1,6 +0,0 @@
|
||||
#!/bin/bash
|
||||
BACKEND_FILE="${MY_DIR}/source/backend.py"
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
startBackend $@
|
||||
@@ -1,81 +0,0 @@
|
||||
"""
|
||||
A test script to test the gRPC service
|
||||
"""
|
||||
import unittest
|
||||
import subprocess
|
||||
import time
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
|
||||
|
||||
class TestBackendServicer(unittest.TestCase):
|
||||
"""
|
||||
TestBackendServicer is the class that tests the gRPC service
|
||||
"""
|
||||
def setUp(self):
|
||||
"""
|
||||
This method sets up the gRPC service by starting the server
|
||||
"""
|
||||
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
|
||||
time.sleep(10)
|
||||
|
||||
def tearDown(self) -> None:
|
||||
"""
|
||||
This method tears down the gRPC service by terminating the server
|
||||
"""
|
||||
self.service.terminate()
|
||||
self.service.wait()
|
||||
|
||||
def test_server_startup(self):
|
||||
"""
|
||||
This method tests if the server starts up successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.Health(backend_pb2.HealthMessage())
|
||||
self.assertEqual(response.message, b'OK')
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("Server failed to start")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_load_model(self):
|
||||
"""
|
||||
This method tests if the model is loaded successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="dingzhen"))
|
||||
self.assertTrue(response.success)
|
||||
self.assertEqual(response.message, "Model loaded successfully")
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("LoadModel service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
|
||||
def test_tts(self):
|
||||
"""
|
||||
This method tests if the embeddings are generated successfully
|
||||
"""
|
||||
try:
|
||||
self.setUp()
|
||||
with grpc.insecure_channel("localhost:50051") as channel:
|
||||
stub = backend_pb2_grpc.BackendStub(channel)
|
||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="dingzhen"))
|
||||
self.assertTrue(response.success)
|
||||
tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story")
|
||||
tts_response = stub.TTS(tts_request)
|
||||
self.assertIsNotNone(tts_response)
|
||||
except Exception as err:
|
||||
print(err)
|
||||
self.fail("TTS service failed")
|
||||
finally:
|
||||
self.tearDown()
|
||||
@@ -1,7 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
TEST_FILE="./source/test.py"
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
runUnittests
|
||||
@@ -27,8 +27,10 @@ type LLMResponse struct {
|
||||
}
|
||||
|
||||
type TokenUsage struct {
|
||||
Prompt int
|
||||
Completion int
|
||||
Prompt int
|
||||
Completion int
|
||||
TimingPromptProcessing float64
|
||||
TimingTokenGeneration float64
|
||||
}
|
||||
|
||||
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||
@@ -123,6 +125,8 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
||||
|
||||
tokenUsage.Prompt = int(reply.PromptTokens)
|
||||
tokenUsage.Completion = int(reply.Tokens)
|
||||
tokenUsage.TimingTokenGeneration = reply.TimingTokenGeneration
|
||||
tokenUsage.TimingPromptProcessing = reply.TimingPromptProcessing
|
||||
|
||||
for len(partialRune) > 0 {
|
||||
r, size := utf8.DecodeRune(partialRune)
|
||||
@@ -157,6 +161,10 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
||||
if tokenUsage.Completion == 0 {
|
||||
tokenUsage.Completion = int(reply.Tokens)
|
||||
}
|
||||
|
||||
tokenUsage.TimingTokenGeneration = reply.TimingTokenGeneration
|
||||
tokenUsage.TimingPromptProcessing = reply.TimingPromptProcessing
|
||||
|
||||
return LLMResponse{
|
||||
Response: string(reply.Message),
|
||||
Usage: tokenUsage,
|
||||
|
||||
@@ -140,7 +140,7 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
||||
NBatch: int32(b),
|
||||
NoMulMatQ: c.NoMulMatQ,
|
||||
DraftModel: c.DraftModel,
|
||||
AudioPath: c.VallE.AudioPath,
|
||||
AudioPath: c.AudioPath,
|
||||
Quantization: c.Quantization,
|
||||
LoadFormat: c.LoadFormat,
|
||||
GPUMemoryUtilization: c.GPUMemoryUtilization,
|
||||
|
||||
@@ -70,6 +70,7 @@ type RunCMD struct {
|
||||
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
|
||||
Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
|
||||
DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"`
|
||||
MachineTag string `env:"LOCALAI_MACHINE_TAG,MACHINE_TAG" help:"Add Machine-Tag header to each response which is useful to track the machine in the P2P network" group:"api"`
|
||||
LoadToMemory []string `env:"LOCALAI_LOAD_TO_MEMORY,LOAD_TO_MEMORY" help:"A list of models to load into memory at startup" group:"models"`
|
||||
}
|
||||
|
||||
@@ -107,6 +108,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||
config.WithHttpGetExemptedEndpoints(r.HttpGetExemptedEndpoints),
|
||||
config.WithP2PNetworkID(r.Peer2PeerNetworkID),
|
||||
config.WithLoadToMemory(r.LoadToMemory),
|
||||
config.WithMachineTag(r.MachineTag),
|
||||
}
|
||||
|
||||
if r.DisableMetricsEndpoint {
|
||||
|
||||
@@ -65,6 +65,8 @@ type ApplicationConfig struct {
|
||||
ModelsURL []string
|
||||
|
||||
WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration
|
||||
|
||||
MachineTag string
|
||||
}
|
||||
|
||||
type AppOption func(*ApplicationConfig)
|
||||
@@ -94,6 +96,12 @@ func WithModelPath(path string) AppOption {
|
||||
}
|
||||
}
|
||||
|
||||
func WithMachineTag(tag string) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.MachineTag = tag
|
||||
}
|
||||
}
|
||||
|
||||
func WithCors(b bool) AppOption {
|
||||
return func(o *ApplicationConfig) {
|
||||
o.CORS = b
|
||||
|
||||
@@ -21,8 +21,7 @@ type TTSConfig struct {
|
||||
// Voice wav path or id
|
||||
Voice string `yaml:"voice"`
|
||||
|
||||
// Vall-e-x
|
||||
VallE VallE `yaml:"vall-e"`
|
||||
AudioPath string `yaml:"audio_path"`
|
||||
}
|
||||
|
||||
type BackendConfig struct {
|
||||
@@ -82,10 +81,6 @@ type File struct {
|
||||
URI downloader.URI `yaml:"uri" json:"uri"`
|
||||
}
|
||||
|
||||
type VallE struct {
|
||||
AudioPath string `yaml:"audio_path"`
|
||||
}
|
||||
|
||||
type FeatureFlag map[string]*bool
|
||||
|
||||
func (ff FeatureFlag) Enabled(s string) bool {
|
||||
@@ -520,7 +515,7 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
|
||||
}
|
||||
}
|
||||
if (u & FLAG_IMAGE) == FLAG_IMAGE {
|
||||
imageBackends := []string{"diffusers", "tinydream", "stablediffusion"}
|
||||
imageBackends := []string{"diffusers", "stablediffusion"}
|
||||
if !slices.Contains(imageBackends, c.Backend) {
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -89,6 +89,14 @@ func API(application *application.Application) (*fiber.App, error) {
|
||||
|
||||
router.Use(middleware.StripPathPrefix())
|
||||
|
||||
if application.ApplicationConfig().MachineTag != "" {
|
||||
router.Use(func(c *fiber.Ctx) error {
|
||||
c.Response().Header.Set("Machine-Tag", application.ApplicationConfig().MachineTag)
|
||||
|
||||
return c.Next()
|
||||
})
|
||||
}
|
||||
|
||||
router.Hooks().OnListen(func(listenData fiber.ListenData) error {
|
||||
scheme := "http"
|
||||
if listenData.TLS {
|
||||
|
||||
@@ -822,7 +822,7 @@ var _ = Describe("API test", func() {
|
||||
|
||||
application, err := application.New(
|
||||
append(commonOpts,
|
||||
config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
|
||||
config.WithExternalBackend("transformers", os.Getenv("HUGGINGFACE_GRPC")),
|
||||
config.WithContext(c),
|
||||
config.WithModelPath(modelPath),
|
||||
)...)
|
||||
@@ -1000,7 +1000,7 @@ var _ = Describe("API test", func() {
|
||||
}
|
||||
}
|
||||
|
||||
deleteBody := schema.StoresDelete{
|
||||
deleteBody := schema.StoresReset{
|
||||
Keys: [][]float32{
|
||||
{0.1, 0.2, 0.3},
|
||||
},
|
||||
|
||||
@@ -36,9 +36,9 @@ func StoresSetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfi
|
||||
}
|
||||
}
|
||||
|
||||
func StoresDeleteEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
func StoresResetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
input := new(schema.StoresDelete)
|
||||
input := new(schema.StoresReset)
|
||||
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
@@ -49,7 +49,7 @@ func StoresDeleteEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationCo
|
||||
return err
|
||||
}
|
||||
|
||||
if err := store.DeleteCols(c.Context(), sb, input.Keys); err != nil {
|
||||
if _, err := sb.StoresReset(c.Context(), nil); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -57,37 +57,6 @@ func StoresDeleteEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationCo
|
||||
}
|
||||
}
|
||||
|
||||
func StoresGetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
input := new(schema.StoresGet)
|
||||
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
sb, err := backend.StoreBackend(sl, appConfig, input.Store)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
keys, vals, err := store.GetCols(c.Context(), sb, input.Keys)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
res := schema.StoresGetResponse{
|
||||
Keys: keys,
|
||||
Values: make([]string, len(vals)),
|
||||
}
|
||||
|
||||
for i, v := range vals {
|
||||
res.Values[i] = string(v)
|
||||
}
|
||||
|
||||
return c.JSON(res)
|
||||
}
|
||||
}
|
||||
|
||||
func StoresFindEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
input := new(schema.StoresFind)
|
||||
|
||||
@@ -24,7 +24,6 @@ import (
|
||||
// @Router /tts [post]
|
||||
func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
|
||||
input := new(schema.TTSRequest)
|
||||
|
||||
// Get input data from the request body
|
||||
|
||||
@@ -19,7 +19,6 @@ import (
|
||||
// @Router /vad [post]
|
||||
func VADEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
|
||||
input := new(schema.VADRequest)
|
||||
|
||||
// Get input data from the request body
|
||||
|
||||
@@ -30,7 +30,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
||||
var id, textContentToReturn string
|
||||
var created int
|
||||
|
||||
process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
|
||||
process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) {
|
||||
initialMessage := schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
@@ -40,18 +40,24 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
||||
}
|
||||
responses <- initialMessage
|
||||
|
||||
ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
|
||||
ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
|
||||
usage := schema.OpenAIUsage{
|
||||
PromptTokens: tokenUsage.Prompt,
|
||||
CompletionTokens: tokenUsage.Completion,
|
||||
TotalTokens: tokenUsage.Prompt + tokenUsage.Completion,
|
||||
}
|
||||
if extraUsage {
|
||||
usage.TimingTokenGeneration = tokenUsage.TimingTokenGeneration
|
||||
usage.TimingPromptProcessing = tokenUsage.TimingPromptProcessing
|
||||
}
|
||||
|
||||
resp := schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}},
|
||||
Object: "chat.completion.chunk",
|
||||
Usage: schema.OpenAIUsage{
|
||||
PromptTokens: usage.Prompt,
|
||||
CompletionTokens: usage.Completion,
|
||||
TotalTokens: usage.Prompt + usage.Completion,
|
||||
},
|
||||
Usage: usage,
|
||||
}
|
||||
|
||||
responses <- resp
|
||||
@@ -59,7 +65,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
||||
})
|
||||
close(responses)
|
||||
}
|
||||
processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
|
||||
processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) {
|
||||
result := ""
|
||||
_, tokenUsage, _ := ComputeChoices(req, prompt, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
|
||||
result += s
|
||||
@@ -90,6 +96,15 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
||||
log.Error().Err(err).Msg("error handling question")
|
||||
return
|
||||
}
|
||||
usage := schema.OpenAIUsage{
|
||||
PromptTokens: tokenUsage.Prompt,
|
||||
CompletionTokens: tokenUsage.Completion,
|
||||
TotalTokens: tokenUsage.Prompt + tokenUsage.Completion,
|
||||
}
|
||||
if extraUsage {
|
||||
usage.TimingTokenGeneration = tokenUsage.TimingTokenGeneration
|
||||
usage.TimingPromptProcessing = tokenUsage.TimingPromptProcessing
|
||||
}
|
||||
|
||||
resp := schema.OpenAIResponse{
|
||||
ID: id,
|
||||
@@ -97,11 +112,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
||||
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}},
|
||||
Object: "chat.completion.chunk",
|
||||
Usage: schema.OpenAIUsage{
|
||||
PromptTokens: tokenUsage.Prompt,
|
||||
CompletionTokens: tokenUsage.Completion,
|
||||
TotalTokens: tokenUsage.Prompt + tokenUsage.Completion,
|
||||
},
|
||||
Usage: usage,
|
||||
}
|
||||
|
||||
responses <- resp
|
||||
@@ -170,6 +181,9 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
||||
}
|
||||
c.Set("X-Correlation-ID", correlationID)
|
||||
|
||||
// Opt-in extra usage flag
|
||||
extraUsage := c.Get("Extra-Usage", "") != ""
|
||||
|
||||
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
@@ -319,9 +333,9 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
||||
responses := make(chan schema.OpenAIResponse)
|
||||
|
||||
if !shouldUseFn {
|
||||
go process(predInput, input, config, ml, responses)
|
||||
go process(predInput, input, config, ml, responses, extraUsage)
|
||||
} else {
|
||||
go processTools(noActionName, predInput, input, config, ml, responses)
|
||||
go processTools(noActionName, predInput, input, config, ml, responses, extraUsage)
|
||||
}
|
||||
|
||||
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
|
||||
@@ -449,6 +463,15 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
usage := schema.OpenAIUsage{
|
||||
PromptTokens: tokenUsage.Prompt,
|
||||
CompletionTokens: tokenUsage.Completion,
|
||||
TotalTokens: tokenUsage.Prompt + tokenUsage.Completion,
|
||||
}
|
||||
if extraUsage {
|
||||
usage.TimingTokenGeneration = tokenUsage.TimingTokenGeneration
|
||||
usage.TimingPromptProcessing = tokenUsage.TimingPromptProcessing
|
||||
}
|
||||
|
||||
resp := &schema.OpenAIResponse{
|
||||
ID: id,
|
||||
@@ -456,11 +479,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: result,
|
||||
Object: "chat.completion",
|
||||
Usage: schema.OpenAIUsage{
|
||||
PromptTokens: tokenUsage.Prompt,
|
||||
CompletionTokens: tokenUsage.Completion,
|
||||
TotalTokens: tokenUsage.Prompt + tokenUsage.Completion,
|
||||
},
|
||||
Usage: usage,
|
||||
}
|
||||
respData, _ := json.Marshal(resp)
|
||||
log.Debug().Msgf("Response: %s", respData)
|
||||
|
||||
@@ -30,8 +30,17 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
|
||||
id := uuid.New().String()
|
||||
created := int(time.Now().Unix())
|
||||
|
||||
process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
|
||||
ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
|
||||
process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) {
|
||||
ComputeChoices(req, s, config, appConfig, loader, func(s string, c *[]schema.Choice) {}, func(s string, tokenUsage backend.TokenUsage) bool {
|
||||
usage := schema.OpenAIUsage{
|
||||
PromptTokens: tokenUsage.Prompt,
|
||||
CompletionTokens: tokenUsage.Completion,
|
||||
TotalTokens: tokenUsage.Prompt + tokenUsage.Completion,
|
||||
}
|
||||
if extraUsage {
|
||||
usage.TimingTokenGeneration = tokenUsage.TimingTokenGeneration
|
||||
usage.TimingPromptProcessing = tokenUsage.TimingPromptProcessing
|
||||
}
|
||||
resp := schema.OpenAIResponse{
|
||||
ID: id,
|
||||
Created: created,
|
||||
@@ -43,11 +52,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
|
||||
},
|
||||
},
|
||||
Object: "text_completion",
|
||||
Usage: schema.OpenAIUsage{
|
||||
PromptTokens: usage.Prompt,
|
||||
CompletionTokens: usage.Completion,
|
||||
TotalTokens: usage.Prompt + usage.Completion,
|
||||
},
|
||||
Usage: usage,
|
||||
}
|
||||
log.Debug().Msgf("Sending goroutine: %s", s)
|
||||
|
||||
@@ -60,6 +65,10 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
|
||||
return func(c *fiber.Ctx) error {
|
||||
// Add Correlation
|
||||
c.Set("X-Correlation-ID", id)
|
||||
|
||||
// Opt-in extra usage flag
|
||||
extraUsage := c.Get("Extra-Usage", "") != ""
|
||||
|
||||
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
@@ -113,7 +122,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
|
||||
|
||||
responses := make(chan schema.OpenAIResponse)
|
||||
|
||||
go process(predInput, input, config, ml, responses)
|
||||
go process(predInput, input, config, ml, responses, extraUsage)
|
||||
|
||||
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
|
||||
|
||||
@@ -170,11 +179,20 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
|
||||
return err
|
||||
}
|
||||
|
||||
totalTokenUsage.Prompt += tokenUsage.Prompt
|
||||
totalTokenUsage.Completion += tokenUsage.Completion
|
||||
totalTokenUsage.TimingTokenGeneration += tokenUsage.TimingTokenGeneration
|
||||
totalTokenUsage.TimingPromptProcessing += tokenUsage.TimingPromptProcessing
|
||||
|
||||
result = append(result, r...)
|
||||
}
|
||||
usage := schema.OpenAIUsage{
|
||||
PromptTokens: totalTokenUsage.Prompt,
|
||||
CompletionTokens: totalTokenUsage.Completion,
|
||||
TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion,
|
||||
}
|
||||
if extraUsage {
|
||||
usage.TimingTokenGeneration = totalTokenUsage.TimingTokenGeneration
|
||||
usage.TimingPromptProcessing = totalTokenUsage.TimingPromptProcessing
|
||||
}
|
||||
|
||||
resp := &schema.OpenAIResponse{
|
||||
ID: id,
|
||||
@@ -182,11 +200,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: result,
|
||||
Object: "text_completion",
|
||||
Usage: schema.OpenAIUsage{
|
||||
PromptTokens: totalTokenUsage.Prompt,
|
||||
CompletionTokens: totalTokenUsage.Completion,
|
||||
TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion,
|
||||
},
|
||||
Usage: usage,
|
||||
}
|
||||
|
||||
jsonResult, _ := json.Marshal(resp)
|
||||
|
||||
@@ -25,6 +25,9 @@ import (
|
||||
func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
|
||||
return func(c *fiber.Ctx) error {
|
||||
// Opt-in extra usage flag
|
||||
extraUsage := c.Get("Extra-Usage", "") != ""
|
||||
|
||||
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
@@ -61,8 +64,20 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
||||
totalTokenUsage.Prompt += tokenUsage.Prompt
|
||||
totalTokenUsage.Completion += tokenUsage.Completion
|
||||
|
||||
totalTokenUsage.TimingTokenGeneration += tokenUsage.TimingTokenGeneration
|
||||
totalTokenUsage.TimingPromptProcessing += tokenUsage.TimingPromptProcessing
|
||||
|
||||
result = append(result, r...)
|
||||
}
|
||||
usage := schema.OpenAIUsage{
|
||||
PromptTokens: totalTokenUsage.Prompt,
|
||||
CompletionTokens: totalTokenUsage.Completion,
|
||||
TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion,
|
||||
}
|
||||
if extraUsage {
|
||||
usage.TimingTokenGeneration = totalTokenUsage.TimingTokenGeneration
|
||||
usage.TimingPromptProcessing = totalTokenUsage.TimingPromptProcessing
|
||||
}
|
||||
|
||||
id := uuid.New().String()
|
||||
created := int(time.Now().Unix())
|
||||
@@ -72,11 +87,7 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: result,
|
||||
Object: "edit",
|
||||
Usage: schema.OpenAIUsage{
|
||||
PromptTokens: totalTokenUsage.Prompt,
|
||||
CompletionTokens: totalTokenUsage.Completion,
|
||||
TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion,
|
||||
},
|
||||
Usage: usage,
|
||||
}
|
||||
|
||||
jsonResult, _ := json.Marshal(resp)
|
||||
|
||||
@@ -130,8 +130,6 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
|
||||
switch config.Backend {
|
||||
case "stablediffusion":
|
||||
config.Backend = model.StableDiffusionBackend
|
||||
case "tinydream":
|
||||
config.Backend = model.TinyDreamBackend
|
||||
case "":
|
||||
config.Backend = model.StableDiffusionBackend
|
||||
}
|
||||
|
||||
@@ -52,6 +52,8 @@ func ComputeChoices(
|
||||
|
||||
tokenUsage.Prompt += prediction.Usage.Prompt
|
||||
tokenUsage.Completion += prediction.Usage.Completion
|
||||
tokenUsage.TimingPromptProcessing += prediction.Usage.TimingPromptProcessing
|
||||
tokenUsage.TimingTokenGeneration += prediction.Usage.TimingTokenGeneration
|
||||
|
||||
finetunedResponse := backend.Finetune(*config, predInput, prediction.Response)
|
||||
cb(finetunedResponse, &result)
|
||||
|
||||
@@ -12,7 +12,7 @@ import (
|
||||
// @Summary List and describe the various models available in the API.
|
||||
// @Success 200 {object} schema.ModelsDataResponse "Response"
|
||||
// @Router /v1/models [get]
|
||||
func ListModelsEndpoint(bcl *config.BackendConfigLoader, ml *model.ModelLoader) func(ctx *fiber.Ctx) error {
|
||||
func ListModelsEndpoint(bcl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(ctx *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
// If blank, no filter is applied.
|
||||
filter := c.Query("filter")
|
||||
|
||||
@@ -39,8 +39,7 @@ func RegisterLocalAIRoutes(router *fiber.App,
|
||||
// Stores
|
||||
sl := model.NewModelLoader("")
|
||||
router.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig))
|
||||
router.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig))
|
||||
router.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
|
||||
router.Post("/stores/reset", localai.StoresDeleteEndpoint(sl, appConfig))
|
||||
router.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))
|
||||
|
||||
if !appConfig.DisableMetrics {
|
||||
|
||||
@@ -130,6 +130,6 @@ func RegisterOpenAIRoutes(app *fiber.App,
|
||||
}
|
||||
|
||||
// List models
|
||||
app.Get("/v1/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader()))
|
||||
app.Get("/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader()))
|
||||
app.Get("/v1/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
app.Get("/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
|
||||
}
|
||||
|
||||
@@ -47,21 +47,8 @@ type StoresSet struct {
|
||||
Values []string `json:"values" yaml:"values"`
|
||||
}
|
||||
|
||||
type StoresDelete struct {
|
||||
type StoresReset struct {
|
||||
Store string `json:"store,omitempty" yaml:"store,omitempty"`
|
||||
|
||||
Keys [][]float32 `json:"keys"`
|
||||
}
|
||||
|
||||
type StoresGet struct {
|
||||
Store string `json:"store,omitempty" yaml:"store,omitempty"`
|
||||
|
||||
Keys [][]float32 `json:"keys" yaml:"keys"`
|
||||
}
|
||||
|
||||
type StoresGetResponse struct {
|
||||
Keys [][]float32 `json:"keys" yaml:"keys"`
|
||||
Values []string `json:"values" yaml:"values"`
|
||||
}
|
||||
|
||||
type StoresFind struct {
|
||||
|
||||
@@ -23,6 +23,9 @@ type OpenAIUsage struct {
|
||||
PromptTokens int `json:"prompt_tokens"`
|
||||
CompletionTokens int `json:"completion_tokens"`
|
||||
TotalTokens int `json:"total_tokens"`
|
||||
// Extra timing data, disabled by default as is't not a part of OpenAI specification
|
||||
TimingPromptProcessing float64 `json:"timing_prompt_processing,omitempty"`
|
||||
TimingTokenGeneration float64 `json:"timing_token_generation,omitempty"`
|
||||
}
|
||||
|
||||
type Item struct {
|
||||
|
||||
@@ -520,6 +520,7 @@ In the help text below, BASEPATH is the location that local-ai is being executed
|
||||
| --upload-limit | 15 | Default upload-limit in MB | $LOCALAI_UPLOAD_LIMIT |
|
||||
| --api-keys | API-KEYS,... | List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys | $LOCALAI_API_KEY |
|
||||
| --disable-welcome | | Disable welcome pages | $LOCALAI_DISABLE_WELCOME |
|
||||
| --machine-tag | | If not empty - put that string to Machine-Tag header in each response. Useful to track response from different machines using multiple P2P federated nodes | $LOCALAI_MACHINE_TAG |
|
||||
|
||||
#### Backend Flags
|
||||
| Parameter | Default | Description | Environment Variable |
|
||||
@@ -553,6 +554,34 @@ LOCALAI_MODELS_PATH=/mnt/storage/localai/models
|
||||
LOCALAI_F16=true
|
||||
```
|
||||
|
||||
### Request headers
|
||||
|
||||
You can use 'Extra-Usage' request header key presence ('Extra-Usage: true') to receive inference timings in milliseconds extending default OpenAI response model in the usage field:
|
||||
```
|
||||
...
|
||||
{
|
||||
"id": "...",
|
||||
"created": ...,
|
||||
"model": "...",
|
||||
"choices": [
|
||||
{
|
||||
...
|
||||
},
|
||||
...
|
||||
],
|
||||
"object": "...",
|
||||
"usage": {
|
||||
"prompt_tokens": ...,
|
||||
"completion_tokens": ...,
|
||||
"total_tokens": ...,
|
||||
// Extra-Usage header key will include these two float fields:
|
||||
"timing_prompt_processing: ...,
|
||||
"timing_token_generation": ...,
|
||||
},
|
||||
}
|
||||
...
|
||||
```
|
||||
|
||||
### Extra backends
|
||||
|
||||
LocalAI can be extended with extra backends. The backends are implemented as `gRPC` services and can be written in any language. The container images that are built and published on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags) contain a set of images split in core and extra. By default Images bring all the dependencies and backends supported by LocalAI (we call those `extra` images). The `-core` images instead bring only the strictly necessary dependencies to run LocalAI without only a core set of backends.
|
||||
@@ -616,4 +645,4 @@ Note that, for llama.cpp you need to set accordingly `LLAMACPP_PARALLEL` to the
|
||||
|
||||
LocalAI will automatically discover the CPU flagset available in your host and will use the most optimized version of the backends.
|
||||
|
||||
If you want to disable this behavior, you can set `DISABLE_AUTODETECT` to `true` in the environment variables.
|
||||
If you want to disable this behavior, you can set `DISABLE_AUTODETECT` to `true` in the environment variables.
|
||||
|
||||
@@ -88,7 +88,7 @@ Here is the list of the variables available that can be used to customize the bu
|
||||
| Variable | Default | Description |
|
||||
| ---------------------| ------- | ----------- |
|
||||
| `BUILD_TYPE` | None | Build type. Available: `cublas`, `openblas`, `clblas`, `metal`,`hipblas`, `sycl_f16`, `sycl_f32` |
|
||||
| `GO_TAGS` | `tts stablediffusion` | Go tags. Available: `stablediffusion`, `tts`, `tinydream` |
|
||||
| `GO_TAGS` | `tts stablediffusion` | Go tags. Available: `stablediffusion`, `tts` |
|
||||
| `CLBLAST_DIR` | | Specify a CLBlast directory |
|
||||
| `CUDA_LIBPATH` | | Specify a CUDA library path |
|
||||
| `BUILD_API_ONLY` | false | Set to true to build only the API (no backends will be built) |
|
||||
@@ -202,7 +202,7 @@ make build
|
||||
|
||||
**Requirements**: OpenCV, Gomp
|
||||
|
||||
Image generation requires `GO_TAGS=stablediffusion` or `GO_TAGS=tinydream` to be set during build:
|
||||
Image generation requires `GO_TAGS=stablediffusion` to be set during build:
|
||||
|
||||
```
|
||||
make GO_TAGS=stablediffusion build
|
||||
|
||||
@@ -16,7 +16,7 @@ For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA
|
||||
|
||||
**Available Images Types**:
|
||||
|
||||
- Images ending with `-core` are smaller images without predownload python dependencies. Use these images if you plan to use `llama.cpp`, `stablediffusion-ncn`, `tinydream` or `rwkv` backends - if you are not sure which one to use, do **not** use these images.
|
||||
- Images ending with `-core` are smaller images without predownload python dependencies. Use these images if you plan to use `llama.cpp`, `stablediffusion-ncn` or `rwkv` backends - if you are not sure which one to use, do **not** use these images.
|
||||
- Images containing the `aio` tag are all-in-one images with all the features enabled, and come with an opinionated set of configuration.
|
||||
- FFMpeg is **not** included in the default images due to [its licensing](https://www.ffmpeg.org/legal.html). If you need FFMpeg, use the images ending with `-ffmpeg`. Note that `ffmpeg` is needed in case of using `audio-to-text` LocalAI's features.
|
||||
- If using old and outdated CPUs and no GPUs you might need to set `REBUILD` to `true` as environment variable along with options to disable the flags which your CPU does not support, however note that inference will perform poorly and slow. See also [flagset compatibility]({{%relref "docs/getting-started/build#cpu-flagset-compatibility" %}}).
|
||||
@@ -197,7 +197,7 @@ Images with `core` in the tag are smaller and do not contain any python dependen
|
||||
| --- | --- |-------------------------------------------------------------|
|
||||
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f16` | `localai/localai:master-sycl-f16` |
|
||||
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f16` | `localai/localai:latest-gpu-intel-f16` |
|
||||
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16` | `localai/localai:{{< version >}}-sycl-f16` |
|
||||
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-core` | `localai/localai:{{< version >}}-sycl-f16-core` |
|
||||
| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg` | `localai/localai:{{< version >}}-sycl-f16-ffmpeg` |
|
||||
| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg-core` | `localai/localai:{{< version >}}-sycl-f16-ffmpeg-core` |
|
||||
|
||||
@@ -209,7 +209,7 @@ Images with `core` in the tag are smaller and do not contain any python dependen
|
||||
| --- | --- |-------------------------------------------------------------|
|
||||
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f32` | `localai/localai:master-sycl-f32` |
|
||||
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f32` | `localai/localai:latest-gpu-intel-f32` |
|
||||
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32` | `localai/localai:{{< version >}}-sycl-f32` |
|
||||
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32-core` | `localai/localai:{{< version >}}-sycl-f32-core` |
|
||||
| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32-ffmpeg` | `localai/localai:{{< version >}}-sycl-f32-ffmpeg` |
|
||||
| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32-ffmpeg-core` | `localai/localai:{{< version >}}-sycl-f32-ffmpeg-core` |
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ LocalAI will attempt to automatically load models which are not explicitly confi
|
||||
| `mamba` | Mamba models architecture | yes | GPT | no | no | CPU/CUDA |
|
||||
| `exllama2` | GPTQ | yes | GPT only | no | no | N/A |
|
||||
| `transformers-musicgen` | | no | Audio generation | no | no | N/A |
|
||||
| [tinydream](https://github.com/symisc/tiny-dream#tiny-dreaman-embedded-header-only-stable-diffusion-inference-c-librarypixlabiotiny-dream) | stablediffusion | no | Image | no | no | N/A |
|
||||
| stablediffusion | no | Image | no | no | N/A |
|
||||
| `coqui` | Coqui | no | Audio generation and Voice cloning | no | no | CPU/CUDA |
|
||||
| `openvoice` | Open voice | no | Audio generation and Voice cloning | no | no | CPU/CUDA |
|
||||
| `parler-tts` | Open voice | no | Audio generation and Voice cloning | no | no | CPU/CUDA |
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
- &phi4
|
||||
url: "github:mudler/LocalAI/gallery/phi-4-chat.yaml@master"
|
||||
name: "phi-4"
|
||||
icon: https://avatars.githubusercontent.com/u/6154722
|
||||
license: mit
|
||||
tags:
|
||||
- llm
|
||||
@@ -189,7 +190,7 @@
|
||||
- https://huggingface.co/Nitral-AI/NightWing3-10B-v0.1
|
||||
- https://huggingface.co/bartowski/NightWing3-10B-v0.1-GGUF
|
||||
description: |
|
||||
Base model: (Falcon3-10B)
|
||||
Base model: (Falcon3-10B)
|
||||
overrides:
|
||||
parameters:
|
||||
model: NightWing3-10B-v0.1-Q4_K_M.gguf
|
||||
@@ -224,7 +225,7 @@
|
||||
uri: huggingface://bartowski/INTELLECT-1-Instruct-GGUF/INTELLECT-1-Instruct-Q4_K_M.gguf
|
||||
- &llama33
|
||||
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
|
||||
icon: https://avatars.githubusercontent.com/u/153379578
|
||||
license: llama3.3
|
||||
description: |
|
||||
The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.
|
||||
@@ -401,9 +402,64 @@
|
||||
- filename: Negative-Anubis-70B-v1-Q4_K_M.gguf
|
||||
sha256: ac088da9ca70fffaa70c876fbada9fc5a02e7d6049ef68f16b11a9c3256f2510
|
||||
uri: huggingface://bartowski/Negative-Anubis-70B-v1-GGUF/Negative-Anubis-70B-v1-Q4_K_M.gguf
|
||||
- !!merge <<: *llama33
|
||||
name: "l3.3-ms-nevoria-70b"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/dtlCF4LbekmDD2y3LNpdH.jpeg
|
||||
urls:
|
||||
- https://huggingface.co/Steelskull/L3.3-MS-Nevoria-70b
|
||||
- https://huggingface.co/bartowski/L3.3-MS-Nevoria-70b-GGUF
|
||||
description: |
|
||||
This model was created as I liked the storytelling of EVA, the prose and details of scenes from EURYALE and Anubis, enhanced with Negative_LLAMA to kill off the positive bias with a touch of nemotron sprinkeled in.
|
||||
|
||||
The choice to use the lorablated model as a base was intentional - while it might seem counterintuitive, this approach creates unique interactions between the weights, similar to what was achieved in the original Astoria model and Astoria V2 model . Rather than simply removing refusals, this "weight twisting" effect that occurs when subtracting the lorablated base model from the other models during the merge process creates an interesting balance in the final model's behavior. While this approach differs from traditional sequential application of components, it was chosen for its unique characteristics in the model's responses.
|
||||
overrides:
|
||||
parameters:
|
||||
model: L3.3-MS-Nevoria-70b-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: L3.3-MS-Nevoria-70b-Q4_K_M.gguf
|
||||
sha256: e8b0763f263089a19d4b112b7ed5085cc5f1ed9ca49c5085baa8d51f4ded1f94
|
||||
uri: huggingface://bartowski/L3.3-MS-Nevoria-70b-GGUF/L3.3-MS-Nevoria-70b-Q4_K_M.gguf
|
||||
- !!merge <<: *llama33
|
||||
name: "l3.3-70b-magnum-v4-se"
|
||||
urls:
|
||||
- https://huggingface.co/Doctor-Shotgun/L3.3-70B-Magnum-v4-SE
|
||||
- https://huggingface.co/bartowski/L3.3-70B-Magnum-v4-SE-GGUF
|
||||
description: |
|
||||
The Magnum v4 series is complete, but here's something a little extra I wanted to tack on as I wasn't entirely satisfied with the results of v4 72B. "SE" for Special Edition - this model is finetuned from meta-llama/Llama-3.3-70B-Instruct as an rsLoRA adapter. The dataset is a slightly revised variant of the v4 data with some elements of the v2 data re-introduced.
|
||||
|
||||
The objective, as with the other Magnum models, is to emulate the prose style and quality of the Claude 3 Sonnet/Opus series of models on a local scale, so don't be surprised to see "Claude-isms" in its output.
|
||||
overrides:
|
||||
parameters:
|
||||
model: L3.3-70B-Magnum-v4-SE-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: L3.3-70B-Magnum-v4-SE-Q4_K_M.gguf
|
||||
sha256: 9724a6364a42caa3d5a1687258eb329c9af6cbb2ce01c8dd556c1a222a2e0352
|
||||
uri: huggingface://bartowski/L3.3-70B-Magnum-v4-SE-GGUF/L3.3-70B-Magnum-v4-SE-Q4_K_M.gguf
|
||||
- !!merge <<: *llama33
|
||||
name: "l3.3-prikol-70b-v0.2"
|
||||
icon: https://files.catbox.moe/x9t3zo.png
|
||||
urls:
|
||||
- https://huggingface.co/Nohobby/L3.3-Prikol-70B-v0.2
|
||||
- https://huggingface.co/bartowski/L3.3-Prikol-70B-v0.2-GGUF
|
||||
description: |
|
||||
A merge of some Llama 3.3 models because um uh yeah
|
||||
|
||||
Went extra schizo on the recipe, hoping for an extra fun result, and... Well, I guess it's an overall improvement over the previous revision. It's a tiny bit smarter, has even more distinct swipes and nice dialogues, but for some reason it's damn sloppy.
|
||||
|
||||
I've published the second step of this merge as a separate model, and I'd say the results are more interesting, but not as usable as this one. https://huggingface.co/Nohobby/AbominationSnowPig
|
||||
|
||||
Prompt format: Llama3 OR Llama3 Context and ChatML Instruct. It actually works a bit better this way
|
||||
overrides:
|
||||
parameters:
|
||||
model: L3.3-Prikol-70B-v0.2-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: L3.3-Prikol-70B-v0.2-Q4_K_M.gguf
|
||||
sha256: fc0ff514efbc0b67981c2bf1423d5a2e1b8801e4266ba0c653ea148414fe5ffc
|
||||
uri: huggingface://bartowski/L3.3-Prikol-70B-v0.2-GGUF/L3.3-Prikol-70B-v0.2-Q4_K_M.gguf
|
||||
- &rwkv
|
||||
url: "github:mudler/LocalAI/gallery/rwkv.yaml@master"
|
||||
name: "rwkv-6-world-7b"
|
||||
icon: https://avatars.githubusercontent.com/u/132652788
|
||||
license: apache-2.0
|
||||
urls:
|
||||
- https://huggingface.co/RWKV/rwkv-6-world-7b
|
||||
@@ -426,6 +482,7 @@
|
||||
uri: huggingface://bartowski/rwkv-6-world-7b-GGUF/rwkv-6-world-7b-Q4_K_M.gguf
|
||||
- &qwen25coder
|
||||
name: "qwen2.5-coder-14b"
|
||||
icon: https://avatars.githubusercontent.com/u/141221163
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
license: apache-2.0
|
||||
tags:
|
||||
@@ -611,7 +668,7 @@
|
||||
uri: huggingface://mraWdermacher/Qwen2.5-Coder-32B-Instruct-Uncensored-i1-GGUF/Qwen2.5-Coder-32B-Instruct-Uncensored.i1-Q4_K_M.gguf
|
||||
- &opencoder
|
||||
name: "opencoder-8b-base"
|
||||
icon: https://github.com/OpenCoder-llm/opencoder-llm.github.io/blob/main/static/images/opencoder_icon.jpg?raw=true
|
||||
icon: https://avatars.githubusercontent.com/u/186387526
|
||||
url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/infly/OpenCoder-8B-Base
|
||||
@@ -677,6 +734,7 @@
|
||||
uri: huggingface://QuantFactory/OpenCoder-1.5B-Instruct-GGUF/OpenCoder-1.5B-Instruct.Q4_K_M.gguf
|
||||
- &granite3
|
||||
name: "granite-3.0-1b-a400m-instruct"
|
||||
icon: https://avatars.githubusercontent.com/u/167822367
|
||||
urls:
|
||||
- https://huggingface.co/ibm-granite/granite-3.0-1b-a400m-instruct
|
||||
- https://huggingface.co/QuantFactory/granite-3.0-1b-a400m-instruct-GGUF
|
||||
@@ -761,10 +819,9 @@
|
||||
- filename: salamandra-7b-instruct.Q4_K_M-f32.gguf
|
||||
sha256: bac8e8c1d1d9d53cbdb148b8ff9ad378ddb392429207099e85b5aae3a43bff3d
|
||||
uri: huggingface://cstr/salamandra-7b-instruct-GGUF/salamandra-7b-instruct.Q4_K_M-f32.gguf
|
||||
- &llama32
|
||||
## llama3.2
|
||||
- &llama32 ## llama3.2
|
||||
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
|
||||
icon: https://avatars.githubusercontent.com/u/153379578
|
||||
license: llama3.2
|
||||
description: |
|
||||
The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 1B and 3B sizes (text in/text out). The Llama 3.2 instruction-tuned text only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks. They outperform many of the available open source and closed chat models on common industry benchmarks.
|
||||
@@ -933,7 +990,6 @@
|
||||
uri: huggingface://mradermacher/Llama-3.2-3B-Reasoning-Time-GGUF/Llama-3.2-3B-Reasoning-Time.Q4_K_M.gguf
|
||||
- !!merge <<: *llama32
|
||||
name: "llama-3.2-sun-2.5b-chat"
|
||||
icon: https://i.ibb.co/PF0TdMJ/imagine-image-9a56cee7-0f4f-4cc2-b265-a5b8d04f266b.png
|
||||
urls:
|
||||
- https://huggingface.co/meditsolutions/Llama-3.2-SUN-2.5B-chat
|
||||
- https://huggingface.co/mradermacher/Llama-3.2-SUN-2.5B-chat-GGUF
|
||||
@@ -965,7 +1021,6 @@
|
||||
uri: huggingface://mradermacher/Llama-3.2-SUN-2.5B-chat-GGUF/Llama-3.2-SUN-2.5B-chat.Q4_K_M.gguf
|
||||
- !!merge <<: *llama32
|
||||
name: "llama-3.2-3b-instruct-uncensored"
|
||||
icon: https://i.imgur.com/JOePyAN.png
|
||||
urls:
|
||||
- https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-uncensored-GGUF
|
||||
- https://huggingface.co/chuanli11/Llama-3.2-3B-Instruct-uncensored
|
||||
@@ -1299,9 +1354,9 @@
|
||||
- filename: FineMath-Llama-3B-Q4_K_M.gguf
|
||||
sha256: 16c73b5cf2a417a7e1608bcc9469f1461fc3e759ce04a3a337f48df977dc158c
|
||||
uri: huggingface://bartowski/FineMath-Llama-3B-GGUF/FineMath-Llama-3B-Q4_K_M.gguf
|
||||
- &qwen25
|
||||
## Qwen2.5
|
||||
- &qwen25 ## Qwen2.5
|
||||
name: "qwen2.5-14b-instruct"
|
||||
icon: https://avatars.githubusercontent.com/u/141221163
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
license: apache-2.0
|
||||
description: |
|
||||
@@ -1591,6 +1646,7 @@
|
||||
uri: huggingface://bartowski/qwen2.5-7b-ins-v3-GGUF/qwen2.5-7b-ins-v3-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "supernova-medius"
|
||||
icon: https://avatars.githubusercontent.com/u/126496414
|
||||
urls:
|
||||
- https://huggingface.co/arcee-ai/SuperNova-Medius-GGUF
|
||||
description: |
|
||||
@@ -1745,7 +1801,7 @@
|
||||
uri: huggingface://bartowski/TheBeagle-v2beta-32B-MGS-GGUF/TheBeagle-v2beta-32B-MGS-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "meraj-mini"
|
||||
icon: https://i.ibb.co/CmPSSpq/Screenshot-2024-10-06-at-9-45-06-PM.png
|
||||
icon: https://avatars.githubusercontent.com/u/126496414
|
||||
urls:
|
||||
- https://huggingface.co/arcee-ai/Meraj-Mini
|
||||
- https://huggingface.co/QuantFactory/Meraj-Mini-GGUF
|
||||
@@ -2125,7 +2181,6 @@
|
||||
sha256: 42cf7a96784dc8f25c61c2404620c3e6548a024caa8dff6e435d7c86400d7ab8
|
||||
uri: huggingface://mradermacher/Qwen2.5-7B-nerd-uncensored-v1.7-GGUF/Qwen2.5-7B-nerd-uncensored-v1.7.Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
icon: https://i.imgur.com/OxX2Usi.png
|
||||
name: "evathene-v1.0"
|
||||
urls:
|
||||
- https://huggingface.co/sophosympatheia/Evathene-v1.0
|
||||
@@ -2375,7 +2430,7 @@
|
||||
uri: huggingface://QuantFactory/Math-IIO-7B-Instruct-GGUF/Math-IIO-7B-Instruct.Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "virtuoso-small"
|
||||
icon: https://i.ibb.co/pXD6Bcv/SW2-U-g-QQLSH1-ZAbxhs-Iu-A.webp
|
||||
icon: https://avatars.githubusercontent.com/u/126496414
|
||||
urls:
|
||||
- https://huggingface.co/arcee-ai/Virtuoso-Small-GGUF
|
||||
description: |
|
||||
@@ -2484,7 +2539,6 @@
|
||||
sha256: 91907f29746625a62885793475956220b81d8a5a34b53686a1acd1d03fd403ea
|
||||
uri: huggingface://bartowski/72B-Qwen2.5-Kunou-v1-GGUF/72B-Qwen2.5-Kunou-v1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
icon: https://i.imgur.com/OxX2Usi.png
|
||||
name: "evathene-v1.3"
|
||||
urls:
|
||||
- https://huggingface.co/sophosympatheia/Evathene-v1.3
|
||||
@@ -2653,6 +2707,7 @@
|
||||
- cpu
|
||||
- function-calling
|
||||
name: "arch-function-1.5b"
|
||||
icon: https://avatars.githubusercontent.com/u/112724757
|
||||
uri: "github:mudler/LocalAI/gallery/arch-function.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/katanemolabs/Arch-Function-1.5B
|
||||
@@ -3092,7 +3147,7 @@
|
||||
uri: huggingface://bartowski/Rombos-Qwen2.5-Writer-32b-GGUF/Rombos-Qwen2.5-Writer-32b-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "sky-t1-32b-preview"
|
||||
icon: https://raw.githubusercontent.com/NovaSky-AI/novasky-ai.github.io/main/assets/images/blue-bird-wider.jpeg
|
||||
icon: https://github.com/NovaSky-AI/novasky-ai.github.io/raw/main/assets/images/blue-bird-wider.jpeg
|
||||
urls:
|
||||
- https://huggingface.co/NovaSky-AI/Sky-T1-32B-Preview
|
||||
- https://huggingface.co/bartowski/Sky-T1-32B-Preview-GGUF
|
||||
@@ -3138,8 +3193,88 @@
|
||||
- filename: steiner-32b-preview-Q4_K_M.gguf
|
||||
sha256: 1d7bf6d6dc8db8c81b3e71dc89756cd23417bb0a645b7dcdd1f9457781a88652
|
||||
uri: huggingface://bartowski/steiner-32b-preview-GGUF/steiner-32b-preview-Q4_K_M.gguf
|
||||
- &smollm
|
||||
## SmolLM
|
||||
- !!merge <<: *qwen25
|
||||
name: "qwerus-7b"
|
||||
urls:
|
||||
- https://huggingface.co/mlabonne/Qwerus-7B
|
||||
- https://huggingface.co/bartowski/Qwerus-7B-GGUF
|
||||
description: |
|
||||
Qwerus-7B is a merge of the following models using LazyMergekit:
|
||||
PRIME-RL/Eurus-2-7B-PRIME
|
||||
Qwen/Qwen2.5-7B-Instruct
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwerus-7B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwerus-7B-Q4_K_M.gguf
|
||||
sha256: 3676629e8092a59f523393e6eb5072727f5213a9e03b7b81141f05a33743e20c
|
||||
uri: huggingface://bartowski/Qwerus-7B-GGUF/Qwerus-7B-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "lb-reranker-0.5b-v1.0"
|
||||
urls:
|
||||
- https://huggingface.co/lightblue/lb-reranker-0.5B-v1.0
|
||||
- https://huggingface.co/bartowski/lb-reranker-0.5B-v1.0-GGUF
|
||||
description: |
|
||||
The LB Reranker has been trained to determine the relatedness of a given query to a piece of text, therefore allowing it to be used as a ranker or reranker in various retrieval-based tasks.
|
||||
|
||||
This model is fine-tuned from a Qwen/Qwen2.5-0.5B-Instruct model checkpoint and was trained for roughly 5.5 hours using the 8 x L20 instance (ecs.gn8is-8x.32xlarge) on Alibaba Cloud.
|
||||
|
||||
The training data for this model can be found at lightblue/reranker_continuous_filt_max7_train and the code for generating this data as well as running the training of the model can be found on our Github repo.
|
||||
|
||||
Trained on data in over 95 languages, this model is applicable to a broad range of use cases.
|
||||
|
||||
This model has three main benefits over comparable rerankers.
|
||||
|
||||
It has shown slightly higher performance on evaluation benchmarks.
|
||||
It has been trained on more languages than any previous model.
|
||||
It is a simple Causal LM model trained to output a string between "1" and "7".
|
||||
|
||||
This last point means that this model can be used natively with many widely available inference packages, including vLLM and LMDeploy. This in turns allows our reranker to benefit from improvements to inference as and when these packages release them.
|
||||
|
||||
Update: We have also found that this model works pretty well as a code snippet reranker too (P@1 of 96%)! See our Colab for more details.
|
||||
overrides:
|
||||
parameters:
|
||||
model: lb-reranker-0.5B-v1.0-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: lb-reranker-0.5B-v1.0-Q4_K_M.gguf
|
||||
sha256: 43568150de5136da15c996bbf4d1a78cc6580515c40f0ef9a8c90b0542228ab3
|
||||
uri: huggingface://bartowski/lb-reranker-0.5B-v1.0-GGUF/lb-reranker-0.5B-v1.0-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "uwu-7b-instruct"
|
||||
urls:
|
||||
- https://huggingface.co/qingy2024/UwU-7B-Instruct
|
||||
- https://huggingface.co/bartowski/UwU-7B-Instruct-GGUF
|
||||
description: |
|
||||
Small QwQ, full-finetuned on FineQwQ-142K. Unlike my previous models, this one is a general-purpose reasoning machine!
|
||||
overrides:
|
||||
parameters:
|
||||
model: UwU-7B-Instruct-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: UwU-7B-Instruct-Q4_K_M.gguf
|
||||
sha256: 279b2ba20d51bb155c8dd497cf49e0c28407b1822c75de88cfd83d13fd14a59f
|
||||
uri: huggingface://bartowski/UwU-7B-Instruct-GGUF/UwU-7B-Instruct-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "drt-o1-14b"
|
||||
urls:
|
||||
- https://huggingface.co/Krystalan/DRT-o1-14B
|
||||
- https://huggingface.co/bartowski/DRT-o1-14B-GGUF
|
||||
description: |
|
||||
This repository contains the resources for our paper "DRT-o1: Optimized Deep Reasoning Translation via Long Chain-of-Thought"
|
||||
In this work, we introduce DRT-o1, an attempt to bring the success of long thought reasoning to neural machine translation (MT). To this end,
|
||||
|
||||
🌟 We mine English sentences with similes or metaphors from existing literature books, which are suitable for translation via long thought.
|
||||
🌟 We propose a designed multi-agent framework with three agents (i.e., a translator, an advisor and an evaluator) to synthesize the MT samples with long thought. There are 22,264 synthesized samples in total.
|
||||
🌟 We train DRT-o1-8B, DRT-o1-7B and DRT-o1-14B using Llama-3.1-8B-Instruct, Qwen2.5-7B-Instruct and Qwen2.5-14B-Instruct as backbones.
|
||||
|
||||
Our goal is not to achieve competitive performance with OpenAI’s O1 in neural machine translation (MT). Instead, we explore technical routes to bring the success of long thought to MT. To this end, we introduce DRT-o1, a byproduct of our exploration, and we hope it could facilitate the corresponding research in this direction.
|
||||
overrides:
|
||||
parameters:
|
||||
model: DRT-o1-14B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: DRT-o1-14B-Q4_K_M.gguf
|
||||
sha256: 9619ca984cf4ce8e4f69bcde831de17b2ce05dd89536e3130608877521e3d328
|
||||
uri: huggingface://bartowski/DRT-o1-14B-GGUF/DRT-o1-14B-Q4_K_M.gguf
|
||||
- &smollm ## SmolLM
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
name: "smollm-1.7b-instruct"
|
||||
icon: https://huggingface.co/datasets/HuggingFaceTB/images/resolve/main/banner_smol.png
|
||||
@@ -3183,10 +3318,23 @@
|
||||
- filename: smollm2-1.7b-instruct-q4_k_m.gguf
|
||||
sha256: decd2598bc2c8ed08c19adc3c8fdd461ee19ed5708679d1c54ef54a5a30d4f33
|
||||
uri: huggingface://HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF/smollm2-1.7b-instruct-q4_k_m.gguf
|
||||
- &llama31
|
||||
## LLama3.1
|
||||
- !!merge <<: *qwen25
|
||||
name: "vikhr-qwen-2.5-1.5b-instruct"
|
||||
urls:
|
||||
- https://huggingface.co/Vikhrmodels/Vikhr-Qwen-2.5-1.5B-Instruct
|
||||
- https://huggingface.co/QuantFactory/Vikhr-Qwen-2.5-1.5B-Instruct-GGUF
|
||||
description: |
|
||||
Instructive model based on Qwen-2.5-1.5B-Instruct, trained on the Russian-language dataset GrandMaster-PRO-MAX. Designed for high-efficiency text processing in Russian and English, delivering precise responses and fast task execution.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf
|
||||
sha256: eaeac314e30b461413bc1cc819cdc0cd6a79265711fd0b8268702960a082c7bd
|
||||
uri: huggingface://QuantFactory/Vikhr-Qwen-2.5-1.5B-Instruct-GGUF/Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf
|
||||
- &llama31 ## LLama3.1
|
||||
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
|
||||
icon: https://avatars.githubusercontent.com/u/153379578
|
||||
name: "meta-llama-3.1-8b-instruct"
|
||||
license: llama3.1
|
||||
description: |
|
||||
@@ -3275,7 +3423,7 @@
|
||||
sha256: 6d175432f66d10dfed9737f73a5073d513d18e1ee7bd4b9cf2a59deb359f36ff
|
||||
- !!merge <<: *llama31
|
||||
name: "meta-llama-3.1-8b-instruct-abliterated"
|
||||
icon: https://i.imgur.com/KhorYYG.png
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/AsTgL8VCgMHgobq4cr46b.png
|
||||
urls:
|
||||
- https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated
|
||||
- https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF
|
||||
@@ -3304,7 +3452,7 @@
|
||||
uri: huggingface://mmnga/Llama-3.1-70B-Japanese-Instruct-2407-gguf/Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
name: "openbuddy-llama3.1-8b-v22.1-131k"
|
||||
icon: https://raw.githubusercontent.com/OpenBuddy/OpenBuddy/main/media/demo.png
|
||||
icon: https://github.com/OpenBuddy/OpenBuddy/raw/main/media/demo.png
|
||||
urls:
|
||||
- https://huggingface.co/sunnyyy/openbuddy-llama3.1-8b-v22.1-131k-Q4_K_M-GGUF
|
||||
description: |
|
||||
@@ -3480,7 +3628,7 @@
|
||||
sha256: 6557c5d5091f2507d19ab1f8bfb9ceb4e1536a755ab70f148b18aeb33741580f
|
||||
uri: huggingface://mradermacher/Llama-3.1-Techne-RP-8b-v1-GGUF/Llama-3.1-Techne-RP-8b-v1.Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
icon: https://i.ibb.co/9hwFrvL/BLMs-Wkx-NQf-W-46-FZDg-ILhg.jpg
|
||||
icon: https://avatars.githubusercontent.com/u/126496414
|
||||
name: "llama-spark"
|
||||
urls:
|
||||
- https://huggingface.co/arcee-ai/Llama-Spark
|
||||
@@ -3598,7 +3746,6 @@
|
||||
- !!merge <<: *llama31
|
||||
name: "llama-3.1-supernova-lite-reflection-v1.0-i1"
|
||||
url: "github:mudler/LocalAI/gallery/llama3.1-reflective.yaml@master"
|
||||
icon: https://i.ibb.co/r072p7j/eopi-ZVu-SQ0-G-Cav78-Byq-Tg.png
|
||||
urls:
|
||||
- https://huggingface.co/SE6446/Llama-3.1-SuperNova-Lite-Reflection-V1.0
|
||||
- https://huggingface.co/mradermacher/Llama-3.1-SuperNova-Lite-Reflection-V1.0-i1-GGUF
|
||||
@@ -3613,7 +3760,7 @@
|
||||
uri: huggingface://mradermacher/Llama-3.1-SuperNova-Lite-Reflection-V1.0-i1-GGUF/Llama-3.1-SuperNova-Lite-Reflection-V1.0.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
name: "llama-3.1-supernova-lite"
|
||||
icon: https://i.ibb.co/r072p7j/eopi-ZVu-SQ0-G-Cav78-Byq-Tg.png
|
||||
icon: https://avatars.githubusercontent.com/u/126496414
|
||||
urls:
|
||||
- https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite
|
||||
- https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite-GGUF
|
||||
@@ -4127,6 +4274,7 @@
|
||||
uri: huggingface://mradermacher/Hermes-3-Llama-3.1-70B-lorablated-GGUF/Hermes-3-Llama-3.1-70B-lorablated.Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
name: "hermes-3-llama-3.1-8b-lorablated"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/4Hbw5n68jKUSBQeTqQIeT.png
|
||||
urls:
|
||||
- https://huggingface.co/mlabonne/Hermes-3-Llama-3.1-8B-lorablated-GGUF
|
||||
description: |
|
||||
@@ -4335,7 +4483,6 @@
|
||||
sha256: 27b10c3ca4507e8bf7d305d60e5313b54ef5fffdb43a03f36223d19d906e39f3
|
||||
uri: huggingface://mradermacher/L3.1-70Blivion-v0.1-rc1-70B-i1-GGUF/L3.1-70Blivion-v0.1-rc1-70B.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
icon: https://i.imgur.com/sdN0Aqg.jpeg
|
||||
name: "llama-3.1-hawkish-8b"
|
||||
urls:
|
||||
- https://huggingface.co/mukaj/Llama-3.1-Hawkish-8B
|
||||
@@ -5072,11 +5219,10 @@
|
||||
- filename: Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf
|
||||
sha256: 268390e07edd407ad93ea21a868b7ae995b5950e01cad0db9e1802ae5049d405
|
||||
uri: huggingface://bartowski/Dolphin3.0-Llama3.1-8B-GGUF/Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf
|
||||
- &deepseek
|
||||
## Deepseek
|
||||
- &deepseek ## Deepseek
|
||||
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
|
||||
name: "deepseek-coder-v2-lite-instruct"
|
||||
icon: "https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true"
|
||||
icon: "https://avatars.githubusercontent.com/u/148330874"
|
||||
license: deepseek
|
||||
description: |
|
||||
DeepSeek-Coder-V2, an open-source Mixture-of-Experts (MoE) code language model that achieves performance comparable to GPT4-Turbo in code-specific tasks. Specifically, DeepSeek-Coder-V2 is further pre-trained from DeepSeek-Coder-V2-Base with 6 trillion tokens sourced from a high-quality and multi-source corpus. Through this continued pre-training, DeepSeek-Coder-V2 substantially enhances the coding and mathematical reasoning capabilities of DeepSeek-Coder-V2-Base, while maintaining comparable performance in general language tasks. Compared to DeepSeek-Coder, DeepSeek-Coder-V2 demonstrates significant advancements in various aspects of code-related tasks, as well as reasoning and general capabilities. Additionally, DeepSeek-Coder-V2 expands its support for programming languages from 86 to 338, while extending the context length from 16K to 128K.
|
||||
@@ -5138,10 +5284,10 @@
|
||||
- filename: archangel_sft_pythia2-8b.Q4_K_M.gguf
|
||||
sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8
|
||||
uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf
|
||||
- &qwen2
|
||||
## Start QWEN2
|
||||
- &qwen2 ## Start QWEN2
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
name: "qwen2-7b-instruct"
|
||||
icon: https://avatars.githubusercontent.com/u/141221163
|
||||
license: apache-2.0
|
||||
description: |
|
||||
Qwen2 is the new series of Qwen large language models. For Qwen2, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters, including a Mixture-of-Experts model. This repo contains the instruction-tuned 7B Qwen2 model.
|
||||
@@ -5248,7 +5394,7 @@
|
||||
uri: huggingface://bartowski/Einstein-v7-Qwen2-7B-GGUF/Einstein-v7-Qwen2-7B-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen2
|
||||
name: "arcee-spark"
|
||||
icon: https://i.ibb.co/80ssNWS/o-Vdk-Qx-ARNmzr-Pi1h-Efj-SA.webp
|
||||
icon: https://avatars.githubusercontent.com/u/126496414
|
||||
description: |
|
||||
Arcee Spark is a powerful 7B parameter language model that punches well above its weight class. Initialized from Qwen2, this model underwent a sophisticated training process:
|
||||
|
||||
@@ -5286,7 +5432,7 @@
|
||||
uri: huggingface://Hercules-5.0-Qwen2-7B-Q4_K_M.gguf/Hercules-5.0-Qwen2-7B-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen2
|
||||
name: "arcee-agent"
|
||||
icon: https://i.ibb.co/CBHmTDn/136719a5-6d8a-4654-a618-46eabc788953.jpg
|
||||
icon: https://avatars.githubusercontent.com/u/126496414
|
||||
description: |
|
||||
Arcee Agent is a cutting-edge 7B parameter language model specifically designed for function calling and tool use. Initialized from Qwen2-7B, it rivals the performance of much larger models while maintaining efficiency and speed. This model is particularly suited for developers, researchers, and businesses looking to implement sophisticated AI-driven solutions without the computational overhead of larger language models. Compute for training Arcee-Agent was provided by CrusoeAI. Arcee-Agent was trained using Spectrum.
|
||||
urls:
|
||||
@@ -5471,8 +5617,34 @@
|
||||
- filename: marco-o1-uncensored.Q4_K_M.gguf
|
||||
sha256: ad0440270a7254098f90779744d3e5b34fe49b7baf97c819909ba9c5648cc0d9
|
||||
uri: huggingface://QuantFactory/marco-o1-uncensored-GGUF/marco-o1-uncensored.Q4_K_M.gguf
|
||||
- &mistral03
|
||||
## START Mistral
|
||||
- !!merge <<: *qwen2
|
||||
name: "minicpm-v-2_6"
|
||||
license: apache-2.0
|
||||
icon: https://avatars.githubusercontent.com/u/89920203
|
||||
urls:
|
||||
- https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf
|
||||
- https://huggingface.co/openbmb/MiniCPM-V-2_6
|
||||
description: |
|
||||
MiniCPM-V 2.6 is the latest and most capable model in the MiniCPM-V series. The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters
|
||||
tags:
|
||||
- llm
|
||||
- multimodal
|
||||
- gguf
|
||||
- gpu
|
||||
- qwen2
|
||||
- cpu
|
||||
overrides:
|
||||
mmproj: minicpm-v-2_6-mmproj-f16.gguf
|
||||
parameters:
|
||||
model: minicpm-v-2_6-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: minicpm-v-2_6-Q4_K_M.gguf
|
||||
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
|
||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
|
||||
- filename: minicpm-v-2_6-mmproj-f16.gguf
|
||||
sha256: f8a805e9e62085805c69c427287acefc284932eb4abfe6e1b1ce431d27e2f4e0
|
||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
|
||||
- &mistral03 ## START Mistral
|
||||
url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
|
||||
name: "mistral-7b-instruct-v0.3"
|
||||
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/62dac1c7a8ead43d20e3e17a/wrLf5yaGC6ng4XME70w6Z.png
|
||||
@@ -5980,7 +6152,6 @@
|
||||
- !!merge <<: *mistral03
|
||||
name: "mn-12b-mag-mell-r1-iq-arm-imatrix"
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
icon: "https://i.imgur.com/wjyAaTO.png"
|
||||
urls:
|
||||
- https://huggingface.co/inflatebot/MN-12B-Mag-Mell-R1
|
||||
- https://huggingface.co/Lewdiculous/MN-12B-Mag-Mell-R1-GGUF-IQ-ARM-Imatrix
|
||||
@@ -6077,8 +6248,35 @@
|
||||
- filename: Nera_Noctis-12B-Q4_K_M.gguf
|
||||
sha256: 0662a9a847adde046e6255c15d5a677ebf09ab00841547c8963668d14baf00ff
|
||||
uri: huggingface://bartowski/Nera_Noctis-12B-GGUF/Nera_Noctis-12B-Q4_K_M.gguf
|
||||
- &mudler
|
||||
### START mudler's LocalAI specific-models
|
||||
- !!merge <<: *mistral03
|
||||
name: "wayfarer-12b"
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
icon: https://huggingface.co/LatitudeGames/Wayfarer-12B/resolve/main/wayfarer.jpg
|
||||
urls:
|
||||
- https://huggingface.co/LatitudeGames/Wayfarer-12B
|
||||
- https://huggingface.co/bartowski/Wayfarer-12B-GGUF
|
||||
description: |
|
||||
We’ve heard over and over from AI Dungeon players that modern AI models are too nice, never letting them fail or die. While it may be good for a chatbot to be nice and helpful, great stories and games aren’t all rainbows and unicorns. They have conflict, tension, and even death. These create real stakes and consequences for characters and the journeys they go on.
|
||||
|
||||
Similarly, great games need opposition. You must be able to fail, die, and may even have to start over. This makes games more fun!
|
||||
|
||||
However, the vast majority of AI models, through alignment RLHF, have been trained away from darkness, violence, or conflict, preventing them from fulfilling this role. To give our players better options, we decided to train our own model to fix these issues.
|
||||
|
||||
Wayfarer is an adventure role-play model specifically trained to give players a challenging and dangerous experience. We thought they would like it, but since releasing it on AI Dungeon, players have reacted even more positively than we expected.
|
||||
|
||||
Because they loved it so much, we’ve decided to open-source the model so anyone can experience unforgivingly brutal AI adventures! Anyone can download the model to run locally.
|
||||
|
||||
Or if you want to easily try this model for free, you can do so at https://aidungeon.com.
|
||||
|
||||
We plan to continue improving and open-sourcing similar models, so please share any and all feedback on how we can improve model behavior. Below we share more details on how Wayfarer was created.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Wayfarer-12B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Wayfarer-12B-Q4_K_M.gguf
|
||||
sha256: 6cd9f290c820c64854fcdcfd312b066447acc2f63abe2e2e71af9bc4f1946c08
|
||||
uri: huggingface://bartowski/Wayfarer-12B-GGUF/Wayfarer-12B-Q4_K_M.gguf
|
||||
- &mudler ### START mudler's LocalAI specific-models
|
||||
url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
|
||||
name: "LocalAI-llama3-8b-function-call-v0.2"
|
||||
icon: "https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/us5JKi9z046p8K-cn_M0w.webp"
|
||||
@@ -6123,8 +6321,7 @@
|
||||
- filename: Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin
|
||||
sha256: 579cbb229f9c11d0330759ff4733102d2491615a4c61289e26c09d1b3a583fec
|
||||
uri: huggingface://mudler/Mirai-Nova-Llama3-LocalAI-8B-v0.1-GGUF/Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin
|
||||
- &parler-tts
|
||||
### START parler-tts
|
||||
- &parler-tts ### START parler-tts
|
||||
url: "github:mudler/LocalAI/gallery/parler-tts.yaml@master"
|
||||
name: parler-tts-mini-v0.1
|
||||
overrides:
|
||||
@@ -6141,8 +6338,7 @@
|
||||
- cpu
|
||||
- text-to-speech
|
||||
- python
|
||||
- &rerankers
|
||||
### START rerankers
|
||||
- &rerankers ### START rerankers
|
||||
url: "github:mudler/LocalAI/gallery/rerankers.yaml@master"
|
||||
name: cross-encoder
|
||||
parameters:
|
||||
@@ -6182,6 +6378,7 @@
|
||||
- &gemma
|
||||
url: "github:mudler/LocalAI/gallery/gemma.yaml@master"
|
||||
name: "gemma-2b"
|
||||
icon: https://avatars.githubusercontent.com/u/1342004
|
||||
license: gemma
|
||||
urls:
|
||||
- https://ai.google.dev/gemma/docs
|
||||
@@ -6897,7 +7094,7 @@
|
||||
uri: huggingface://bartowski/GWQ-9B-Preview2-GGUF/GWQ-9B-Preview2-Q4_K_M.gguf
|
||||
- &llama3
|
||||
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
|
||||
icon: https://avatars.githubusercontent.com/u/153379578
|
||||
name: "llama3-8b-instruct"
|
||||
license: llama3
|
||||
description: |
|
||||
@@ -7064,10 +7261,9 @@
|
||||
name: "l3-8b-stheno-v3.1"
|
||||
urls:
|
||||
- https://huggingface.co/Sao10K/L3-8B-Stheno-v3.1
|
||||
icon: https://w.forfun.com/fetch/cb/cba2205390e517bea1ea60ca0b491af4.jpeg
|
||||
description: |
|
||||
- A model made for 1-on-1 Roleplay ideally, but one that is able to handle scenarios, RPGs and storywriting fine.
|
||||
- Uncensored during actual roleplay scenarios. # I do not care for zero-shot prompting like what some people do. It is uncensored enough in actual usecases.
|
||||
- Uncensored during actual roleplay scenarios. # I do not care for zero-shot prompting like what some people do. It is uncensored enough in actual usecases.
|
||||
- I quite like the prose and style for this model.
|
||||
overrides:
|
||||
parameters:
|
||||
@@ -7858,7 +8054,6 @@
|
||||
urls:
|
||||
- https://huggingface.co/bartowski/New-Dawn-Llama-3-70B-32K-v1.0-GGUF
|
||||
- https://huggingface.co/sophosympatheia/New-Dawn-Llama-3-70B-32K-v1.0
|
||||
icon: https://imgur.com/tKzncGo.png
|
||||
description: |
|
||||
This model is a multi-level SLERP merge of several Llama 3 70B variants. See the merge recipe below for details. I extended the context window for this model out to 32K by snagging some layers from abacusai/Smaug-Llama-3-70B-Instruct-32K using a technique similar to what I used for Midnight Miqu, which was further honed by jukofyork.
|
||||
This model is uncensored. You are responsible for whatever you do with it.
|
||||
@@ -8210,7 +8405,8 @@
|
||||
- filename: dolphin-2.9.2-Phi-3-Medium-abliterated-Q4_K_M.gguf
|
||||
sha256: 566331c2efe87725310aacb709ca15088a0063fa0ddc14a345bf20d69982156b
|
||||
uri: huggingface://bartowski/dolphin-2.9.2-Phi-3-Medium-abliterated-GGUF/dolphin-2.9.2-Phi-3-Medium-abliterated-Q4_K_M.gguf
|
||||
- url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
- !!merge <<: *llama3
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
name: "llama-3-8b-instruct-dpo-v0.3-32k"
|
||||
license: llama3
|
||||
urls:
|
||||
@@ -8364,7 +8560,7 @@
|
||||
urls:
|
||||
- https://huggingface.co/arcee-ai/Llama-3-SEC-Chat-GGUF
|
||||
- https://huggingface.co/arcee-ai/Llama-3-SEC-Chat
|
||||
icon: https://i.ibb.co/kHtBmDN/w8m6-X4-HCQRa-IR86ar-Cm5gg.webp
|
||||
icon: https://avatars.githubusercontent.com/u/126496414
|
||||
tags:
|
||||
- llama3
|
||||
- gguf
|
||||
@@ -8394,10 +8590,9 @@
|
||||
- filename: Copus-2x8B.i1-Q4_K_M.gguf
|
||||
sha256: 685da1ba49e203e8f491105585143d76044286d4b4687bed37d325f6b55501e5
|
||||
uri: huggingface://mradermacher/Copus-2x8B-i1-GGUF/Copus-2x8B.i1-Q4_K_M.gguf
|
||||
- &yi-chat
|
||||
### Start Yi
|
||||
- &yi-chat ### Start Yi
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
icon: "https://raw.githubusercontent.com/01-ai/Yi/main/assets/img/Yi_logo_icon_light.svg"
|
||||
icon: "https://github.com/01-ai/Yi/raw/main/assets/img/Yi_logo_icon_light.svg"
|
||||
name: "yi-1.5-9b-chat"
|
||||
license: apache-2.0
|
||||
urls:
|
||||
@@ -8606,8 +8801,7 @@
|
||||
- filename: Fimbulvetr-11B-v2-Q4_K_M-imat.gguf
|
||||
sha256: 3f309b59508342536a70edd6c4be6cf4f2cb97f2e32cbc79ad2ab3f4c02933a4
|
||||
uri: huggingface://Lewdiculous/Fimbulvetr-11B-v2-GGUF-IQ-Imatrix/Fimbulvetr-11B-v2-Q4_K_M-imat.gguf
|
||||
- &noromaid
|
||||
### Start noromaid
|
||||
- &noromaid ### Start noromaid
|
||||
url: "github:mudler/LocalAI/gallery/noromaid.yaml@master"
|
||||
name: "noromaid-13b-0.4-DPO"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/630dfb008df86f1e5becadc3/VKX2Z2yjZX5J8kXzgeCYO.png
|
||||
@@ -8627,8 +8821,7 @@
|
||||
- filename: Noromaid-13B-0.4-DPO.q4_k_m.gguf
|
||||
sha256: cb28e878d034fae3d0b43326c5fc1cfb4ab583b17c56e41d6ce023caec03c1c1
|
||||
uri: huggingface://NeverSleep/Noromaid-13B-0.4-DPO-GGUF/Noromaid-13B-0.4-DPO.q4_k_m.gguf
|
||||
- &wizardlm2
|
||||
### START Vicuna based
|
||||
- &wizardlm2 ### START Vicuna based
|
||||
url: "github:mudler/LocalAI/gallery/wizardlm2.yaml@master"
|
||||
name: "wizardlm2-7b"
|
||||
description: |
|
||||
@@ -8683,8 +8876,9 @@
|
||||
- filename: moondream2-mmproj-f16.gguf
|
||||
sha256: 4cc1cb3660d87ff56432ebeb7884ad35d67c48c7b9f6b2856f305e39c38eed8f
|
||||
uri: huggingface://moondream/moondream2-gguf/moondream2-mmproj-f16.gguf
|
||||
- &llava
|
||||
### START LLaVa
|
||||
- &llava ### START LLaVa
|
||||
name: "llava-1.6-vicuna"
|
||||
icon: https://github.com/lobehub/lobe-icons/raw/master/packages/static-png/dark/llava-color.png
|
||||
url: "github:mudler/LocalAI/gallery/llava.yaml@master"
|
||||
license: apache-2.0
|
||||
description: |
|
||||
@@ -8698,7 +8892,6 @@
|
||||
- gpu
|
||||
- llama2
|
||||
- cpu
|
||||
name: "llava-1.6-vicuna"
|
||||
overrides:
|
||||
mmproj: mmproj-vicuna7b-f16.gguf
|
||||
parameters:
|
||||
@@ -9026,7 +9219,7 @@
|
||||
urls:
|
||||
- https://huggingface.co/BAAI/Bunny-Llama-3-8B-V-gguf
|
||||
description: |
|
||||
Bunny is a family of lightweight but powerful multimodal models. It offers multiple plug-and-play vision encoders, like EVA-CLIP, SigLIP and language backbones, including Llama-3-8B, Phi-1.5, StableLM-2, Qwen1.5, MiniCPM and Phi-2. To compensate for the decrease in model size, we construct more informative training data by curated selection from a broader data source.
|
||||
Bunny is a family of lightweight but powerful multimodal models. It offers multiple plug-and-play vision encoders, like EVA-CLIP, SigLIP and language backbones, including Llama-3-8B, Phi-1.5, StableLM-2, Qwen1.5, and Phi-2. To compensate for the decrease in model size, we construct more informative training data by curated selection from a broader data source.
|
||||
|
||||
We provide Bunny-Llama-3-8B-V, which is built upon SigLIP and Llama-3-8B-Instruct. More details about this model can be found in GitHub.
|
||||
icon: https://huggingface.co/BAAI/Bunny-Llama-3-8B-V-gguf/resolve/main/icon.png
|
||||
@@ -9075,6 +9268,7 @@
|
||||
uri: huggingface://xtuner/llava-llama-3-8b-v1_1-gguf/llava-llama-3-8b-v1_1-mmproj-f16.gguf
|
||||
- !!merge <<: *llama3
|
||||
name: "minicpm-llama3-v-2_5"
|
||||
icon: https://avatars.githubusercontent.com/u/89920203
|
||||
urls:
|
||||
- https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf
|
||||
- https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5
|
||||
@@ -9165,7 +9359,6 @@
|
||||
June 18, 2024 Update, After extensive testing of the intermediate checkpoints, significant progress has been made.
|
||||
The model is slowly — I mean, really slowly — unlearning its alignment. By significantly lowering the learning rate, I was able to visibly observe deep behavioral changes, this process is taking longer than anticipated, but it's going to be worth it. Estimated time to completion: 4 more days.. I'm pleased to report that in several tests, the model not only maintained its intelligence but actually showed a slight improvement, especially in terms of common sense. An intermediate checkpoint of this model was used to create invisietch/EtherealRainbow-v0.3-rc7, with promising results. Currently, it seems like I'm on the right track. I hope this model will serve as a solid foundation for further merges, whether for role-playing (RP) or for uncensoring. This approach also allows us to save on actual fine-tuning, thereby reducing our carbon footprint. The merge process takes just a few minutes of CPU time, instead of days of GPU work.
|
||||
June 20, 2024 Update, Unaligning was partially successful, and the results are decent, but I am not fully satisfied. I decided to bite the bullet, and do a full finetune, god have mercy on my GPUs. I am also releasing the intermediate checkpoint of this model.
|
||||
icon: https://i.imgur.com/Kpk1PgZ.png
|
||||
overrides:
|
||||
parameters:
|
||||
model: LLAMA-3_8B_Unaligned_Alpha-Q4_K_M.gguf
|
||||
@@ -9191,7 +9384,6 @@
|
||||
uri: huggingface://bartowski/L3-8B-Lunaris-v1-GGUF/L3-8B-Lunaris-v1-Q4_K_M.gguf
|
||||
- !!merge <<: *llama3
|
||||
name: "llama-3_8b_unaligned_alpha_rp_soup-i1"
|
||||
icon: https://i.imgur.com/pXcjpoV.png
|
||||
urls:
|
||||
- https://huggingface.co/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha_RP_Soup
|
||||
- https://huggingface.co/mradermacher/LLAMA-3_8B_Unaligned_Alpha_RP_Soup-i1-GGUF
|
||||
@@ -9541,8 +9733,7 @@
|
||||
- filename: Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf
|
||||
sha256: cdc0f4de6df2ba120835fbd25c2a0ae2af8548f46d2c40c7a018c51c3d19e0c0
|
||||
uri: huggingface://mradermacher/Freyja-v4.95-maldv-7b-NON-FICTION-i1-GGUF/Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf
|
||||
- &chatml
|
||||
### ChatML
|
||||
- &chatml ### ChatML
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
name: "una-thepitbull-21.4b-v2"
|
||||
license: afl-3.0
|
||||
@@ -9590,7 +9781,6 @@
|
||||
sha256: 9c90f3a65332a03a6cbb563eee19c7586d9544f646ff9f33f7f1904b3d415ae2
|
||||
uri: huggingface://nold/HelpingAI-9B-GGUF/HelpingAI-9B_Q4_K_M.gguf
|
||||
- url: "github:mudler/LocalAI/gallery/chatml-hercules.yaml@master"
|
||||
icon: "https://tse3.mm.bing.net/th/id/OIG1.vnrl3xpEcypR3McLW63q?pid=ImgGn"
|
||||
urls:
|
||||
- https://huggingface.co/Locutusque/Llama-3-Hercules-5.0-8B
|
||||
- https://huggingface.co/bartowski/Llama-3-Hercules-5.0-8B-GGUF
|
||||
@@ -9828,8 +10018,7 @@
|
||||
- filename: Triangulum-10B.Q4_K_M.gguf
|
||||
sha256: dd071f99edf6b166044bf229cdeec19419c4c348e3fc3d6587cfcc55e6fb85fa
|
||||
uri: huggingface://mradermacher/Triangulum-10B-GGUF/Triangulum-10B.Q4_K_M.gguf
|
||||
- &command-R
|
||||
### START Command-r
|
||||
- &command-R ### START Command-r
|
||||
url: "github:mudler/LocalAI/gallery/command-r.yaml@master"
|
||||
name: "command-r-v01:q1_s"
|
||||
license: "cc-by-nc-4.0"
|
||||
@@ -9884,8 +10073,7 @@
|
||||
- filename: "aya-23-35B-Q4_K_M.gguf"
|
||||
sha256: "57824768c1a945e21e028c8e9a29b39adb4838d489f5865c82601ab9ad98065d"
|
||||
uri: "huggingface://bartowski/aya-23-35B-GGUF/aya-23-35B-Q4_K_M.gguf"
|
||||
- &phi-2-chat
|
||||
### START Phi-2
|
||||
- &phi-2-chat ### START Phi-2
|
||||
url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master"
|
||||
license: mit
|
||||
description: |
|
||||
@@ -9914,6 +10102,7 @@
|
||||
- llama2
|
||||
- cpu
|
||||
name: "phi-2-chat:Q8_0"
|
||||
icon: https://avatars.githubusercontent.com/u/6154722
|
||||
overrides:
|
||||
parameters:
|
||||
model: phi-2-layla-v1-chatml-Q8_0.gguf
|
||||
@@ -9960,7 +10149,7 @@
|
||||
urls:
|
||||
- https://huggingface.co/internlm/internlm2_5-7b-chat-1m
|
||||
- https://huggingface.co/bartowski/internlm2_5-7b-chat-1m-GGUF
|
||||
icon: https://github.com/InternLM/InternLM/assets/22529082/b9788105-8892-4398-8b47-b513a292378e
|
||||
icon: https://avatars.githubusercontent.com/u/135356492
|
||||
tags:
|
||||
- internlm2
|
||||
- gguf
|
||||
@@ -9981,10 +10170,35 @@
|
||||
- filename: internlm2_5-7b-chat-1m-Q4_K_M.gguf
|
||||
uri: huggingface://bartowski/internlm2_5-7b-chat-1m-GGUF/internlm2_5-7b-chat-1m-Q4_K_M.gguf
|
||||
sha256: 10d5e18a4125f9d4d74a9284a21e0c820b150af06dee48665e54ff6e1be3a564
|
||||
- &phi-3
|
||||
### START Phi-3
|
||||
### Internlm3
|
||||
- name: "internlm3-8b-instruct"
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/internlm/internlm3-8b-instruct
|
||||
- https://huggingface.co/bartowski/internlm3-8b-instruct-GGUF
|
||||
icon: https://avatars.githubusercontent.com/u/135356492
|
||||
tags:
|
||||
- internlm3
|
||||
- gguf
|
||||
- cpu
|
||||
- gpu
|
||||
description: |
|
||||
InternLM3 has open-sourced an 8-billion parameter instruction model, InternLM3-8B-Instruct, designed for general-purpose usage and advanced reasoning. The model has the following characteristics:
|
||||
|
||||
Enhanced performance at reduced cost: State-of-the-art performance on reasoning and knowledge-intensive tasks surpass models like Llama3.1-8B and Qwen2.5-7B.
|
||||
|
||||
Deep thinking capability: InternLM3 supports both the deep thinking mode for solving complicated reasoning tasks via the long chain-of-thought and the normal response mode for fluent user interactions.
|
||||
overrides:
|
||||
parameters:
|
||||
model: internlm3-8b-instruct-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: internlm3-8b-instruct-Q4_K_M.gguf
|
||||
uri: huggingface://bartowski/internlm3-8b-instruct-GGUF/internlm3-8b-instruct-Q4_K_M.gguf
|
||||
sha256: 2a9644687318e8659c9cf9b40730d5cc2f5af06f786a50439c7c51359b23896e
|
||||
- &phi-3 ### START Phi-3
|
||||
url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master"
|
||||
name: "phi-3-mini-4k-instruct"
|
||||
icon: https://avatars.githubusercontent.com/u/6154722
|
||||
license: mit
|
||||
description: |
|
||||
The Phi-3-Mini-4K-Instruct is a 3.8B parameters, lightweight, state-of-the-art open model trained with the Phi-3 datasets that includes both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. The model belongs to the Phi-3 family with the Mini version in two variants 4K and 128K which is the context length (in tokens) it can support. The model has underwent a post-training process that incorporates both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks testing common sense, language understanding, math, code, long context and logical reasoning, Phi-3 Mini-4K-Instruct showcased a robust and state-of-the-art performance among models with less than 13 billion parameters.
|
||||
@@ -10181,8 +10395,7 @@
|
||||
- filename: Phi-3.5-MoE-instruct-Q4_K_M.gguf
|
||||
sha256: 43e91bb720869bd8a92d8eb86bc3c74a52c49cf61642ca709b3d7bb89644df36
|
||||
uri: huggingface://bartowski/Phi-3.5-MoE-instruct-GGUF/Phi-3.5-MoE-instruct-Q4_K_M.gguf
|
||||
- &hermes-2-pro-mistral
|
||||
### START Hermes
|
||||
- &hermes-2-pro-mistral ### START Hermes
|
||||
url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
|
||||
name: "hermes-2-pro-mistral"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png
|
||||
@@ -10518,8 +10731,7 @@
|
||||
- filename: "galatolo-Q4_K.gguf"
|
||||
sha256: "ca0cfd5a9ad40dc16416aa3a277015d0299b62c0803b67f5709580042202c172"
|
||||
uri: "huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q4_K.gguf"
|
||||
- &codellama
|
||||
### START Codellama
|
||||
- &codellama ### START Codellama
|
||||
url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
|
||||
name: "codellama-7b"
|
||||
license: llama2
|
||||
@@ -10650,8 +10862,7 @@
|
||||
- filename: "llm-compiler-7b-ftd.Q4_K.gguf"
|
||||
uri: "huggingface://legraphista/llm-compiler-7b-ftd-IMat-GGUF/llm-compiler-7b-ftd.Q4_K.gguf"
|
||||
sha256: d862dd18ed335413787d0ad196522a9902a3c10a6456afdab8721822cb0ddde8
|
||||
- &openvino
|
||||
### START OpenVINO
|
||||
- &openvino ### START OpenVINO
|
||||
url: "github:mudler/LocalAI/gallery/openvino.yaml@master"
|
||||
name: "openvino-llama-3-8b-instruct-ov-int8"
|
||||
license: llama3
|
||||
@@ -10765,8 +10976,7 @@
|
||||
- gpu
|
||||
- embedding
|
||||
- cpu
|
||||
- &sentencentransformers
|
||||
### START Embeddings
|
||||
- &sentencentransformers ### START Embeddings
|
||||
description: |
|
||||
This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar text are closer and can efficiently be found using cosine similarity.
|
||||
urls:
|
||||
@@ -10781,8 +10991,7 @@
|
||||
overrides:
|
||||
parameters:
|
||||
model: all-MiniLM-L6-v2
|
||||
- &dreamshaper
|
||||
### START Image generation
|
||||
- &dreamshaper ### START Image generation
|
||||
name: dreamshaper
|
||||
icon: https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/dd9b038c-bd15-43ab-86ab-66e145ad7ff2/width=450/26072158-132340247-8k%20portrait%20of%20beautiful%20cyborg%20with%20brown%20hair,%20intricate,%20elegant,%20highly%20detailed,%20majestic,%20digital%20photography,%20art%20by%20artg_ed.jpeg
|
||||
license: other
|
||||
@@ -10819,6 +11028,36 @@
|
||||
- sd-3
|
||||
- gpu
|
||||
url: "github:mudler/LocalAI/gallery/stablediffusion3.yaml@master"
|
||||
- name: sd-3.5-large-ggml
|
||||
license: stabilityai-ai-community
|
||||
url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master"
|
||||
description: |
|
||||
Stable Diffusion 3.5 Large is a Multimodal Diffusion Transformer (MMDiT) text-to-image model that features improved performance in image quality, typography, complex prompt understanding, and resource-efficiency.
|
||||
urls:
|
||||
- https://huggingface.co/stabilityai/stable-diffusion-3.5-large
|
||||
- https://huggingface.co/second-state/stable-diffusion-3.5-large-GGUF
|
||||
tags:
|
||||
- text-to-image
|
||||
- flux
|
||||
- gpu
|
||||
- cpu
|
||||
icon: https://huggingface.co/stabilityai/stable-diffusion-3.5-large/media/main/sd3.5_large_demo.png
|
||||
overrides:
|
||||
parameters:
|
||||
model: sd3.5_large-Q4_0.gguf
|
||||
files:
|
||||
- filename: "sd3.5_large-Q4_0.gguf"
|
||||
sha256: "c79ed6cdaa7decaca6b05ccc636b956b37c47de9b104c56315ca8ed086347b00"
|
||||
uri: "huggingface://second-state/stable-diffusion-3.5-large-GGUF/sd3.5_large-Q4_0.gguf"
|
||||
- filename: clip_g.safetensors
|
||||
sha256: ec310df2af79c318e24d20511b601a591ca8cd4f1fce1d8dff822a356bcdb1f4
|
||||
uri: huggingface://second-state/stable-diffusion-3.5-large-GGUF/clip_g.safetensors
|
||||
- filename: clip_l.safetensors
|
||||
sha256: 660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd
|
||||
uri: huggingface://second-state/stable-diffusion-3.5-large-GGUF/clip_l.safetensors
|
||||
- filename: t5xxl-Q5_0.gguf
|
||||
sha256: f4df16c641a05c4a6ca717068ba3ee312875000f6fac0efbd152915553b5fc3e
|
||||
uri: huggingface://second-state/stable-diffusion-3.5-large-GGUF/t5xxl-Q5_0.gguf
|
||||
- &flux
|
||||
name: flux.1-dev
|
||||
license: flux-1-dev-non-commercial-license
|
||||
@@ -10894,8 +11133,7 @@
|
||||
- filename: t5xxl_fp16.safetensors
|
||||
sha256: 6e480b09fae049a72d2a8c5fbccb8d3e92febeb233bbe9dfe7256958a9167635
|
||||
uri: https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp16.safetensors
|
||||
- &whisper
|
||||
## Whisper
|
||||
- &whisper ## Whisper
|
||||
url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master"
|
||||
name: "whisper-1"
|
||||
license: "MIT"
|
||||
@@ -11075,17 +11313,7 @@
|
||||
description: |
|
||||
Stable Diffusion in NCNN with c++, supported txt2img and img2img
|
||||
name: stablediffusion-cpp
|
||||
## Tiny Dream
|
||||
- url: github:mudler/LocalAI/gallery/tinydream.yaml@master
|
||||
name: tinydream
|
||||
license: "BSD-3"
|
||||
urls:
|
||||
- https://github.com/symisc/tiny-dream
|
||||
- https://github.com/symisc/tiny-dream/blob/main/LICENSE
|
||||
description: |
|
||||
An embedded, Header Only, Stable Diffusion C++ implementation
|
||||
- &piper
|
||||
## Piper TTS
|
||||
- &piper ## Piper TTS
|
||||
url: github:mudler/LocalAI/gallery/piper.yaml@master
|
||||
name: voice-en-us-kathleen-low
|
||||
icon: https://github.com/rhasspy/piper/raw/master/etc/logo.png
|
||||
|
||||
12
gallery/sd-ggml.yaml
Normal file
12
gallery/sd-ggml.yaml
Normal file
@@ -0,0 +1,12 @@
|
||||
---
|
||||
name: "sd-ggml"
|
||||
|
||||
config_file: |
|
||||
backend: stablediffusion-ggml
|
||||
step: 25
|
||||
cfg_scale: 4.5
|
||||
options:
|
||||
- "clip_l_path:clip_l.safetensors"
|
||||
- "clip_g_path:clip_g.safetensors"
|
||||
- "t5xxl_path:t5xxl-Q5_0.gguf"
|
||||
- "sampler:euler"
|
||||
@@ -1,37 +0,0 @@
|
||||
---
|
||||
name: "tinydream"
|
||||
|
||||
config_file: |
|
||||
name: tinydream
|
||||
backend: tinydream
|
||||
parameters:
|
||||
model: tinydream_assets
|
||||
|
||||
files:
|
||||
- filename: "tinydream_assets/AutoencoderKL-fp16.bin"
|
||||
sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
|
||||
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/AutoencoderKL-fp16.bin"
|
||||
- filename: "tinydream_assets/AutoencoderKL-fp16.param"
|
||||
sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
|
||||
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/AutoencoderKL-fp16.param"
|
||||
- filename: "tinydream_assets/FrozenCLIPEmbedder-fp16.bin"
|
||||
sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
|
||||
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/FrozenCLIPEmbedder-fp16.bin"
|
||||
- filename: "tinydream_assets/FrozenCLIPEmbedder-fp16.param"
|
||||
sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
|
||||
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/FrozenCLIPEmbedder-fp16.param"
|
||||
- filename: "tinydream_assets/RealESRGAN_x4plus_anime.bin"
|
||||
sha256: "fe01c269cfd10cdef8e018ab66ebe750cf79c7af4d1f9c16c737e1295229bacc"
|
||||
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/RealESRGAN_x4plus_anime.bin"
|
||||
- filename: "tinydream_assets/RealESRGAN_x4plus_anime.param"
|
||||
sha256: "2b8fb6e0ae4d2d85704ca08c119a2f5ea40add4f2ecd512eb7f4cd44b6127ed4"
|
||||
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/RealESRGAN_x4plus_anime.param"
|
||||
- filename: "tinydream_assets/UNetModel-fp16.bin"
|
||||
sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
|
||||
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/UNetModel-fp16.bin"
|
||||
- filename: "tinydream_assets/UNetModel-fp16.param"
|
||||
sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
|
||||
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/UNetModel-fp16.param"
|
||||
- filename: "tinydream_assets/vocab.txt"
|
||||
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
|
||||
uri: "https://github.com/M0Rf30/tiny-dream-bins/releases/download/1.0/vocab.txt"
|
||||
2
go.mod
2
go.mod
@@ -6,7 +6,6 @@ toolchain go1.23.1
|
||||
|
||||
require (
|
||||
dario.cat/mergo v1.0.1
|
||||
github.com/M0Rf30/go-tiny-dream v0.0.0-20240425104733-c04fa463ace9
|
||||
github.com/Masterminds/sprig/v3 v3.3.0
|
||||
github.com/alecthomas/kong v0.9.0
|
||||
github.com/census-instrumentation/opencensus-proto v0.4.1
|
||||
@@ -94,6 +93,7 @@ require (
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||
github.com/nikolalohinski/gonja/v2 v2.3.2 // indirect
|
||||
github.com/philippgille/chromem-go v0.7.0 // indirect
|
||||
github.com/pion/datachannel v1.5.10 // indirect
|
||||
github.com/pion/dtls/v2 v2.2.12 // indirect
|
||||
github.com/pion/ice/v2 v2.3.37 // indirect
|
||||
|
||||
4
go.sum
4
go.sum
@@ -27,8 +27,6 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03
|
||||
github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
|
||||
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
|
||||
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
|
||||
github.com/M0Rf30/go-tiny-dream v0.0.0-20240425104733-c04fa463ace9 h1:ASsbvw7wQPldWpwKdmYRszJ2A8Cj3oJDr4zO0DiXvN4=
|
||||
github.com/M0Rf30/go-tiny-dream v0.0.0-20240425104733-c04fa463ace9/go.mod h1:UOf2Mb/deUri5agct5OJ4SLWjhI+kZKbsUVUeRb24I0=
|
||||
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
|
||||
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
|
||||
github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0=
|
||||
@@ -613,6 +611,8 @@ github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 h1:Ii+DKncOVM8Cu1H
|
||||
github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5/go.mod h1:iIss55rKnNBTvrwdmkUpLnDpZoAHvWaiq5+iMmen4AE=
|
||||
github.com/philhofer/fwd v1.1.2 h1:bnDivRJ1EWPjUIRXV5KfORO897HTbpFAQddBdE8t7Gw=
|
||||
github.com/philhofer/fwd v1.1.2/go.mod h1:qkPdfjR2SIEbspLqpe1tO4n5yICnr2DY7mqEx2tUTP0=
|
||||
github.com/philippgille/chromem-go v0.7.0 h1:4jfvfyKymjKNfGxBUhHUcj1kp7B17NL/I1P+vGh1RvY=
|
||||
github.com/philippgille/chromem-go v0.7.0/go.mod h1:hTd+wGEm/fFPQl7ilfCwQXkgEUxceYh86iIdoKMolPo=
|
||||
github.com/pierrec/lz4/v4 v4.1.2 h1:qvY3YFXRQE/XB8MlLzJH7mSzBs74eA2gg52YTk6jUPM=
|
||||
github.com/pierrec/lz4/v4 v4.1.2/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
|
||||
github.com/pion/datachannel v1.5.8 h1:ph1P1NsGkazkjrvyMfhRBUAWMxugJjq2HfQifaOoSNo=
|
||||
|
||||
@@ -46,8 +46,7 @@ type Backend interface {
|
||||
Status(ctx context.Context) (*pb.StatusResponse, error)
|
||||
|
||||
StoresSet(ctx context.Context, in *pb.StoresSetOptions, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error)
|
||||
StoresReset(ctx context.Context, in *pb.StoresResetOptions, opts ...grpc.CallOption) (*pb.Result, error)
|
||||
StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error)
|
||||
|
||||
Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.CallOption) (*pb.RerankResult, error)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user