mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-03 03:02:38 -05:00
Compare commits
24 Commits
chromem_st
...
speculativ
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b16a01d0bd | ||
|
|
9a1182fa01 | ||
|
|
66e9ef3f33 | ||
|
|
8282414583 | ||
|
|
d1d7ce83d4 | ||
|
|
5177837ab0 | ||
|
|
f9e368b7c4 | ||
|
|
eef80b9880 | ||
|
|
073eaec729 | ||
|
|
318225f631 | ||
|
|
89429a439b | ||
|
|
200fe358f0 | ||
|
|
e426ab7c23 | ||
|
|
715071b68d | ||
|
|
a05737c7e4 | ||
|
|
e8eb0b2c50 | ||
|
|
e15d29aba2 | ||
|
|
10675ac28e | ||
|
|
0ec25b8b07 | ||
|
|
e81ceff681 | ||
|
|
6831719e1e | ||
|
|
b264a91b3f | ||
|
|
1a08948e63 | ||
|
|
14a1e02f44 |
@@ -7,7 +7,7 @@ services:
|
||||
args:
|
||||
- FFMPEG=true
|
||||
- IMAGE_TYPE=extras
|
||||
- GO_TAGS=stablediffusion p2p tts
|
||||
- GO_TAGS=p2p tts
|
||||
env_file:
|
||||
- ../.env
|
||||
ports:
|
||||
|
||||
6
.env
6
.env
@@ -38,12 +38,12 @@
|
||||
## Uncomment and set to true to enable rebuilding from source
|
||||
# REBUILD=true
|
||||
|
||||
## Enable go tags, available: stablediffusion, tts
|
||||
## stablediffusion: image generation with stablediffusion
|
||||
## Enable go tags, available: p2p, tts
|
||||
## p2p: enable distributed inferencing
|
||||
## tts: enables text-to-speech with go-piper
|
||||
## (requires REBUILD=true)
|
||||
#
|
||||
# GO_TAGS=stablediffusion
|
||||
# GO_TAGS=p2p
|
||||
|
||||
## Path where to store generated images
|
||||
# LOCALAI_IMAGE_PATH=/tmp/generated/images
|
||||
|
||||
35
.github/workflows/release.yaml
vendored
35
.github/workflows/release.yaml
vendored
@@ -237,40 +237,7 @@ jobs:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
limit-access-to-actor: true
|
||||
build-stablediffusion:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: '1.21.x'
|
||||
cache: false
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||
- name: Build stablediffusion
|
||||
run: |
|
||||
export PATH=$PATH:$GOPATH/bin
|
||||
make backend-assets/grpc/stablediffusion
|
||||
mkdir -p release && cp backend-assets/grpc/stablediffusion release
|
||||
env:
|
||||
GO_TAGS: stablediffusion
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: stablediffusion
|
||||
path: release/
|
||||
- name: Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
with:
|
||||
files: |
|
||||
release/*
|
||||
|
||||
|
||||
build-macOS-x86_64:
|
||||
runs-on: macos-13
|
||||
|
||||
51
.github/workflows/test-extra.yml
vendored
51
.github/workflows/test-extra.yml
vendored
@@ -78,57 +78,6 @@ jobs:
|
||||
make --jobs=5 --output-sync=target -C backend/python/diffusers
|
||||
make --jobs=5 --output-sync=target -C backend/python/diffusers test
|
||||
|
||||
tests-parler-tts:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
|
||||
- name: Test parler-tts
|
||||
run: |
|
||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts
|
||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts test
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.19
|
||||
with:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
limit-access-to-actor: true
|
||||
|
||||
tests-openvoice:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||
|
||||
- name: Test openvoice
|
||||
run: |
|
||||
make --jobs=5 --output-sync=target -C backend/python/openvoice
|
||||
make --jobs=5 --output-sync=target -C backend/python/openvoice test
|
||||
|
||||
# tests-transformers-musicgen:
|
||||
# runs-on: ubuntu-latest
|
||||
# steps:
|
||||
|
||||
6
.github/workflows/test.yml
vendored
6
.github/workflows/test.yml
vendored
@@ -105,9 +105,7 @@ jobs:
|
||||
# Pre-build piper before we start tests in order to have shared libraries in place
|
||||
make sources/go-piper && \
|
||||
GO_TAGS="tts" make -C sources/go-piper piper.o && \
|
||||
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
|
||||
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
|
||||
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/
|
||||
env:
|
||||
CUDA_VERSION: 12-4
|
||||
- name: Cache grpc
|
||||
@@ -129,7 +127,7 @@ jobs:
|
||||
cd grpc && cd cmake/build && sudo make --jobs 5 install
|
||||
- name: Test
|
||||
run: |
|
||||
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
|
||||
PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
|
||||
- name: Setup tmate session if tests fail
|
||||
if: ${{ failure() }}
|
||||
uses: mxschmitt/action-tmate@v3.19
|
||||
|
||||
2
.vscode/launch.json
vendored
2
.vscode/launch.json
vendored
@@ -26,7 +26,7 @@
|
||||
"LOCALAI_P2P": "true",
|
||||
"LOCALAI_FEDERATED": "true"
|
||||
},
|
||||
"buildFlags": ["-tags", "stablediffusion p2p tts", "-v"],
|
||||
"buildFlags": ["-tags", "p2p tts", "-v"],
|
||||
"envFile": "${workspaceFolder}/.env",
|
||||
"cwd": "${workspaceRoot}"
|
||||
}
|
||||
|
||||
53
Dockerfile
53
Dockerfile
@@ -15,8 +15,7 @@ ARG TARGETARCH
|
||||
ARG TARGETVARIANT
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
|
||||
|
||||
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
@@ -69,14 +68,10 @@ ENV PATH=/opt/rocm/bin:${PATH}
|
||||
# OpenBLAS requirements and stable diffusion
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
libopenblas-dev \
|
||||
libopencv-dev && \
|
||||
libopenblas-dev && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Set up OpenCV
|
||||
RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
###################################
|
||||
@@ -251,7 +246,7 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
|
||||
|
||||
FROM requirements-drivers AS builder-base
|
||||
|
||||
ARG GO_TAGS="stablediffusion tts p2p"
|
||||
ARG GO_TAGS="tts p2p"
|
||||
ARG GRPC_BACKENDS
|
||||
ARG MAKEFLAGS
|
||||
ARG LD_FLAGS="-s -w"
|
||||
@@ -285,35 +280,12 @@ RUN <<EOT bash
|
||||
fi
|
||||
EOT
|
||||
|
||||
|
||||
###################################
|
||||
###################################
|
||||
|
||||
# This first portion of builder holds the layers specifically used to build backend-assets/grpc/stablediffusion
|
||||
# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
|
||||
FROM builder-base AS builder-sd
|
||||
|
||||
# stablediffusion does not tolerate a newer version of abseil, copy only over enough elements to build it
|
||||
COPY Makefile .
|
||||
COPY go.mod .
|
||||
COPY go.sum .
|
||||
COPY backend/backend.proto ./backend/backend.proto
|
||||
COPY backend/go/image/stablediffusion ./backend/go/image/stablediffusion
|
||||
COPY pkg/grpc ./pkg/grpc
|
||||
COPY pkg/stablediffusion ./pkg/stablediffusion
|
||||
RUN git init
|
||||
RUN make sources/go-stable-diffusion
|
||||
RUN touch prepare-sources
|
||||
|
||||
# Actually build the backend
|
||||
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make backend-assets/grpc/stablediffusion
|
||||
|
||||
###################################
|
||||
###################################
|
||||
|
||||
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
|
||||
# Adjustments to the build process should likely be made here.
|
||||
FROM builder-sd AS builder
|
||||
FROM builder-base AS builder
|
||||
|
||||
# Install the pre-built GRPC
|
||||
COPY --from=grpc /opt/grpc /usr/local
|
||||
@@ -331,7 +303,7 @@ RUN make prepare
|
||||
## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
|
||||
## (both will use CUDA or hipblas for the actual computation)
|
||||
RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
||||
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
|
||||
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
|
||||
else \
|
||||
make build; \
|
||||
fi
|
||||
@@ -353,8 +325,6 @@ ARG FFMPEG
|
||||
|
||||
COPY --from=grpc /opt/grpc /usr/local
|
||||
|
||||
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion /build/backend-assets/grpc/stablediffusion
|
||||
|
||||
COPY .devcontainer-scripts /.devcontainer-scripts
|
||||
|
||||
# Add FFmpeg
|
||||
@@ -427,9 +397,6 @@ COPY --from=builder /build/local-ai ./
|
||||
# Copy shared libraries for piper
|
||||
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
|
||||
|
||||
# do not let stablediffusion rebuild (requires an older version of absl)
|
||||
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
|
||||
|
||||
# Change the shell to bash so we can use [[ tests below
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
# We try to strike a balance between individual layer size (as that affects total push time) and total image size
|
||||
@@ -443,8 +410,8 @@ RUN if [[ ( "${IMAGE_TYPE}" == "extras ")]]; then \
|
||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/coqui \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "parler-tts" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/parler-tts \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "faster-whisper" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/faster-whisper \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/diffusers \
|
||||
@@ -453,9 +420,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG
|
||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/kokoro \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/openvoice \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/exllama2 \
|
||||
; fi && \
|
||||
@@ -474,9 +438,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/rerankers \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "mamba" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/mamba \
|
||||
; fi
|
||||
|
||||
# Make sure the models directory exists
|
||||
|
||||
86
Makefile
86
Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
|
||||
# llama.cpp versions
|
||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||
CPPLLAMA_VERSION?=92bc493917d43b83e592349e138b54c90b1c3ea7
|
||||
CPPLLAMA_VERSION?=6152129d05870cb38162c422c6ba80434e021e9f
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||
@@ -18,10 +18,6 @@ WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
|
||||
PIPER_REPO?=https://github.com/mudler/go-piper
|
||||
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
|
||||
|
||||
# stablediffusion version
|
||||
STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
|
||||
STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
|
||||
|
||||
# bark.cpp
|
||||
BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
|
||||
BARKCPP_VERSION?=v1.0.0
|
||||
@@ -179,11 +175,6 @@ ifeq ($(STATIC),true)
|
||||
LD_FLAGS+=-linkmode external -extldflags -static
|
||||
endif
|
||||
|
||||
ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
|
||||
# OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
|
||||
OPTIONAL_GRPC+=backend-assets/grpc/stablediffusion
|
||||
endif
|
||||
|
||||
ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
||||
# OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
|
||||
# OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
|
||||
@@ -195,6 +186,7 @@ endif
|
||||
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
||||
@@ -273,19 +265,6 @@ sources/go-piper:
|
||||
sources/go-piper/libpiper_binding.a: sources/go-piper
|
||||
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
|
||||
|
||||
## stable diffusion (onnx)
|
||||
sources/go-stable-diffusion:
|
||||
mkdir -p sources/go-stable-diffusion
|
||||
cd sources/go-stable-diffusion && \
|
||||
git init && \
|
||||
git remote add origin $(STABLEDIFFUSION_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout $(STABLEDIFFUSION_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
|
||||
CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
|
||||
|
||||
## stablediffusion (ggml)
|
||||
sources/stablediffusion-ggml.cpp:
|
||||
git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
|
||||
@@ -331,20 +310,18 @@ sources/whisper.cpp:
|
||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
||||
|
||||
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion backend/cpp/llama/llama.cpp
|
||||
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
|
||||
|
||||
replace:
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
||||
|
||||
dropreplace:
|
||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
|
||||
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
|
||||
|
||||
prepare-sources: get-sources replace
|
||||
@@ -355,7 +332,6 @@ rebuild: ## Rebuilds the project
|
||||
$(GOCMD) clean -cache
|
||||
$(MAKE) -C sources/go-llama.cpp clean
|
||||
$(MAKE) -C sources/whisper.cpp clean
|
||||
$(MAKE) -C sources/go-stable-diffusion clean
|
||||
$(MAKE) -C sources/go-piper clean
|
||||
$(MAKE) build
|
||||
|
||||
@@ -470,7 +446,7 @@ prepare-test: grpcs
|
||||
|
||||
test: prepare test-models/testmodel.ggml grpcs
|
||||
@echo 'Running tests'
|
||||
export GO_TAGS="tts stablediffusion debug"
|
||||
export GO_TAGS="tts debug"
|
||||
$(MAKE) prepare-test
|
||||
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||
@@ -558,10 +534,10 @@ protogen-go-clean:
|
||||
$(RM) bin/*
|
||||
|
||||
.PHONY: protogen-python
|
||||
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen
|
||||
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
|
||||
|
||||
.PHONY: protogen-python-clean
|
||||
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean
|
||||
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean
|
||||
|
||||
.PHONY: autogptq-protogen
|
||||
autogptq-protogen:
|
||||
@@ -595,6 +571,14 @@ diffusers-protogen:
|
||||
diffusers-protogen-clean:
|
||||
$(MAKE) -C backend/python/diffusers protogen-clean
|
||||
|
||||
.PHONY: faster-whisper-protogen
|
||||
faster-whisper-protogen:
|
||||
$(MAKE) -C backend/python/faster-whisper protogen
|
||||
|
||||
.PHONY: faster-whisper-protogen-clean
|
||||
faster-whisper-protogen-clean:
|
||||
$(MAKE) -C backend/python/faster-whisper protogen-clean
|
||||
|
||||
.PHONY: exllama2-protogen
|
||||
exllama2-protogen:
|
||||
$(MAKE) -C backend/python/exllama2 protogen
|
||||
@@ -603,14 +587,6 @@ exllama2-protogen:
|
||||
exllama2-protogen-clean:
|
||||
$(MAKE) -C backend/python/exllama2 protogen-clean
|
||||
|
||||
.PHONY: mamba-protogen
|
||||
mamba-protogen:
|
||||
$(MAKE) -C backend/python/mamba protogen
|
||||
|
||||
.PHONY: mamba-protogen-clean
|
||||
mamba-protogen-clean:
|
||||
$(MAKE) -C backend/python/mamba protogen-clean
|
||||
|
||||
.PHONY: rerankers-protogen
|
||||
rerankers-protogen:
|
||||
$(MAKE) -C backend/python/rerankers protogen
|
||||
@@ -627,14 +603,6 @@ transformers-protogen:
|
||||
transformers-protogen-clean:
|
||||
$(MAKE) -C backend/python/transformers protogen-clean
|
||||
|
||||
.PHONY: parler-tts-protogen
|
||||
parler-tts-protogen:
|
||||
$(MAKE) -C backend/python/parler-tts protogen
|
||||
|
||||
.PHONY: parler-tts-protogen-clean
|
||||
parler-tts-protogen-clean:
|
||||
$(MAKE) -C backend/python/parler-tts protogen-clean
|
||||
|
||||
.PHONY: kokoro-protogen
|
||||
kokoro-protogen:
|
||||
$(MAKE) -C backend/python/kokoro protogen
|
||||
@@ -643,14 +611,6 @@ kokoro-protogen:
|
||||
kokoro-protogen-clean:
|
||||
$(MAKE) -C backend/python/kokoro protogen-clean
|
||||
|
||||
.PHONY: openvoice-protogen
|
||||
openvoice-protogen:
|
||||
$(MAKE) -C backend/python/openvoice protogen
|
||||
|
||||
.PHONY: openvoice-protogen-clean
|
||||
openvoice-protogen-clean:
|
||||
$(MAKE) -C backend/python/openvoice protogen-clean
|
||||
|
||||
.PHONY: vllm-protogen
|
||||
vllm-protogen:
|
||||
$(MAKE) -C backend/python/vllm protogen
|
||||
@@ -666,13 +626,11 @@ prepare-extra-conda-environments: protogen-python
|
||||
$(MAKE) -C backend/python/bark
|
||||
$(MAKE) -C backend/python/coqui
|
||||
$(MAKE) -C backend/python/diffusers
|
||||
$(MAKE) -C backend/python/faster-whisper
|
||||
$(MAKE) -C backend/python/vllm
|
||||
$(MAKE) -C backend/python/mamba
|
||||
$(MAKE) -C backend/python/rerankers
|
||||
$(MAKE) -C backend/python/transformers
|
||||
$(MAKE) -C backend/python/parler-tts
|
||||
$(MAKE) -C backend/python/kokoro
|
||||
$(MAKE) -C backend/python/openvoice
|
||||
$(MAKE) -C backend/python/exllama2
|
||||
|
||||
prepare-test-extra: protogen-python
|
||||
@@ -742,6 +700,13 @@ backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
|
||||
|
||||
backend-assets/grpc/llama-cpp-avx512: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-avx512
|
||||
$(MAKE) -C backend/cpp/llama-avx512 purge
|
||||
$(info ${GREEN}I llama-cpp build info:avx512${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx512" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-avx512/grpc-server backend-assets/grpc/llama-cpp-avx512
|
||||
|
||||
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-avx
|
||||
$(MAKE) -C backend/cpp/llama-avx purge
|
||||
@@ -816,13 +781,6 @@ ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/piper
|
||||
endif
|
||||
|
||||
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/stablediffusion
|
||||
endif
|
||||
|
||||
backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
|
||||
|
||||
@@ -39,7 +39,7 @@
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://trendshift.io/repositories/1484" target="_blank"><img src="https://trendshift.io/api/badge/repositories/1484" alt="go-skynet%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
||||
<a href="https://trendshift.io/repositories/5539" target="_blank"><img src="https://trendshift.io/api/badge/repositories/5539" alt="mudler%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
||||
</p>
|
||||
|
||||
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
||||
|
||||
@@ -1,56 +1,17 @@
|
||||
name: stablediffusion
|
||||
backend: stablediffusion
|
||||
backend: stablediffusion-ggml
|
||||
cfg_scale: 4.5
|
||||
|
||||
options:
|
||||
- sampler:euler
|
||||
parameters:
|
||||
model: stablediffusion_assets
|
||||
|
||||
license: "BSD-3"
|
||||
urls:
|
||||
- https://github.com/EdVince/Stable-Diffusion-NCNN
|
||||
- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE
|
||||
|
||||
description: |
|
||||
Stable Diffusion in NCNN with c++, supported txt2img and img2img
|
||||
model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf
|
||||
step: 25
|
||||
|
||||
download_files:
|
||||
- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
|
||||
sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
|
||||
- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
|
||||
sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
|
||||
- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
|
||||
sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
|
||||
- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
|
||||
sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
|
||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin"
|
||||
- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin"
|
||||
sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
|
||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin"
|
||||
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
|
||||
sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
|
||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin"
|
||||
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
|
||||
sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
|
||||
- filename: "stablediffusion_assets/log_sigmas.bin"
|
||||
sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
|
||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
|
||||
- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
|
||||
sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
|
||||
- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
|
||||
sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
|
||||
- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
|
||||
sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
|
||||
- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
|
||||
sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
|
||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
|
||||
- filename: "stablediffusion_assets/vocab.txt"
|
||||
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
|
||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
|
||||
- filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
|
||||
sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f"
|
||||
uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
|
||||
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/images/generations \
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#include "backend.grpc.pb.h"
|
||||
#include "utils.hpp"
|
||||
#include "sampling.h"
|
||||
#include "speculative.h"
|
||||
// include std::regex
|
||||
#include <cstddef>
|
||||
#include <thread>
|
||||
@@ -185,12 +186,45 @@ static json probs_vector_to_json(const llama_context *ctx, const std::vector<com
|
||||
return out;
|
||||
}
|
||||
|
||||
struct llama_slot_params {
|
||||
uint32_t seed = -1; // RNG seed
|
||||
bool stream = true;
|
||||
bool cache_prompt = true; // remember the prompt to avoid reprocessing all prompt
|
||||
bool return_tokens = false;
|
||||
|
||||
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
||||
int32_t n_discard = 0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half
|
||||
int32_t n_predict = -1; // new tokens to predict
|
||||
int32_t n_indent = 0; // mininum line indentation for the generated text in number of whitespace characters
|
||||
|
||||
int64_t t_max_prompt_ms = -1; // TODO: implement
|
||||
int64_t t_max_predict_ms = -1; // if positive, limit the generation phase to this time limit
|
||||
|
||||
std::vector<common_adapter_lora_info> lora;
|
||||
|
||||
std::vector<std::string> antiprompt;
|
||||
std::vector<std::string> response_fields;
|
||||
bool timings_per_token = false;
|
||||
bool post_sampling_probs = false;
|
||||
bool ignore_eos = false;
|
||||
|
||||
json input_prefix;
|
||||
json input_suffix;
|
||||
|
||||
struct common_params_sampling sampling;
|
||||
struct common_params_speculative speculative;
|
||||
};
|
||||
|
||||
|
||||
struct llama_client_slot
|
||||
{
|
||||
int id;
|
||||
int task_id = -1;
|
||||
|
||||
struct slot_params params;
|
||||
struct llama_slot_params params;
|
||||
common_speculative * spec = nullptr;
|
||||
llama_batch batch_spec = {};
|
||||
|
||||
|
||||
slot_state state = IDLE;
|
||||
slot_command command = NONE;
|
||||
@@ -283,6 +317,7 @@ struct llama_client_slot
|
||||
images.clear();
|
||||
}
|
||||
|
||||
|
||||
bool has_budget(common_params &global_params) {
|
||||
if (params.n_predict == -1 && global_params.n_predict == -1)
|
||||
{
|
||||
@@ -454,6 +489,10 @@ struct llama_server_context
|
||||
{
|
||||
llama_model *model = nullptr;
|
||||
llama_context *ctx = nullptr;
|
||||
common_init_result llama_init_dft;
|
||||
llama_context * ctx_dft = nullptr;
|
||||
llama_model * model_dft = nullptr;
|
||||
llama_context_params cparams_dft;
|
||||
const llama_vocab * vocab = nullptr;
|
||||
|
||||
clip_ctx *clp_ctx = nullptr;
|
||||
@@ -502,6 +541,7 @@ struct llama_server_context
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool load_model(const common_params ¶ms_)
|
||||
{
|
||||
params = params_;
|
||||
@@ -545,6 +585,45 @@ struct llama_server_context
|
||||
add_bos_token = llama_vocab_get_add_bos(vocab);
|
||||
has_eos_token = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL;
|
||||
|
||||
if (!params.speculative.model.empty()) {
|
||||
LOG("loading draft model '%s'\n", params.speculative.model.c_str());
|
||||
|
||||
auto params_dft = params;
|
||||
|
||||
params_dft.devices = params.speculative.devices;
|
||||
params_dft.model = params.speculative.model;
|
||||
params_dft.n_ctx = params.speculative.n_ctx == 0 ? params.n_ctx / params.n_parallel : params.speculative.n_ctx;
|
||||
params_dft.n_gpu_layers = params.speculative.n_gpu_layers;
|
||||
params_dft.n_parallel = 1;
|
||||
|
||||
llama_init_dft = common_init_from_params(params_dft);
|
||||
|
||||
model_dft = llama_init_dft.model.get();
|
||||
|
||||
if (model_dft == nullptr) {
|
||||
LOG("failed to load draft model, '%s'\n", params.speculative.model.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!common_speculative_are_compatible(ctx, llama_init_dft.context.get())) {
|
||||
LOG("the draft model '%s' is not compatible with the target model '%s'\n", params.speculative.model.c_str(), params.model.c_str());
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
const int n_ctx_dft = llama_n_ctx(llama_init_dft.context.get());
|
||||
|
||||
cparams_dft = common_context_params_to_llama(params_dft);
|
||||
cparams_dft.n_batch = n_ctx_dft;
|
||||
|
||||
// force F16 KV cache for the draft model for extra performance
|
||||
cparams_dft.type_k = GGML_TYPE_F16;
|
||||
cparams_dft.type_v = GGML_TYPE_F16;
|
||||
|
||||
// the context is not needed - we will create one for each slot
|
||||
llama_init_dft.context.reset();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -573,6 +652,22 @@ struct llama_server_context
|
||||
slot.n_ctx = n_ctx_slot;
|
||||
slot.n_predict = params.n_predict;
|
||||
|
||||
if (model_dft) {
|
||||
slot.batch_spec = llama_batch_init(params.speculative.n_max + 1, 0, 1);
|
||||
|
||||
ctx_dft = llama_init_from_model(model_dft, cparams_dft);
|
||||
if (ctx_dft == nullptr) {
|
||||
LOG("%s", "failed to create draft context\n");
|
||||
return;
|
||||
}
|
||||
|
||||
slot.spec = common_speculative_init(ctx_dft);
|
||||
if (slot.spec == nullptr) {
|
||||
LOG("%s", "failed to create speculator\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
LOG_INFO("new slot", {
|
||||
{"slot_id", slot.id},
|
||||
{"n_ctx_slot", slot.n_ctx}
|
||||
@@ -681,9 +776,11 @@ struct llama_server_context
|
||||
}
|
||||
|
||||
bool launch_slot_with_data(llama_client_slot* &slot, json data) {
|
||||
slot_params default_params;
|
||||
llama_slot_params default_params;
|
||||
common_params_sampling default_sparams;
|
||||
|
||||
|
||||
default_sparams.speculative = params_base.speculative;
|
||||
|
||||
slot->params.stream = json_value(data, "stream", false);
|
||||
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
|
||||
slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
|
||||
@@ -707,6 +804,15 @@ struct llama_server_context
|
||||
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
||||
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
||||
|
||||
|
||||
// Per-request speculative decoding overrides, falling back to the slot defaults.
// They are stored in slot->params.speculative because that is what the
// speculative decoding loop reads (slot.params.speculative.n_min/n_max/p_min).
slot->params.speculative.n_min = json_value(data, "speculative.n_min", default_params.speculative.n_min);
slot->params.speculative.n_max = json_value(data, "speculative.n_max", default_params.speculative.n_max);
slot->params.speculative.p_min = json_value(data, "speculative.p_min", default_params.speculative.p_min);

// Sanitize the values that were just read for this request (not the global
// server parameters): cap n_min at n_max, enforce the lower bounds.
slot->params.speculative.n_min = std::min(slot->params.speculative.n_max, slot->params.speculative.n_min);
slot->params.speculative.n_min = std::max(slot->params.speculative.n_min, 2);
slot->params.speculative.n_max = std::max(slot->params.speculative.n_max, 0);
|
||||
|
||||
if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) {
|
||||
// Might be better to reject the request with a 400 ?
|
||||
LOG_WARNING("Max tokens to predict exceeds server configuration", {
|
||||
@@ -2024,6 +2130,97 @@ struct llama_server_context
|
||||
}
|
||||
}
|
||||
|
||||
// do speculative decoding
|
||||
for (auto & slot : slots) {
|
||||
if (!slot.is_processing() || !(ctx_dft && params.speculative.n_max > 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (slot.state != PROCESSING) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// determine the max draft that fits the current slot state
|
||||
int n_draft_max = slot.params.speculative.n_max;
|
||||
|
||||
// note: n_past is not yet increased for the `id` token sampled above
|
||||
// also, need to leave space for 1 extra token to allow context shifts
|
||||
n_draft_max = std::min(n_draft_max, slot.n_ctx - slot.n_past - 2);
|
||||
|
||||
if (slot.n_remaining > 0) {
|
||||
n_draft_max = std::min(n_draft_max, slot.n_remaining - 1);
|
||||
}
|
||||
|
||||
LOG("max possible draft: %d\n", n_draft_max);
|
||||
|
||||
if (n_draft_max < slot.params.speculative.n_min) {
|
||||
LOG("the max possible draft is too small: %d < %d - skipping speculative decoding\n", n_draft_max, slot.params.speculative.n_min);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
llama_token id = slot.sampled;
|
||||
|
||||
struct common_speculative_params params_spec;
|
||||
params_spec.n_draft = n_draft_max;
|
||||
params_spec.n_reuse = llama_n_ctx(ctx_dft) - slot.params.speculative.n_max;
|
||||
params_spec.p_min = slot.params.speculative.p_min;
|
||||
|
||||
llama_tokens draft = common_speculative_gen_draft(slot.spec, params_spec, slot.cache_tokens, id);
|
||||
|
||||
// ignore small drafts
|
||||
if (slot.params.speculative.n_min > (int) draft.size()) {
|
||||
LOG("ignoring small draft: %d < %d\n", (int) draft.size(), slot.params.speculative.n_min);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// construct the speculation batch
|
||||
common_batch_clear(slot.batch_spec);
|
||||
common_batch_add (slot.batch_spec, id, slot.n_past, { slot.id }, true);
|
||||
|
||||
for (size_t i = 0; i < draft.size(); ++i) {
|
||||
common_batch_add(slot.batch_spec, draft[i], slot.n_past + 1 + i, { slot.id }, true);
|
||||
}
|
||||
|
||||
LOG("decoding speculative batch, size = %d\n", slot.batch_spec.n_tokens);
|
||||
|
||||
llama_decode(ctx, slot.batch_spec);
|
||||
|
||||
// the accepted tokens from the speculation
|
||||
const auto ids = common_sampler_sample_and_accept_n(slot.ctx_sampling, ctx, draft);
|
||||
|
||||
slot.n_past += ids.size();
|
||||
slot.n_decoded += ids.size();
|
||||
|
||||
slot.cache_tokens.push_back(id);
|
||||
slot.cache_tokens.insert(slot.cache_tokens.end(), ids.begin(), ids.end() - 1);
|
||||
|
||||
llama_kv_cache_seq_rm(ctx, slot.id, slot.n_past, -1);
|
||||
|
||||
for (size_t i = 0; i < ids.size(); ++i) {
|
||||
completion_token_output result;
|
||||
|
||||
result.tok = ids[i];
|
||||
result.text_to_send = common_token_to_piece(ctx, result.tok, params.special);
|
||||
//result.prob = 1.0f; // set later
|
||||
|
||||
// TODO: set result.probs
|
||||
|
||||
if (!process_token(result, slot)) {
|
||||
// release slot because of stop condition
|
||||
slot.release();
|
||||
slot.print_timings();
|
||||
send_final_response(slot);
|
||||
metrics.on_prediction(slot);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
LOG("accepted %d/%d draft tokens, new n_past = %d\n", (int) ids.size() - 1, (int) draft.size(), slot.n_past);
|
||||
}
|
||||
|
||||
|
||||
LOG_VERBOSE("slots updated", {});
|
||||
return true;
|
||||
}
|
||||
@@ -2296,6 +2493,30 @@ static void params_parse(const backend::ModelOptions* request,
|
||||
params.cpuparams.n_threads = request->threads();
|
||||
params.n_gpu_layers = request->ngpulayers();
|
||||
params.n_batch = request->nbatch();
|
||||
params.speculative.model = request->draftmodel();
|
||||
|
||||
// If options is not NULL, parse options
|
||||
for (int i = 0; request->options()[i] != NULL; i++) {
|
||||
char *optname = strtok(request->options()[i], ":");
|
||||
char *optval = strtok(NULL, ":");
|
||||
if (optval == NULL) {
|
||||
optval = "true";
|
||||
}
|
||||
|
||||
if (!strcmp(optname, "speculative.n_gpu_layers")) {
|
||||
params.speculative.n_gpu_layers = std::stoi(optval);
|
||||
}
|
||||
if (!strcmp(optname, "speculative.n_ctx")) {
|
||||
params.speculative.n_ctx = std::stoi(optval);
|
||||
}
|
||||
}
|
||||
|
||||
if params.speculative.n_gpu_layers == 0 {
|
||||
params.speculative.n_gpu_layers = params.n_gpu_layers;
|
||||
}
|
||||
if params.speculative.n_ctx == 0 {
|
||||
params.speculative.n_ctx = params.n_ctx;
|
||||
}
|
||||
// Set params.n_parallel by environment variable (LLAMACPP_PARALLEL), defaults to 1
|
||||
//params.n_parallel = 1;
|
||||
const char *env_parallel = std::getenv("LLAMACPP_PARALLEL");
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
package main
|
||||
|
||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
||||
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &Image{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
@@ -1,33 +0,0 @@
|
||||
package main
|
||||
|
||||
// This is a wrapper to satisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/stablediffusion"
|
||||
)
|
||||
|
||||
// Image is the gRPC backend wrapper around a stablediffusion instance.
// It embeds base.SingleThread and holds the instance created by Load.
type Image struct {
	base.SingleThread
	// stablediffusion is set by Load and used by GenerateImage.
	stablediffusion *stablediffusion.StableDiffusion
}
|
||||
|
||||
func (image *Image) Load(opts *pb.ModelOptions) error {
|
||||
var err error
|
||||
// Note: the Model here is a path to a directory containing the model files
|
||||
image.stablediffusion, err = stablediffusion.New(opts.ModelFile)
|
||||
return err
|
||||
}
|
||||
|
||||
func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
|
||||
return image.stablediffusion.GenerateImage(
|
||||
int(opts.Height),
|
||||
int(opts.Width),
|
||||
int(opts.Mode),
|
||||
int(opts.Step),
|
||||
int(opts.Seed),
|
||||
opts.PositivePrompt,
|
||||
opts.NegativePrompt,
|
||||
opts.Dst)
|
||||
}
|
||||
@@ -311,12 +311,16 @@ func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error)
|
||||
}
|
||||
|
||||
// isNormalized reports whether k is approximately a unit vector, i.e. whether
// its Euclidean (L2) norm lies within [0.99, 1.01].
//
// The squares are accumulated in float64 to limit rounding error, and a
// tolerance is used because float32 vectors are rarely exactly unit length.
// An empty or nil vector has norm 0 and is therefore not normalized.
func isNormalized(k []float32) bool {
	var sumSquares float64

	for _, v := range k {
		v64 := float64(v)
		sumSquares += v64 * v64
	}

	norm := math.Sqrt(sumSquares)

	return norm >= 0.99 && norm <= 1.01
}
|
||||
|
||||
// TODO: This we could replace with handwritten SIMD code
|
||||
@@ -328,7 +332,7 @@ func normalizedCosineSimilarity(k1, k2 []float32) float32 {
|
||||
dot += k1[i] * k2[i]
|
||||
}
|
||||
|
||||
assert(dot >= -1 && dot <= 1, fmt.Sprintf("dot = %f", dot))
|
||||
assert(dot >= -1.01 && dot <= 1.01, fmt.Sprintf("dot = %f", dot))
|
||||
|
||||
// 2.0 * (1.0 - dot) would be the Euclidean distance
|
||||
return dot
|
||||
@@ -418,7 +422,7 @@ func cosineSimilarity(k1, k2 []float32, mag1 float64) float32 {
|
||||
|
||||
sim := float32(dot / (mag1 * math.Sqrt(mag2)))
|
||||
|
||||
assert(sim >= -1 && sim <= 1, fmt.Sprintf("sim = %f", sim))
|
||||
assert(sim >= -1.01 && sim <= 1.01, fmt.Sprintf("sim = %f", sim))
|
||||
|
||||
return sim
|
||||
}
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
.DEFAULT_GOAL := install
|
||||
|
||||
.PHONY: install
|
||||
install: protogen
|
||||
install:
|
||||
bash install.sh
|
||||
$(MAKE) protogen
|
||||
|
||||
.PHONY: protogen
|
||||
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||
@@ -12,14 +13,8 @@ protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
bash protogen.sh
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
rm -rf venv __pycache__
|
||||
|
||||
.PHONY: test
|
||||
test: protogen
|
||||
@echo "Testing openvoice..."
|
||||
bash test.sh
|
||||
@echo "openvoice tested."
|
||||
rm -rf venv __pycache__
|
||||
94
backend/python/faster-whisper/backend.py
Executable file
94
backend/python/faster-whisper/backend.py
Executable file
@@ -0,0 +1,94 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
This is an extra gRPC server of LocalAI for faster-whisper audio transcription
|
||||
"""
|
||||
from concurrent import futures
|
||||
import time
|
||||
import argparse
|
||||
import signal
|
||||
import sys
|
||||
import os
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
from faster_whisper import WhisperModel
|
||||
|
||||
import grpc
|
||||
|
||||
|
||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||
|
||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||
COQUI_LANGUAGE = os.environ.get('COQUI_LANGUAGE', None)
|
||||
|
||||
# Implement the BackendServicer class with the service methods
|
||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    gRPC servicer that loads a faster-whisper model and transcribes audio.
    """

    def Health(self, request, context):
        """Reply with a constant "OK" payload."""
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """
        Create the WhisperModel for request.Model and keep it on self.model.

        Returns a Result with success=False and the error text if the model
        cannot be created, otherwise a success Result.
        """
        # Run on CUDA only when the request asks for it, otherwise on the CPU.
        device = "cuda" if request.CUDA else "cpu"

        try:
            print("Preparing models, please wait", file=sys.stderr)
            self.model = WhisperModel(request.Model, device=device, compute_type="float16")
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def AudioTranscription(self, request, context):
        """
        Transcribe the audio file at request.dst.

        Segments are collected one by one; if an error occurs it is logged to
        stderr and whatever was collected so far is returned.
        """
        collected = []
        transcript = ""
        try:
            segments, _ = self.model.transcribe(request.dst, beam_size=5, condition_on_previous_text=False)
            for index, segment in enumerate(segments):
                print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
                collected.append(
                    backend_pb2.TranscriptSegment(
                        id=index,
                        start=segment.start,
                        end=segment.end,
                        text=segment.text,
                    )
                )
                transcript += segment.text
        except Exception as err:
            print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr)

        return backend_pb2.TranscriptResult(segments=collected, text=transcript)
|
||||
|
||||
def serve(address):
    """
    Start the gRPC server on `address` and block until it is interrupted.

    SIGINT and SIGTERM stop the server and exit the process.
    """
    executor = futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)
    server = grpc.server(executor)
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)

    def signal_handler(sig, frame):
        # Stop immediately (no grace period) and terminate the process.
        print("Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, signal_handler)

    # Keep the main thread alive; the server runs on worker threads.
    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Run the gRPC server.")
|
||||
parser.add_argument(
|
||||
"--addr", default="localhost:50051", help="The address to bind the server to."
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
serve(args.addr)
|
||||
@@ -12,5 +12,3 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||
fi
|
||||
|
||||
installRequirements
|
||||
|
||||
python -m unidic download
|
||||
0
backend/python/parler-tts/protogen.sh → backend/python/faster-whisper/protogen.sh
Executable file → Normal file
0
backend/python/parler-tts/protogen.sh → backend/python/faster-whisper/protogen.sh
Executable file → Normal file
8
backend/python/faster-whisper/requirements-cpu.txt
Normal file
8
backend/python/faster-whisper/requirements-cpu.txt
Normal file
@@ -0,0 +1,8 @@
|
||||
faster-whisper
|
||||
opencv-python
|
||||
accelerate
|
||||
compel
|
||||
peft
|
||||
sentencepiece
|
||||
torch==2.4.1
|
||||
optimum-quanto
|
||||
9
backend/python/faster-whisper/requirements-cublas11.txt
Normal file
9
backend/python/faster-whisper/requirements-cublas11.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||
torch==2.4.1+cu118
|
||||
faster-whisper
|
||||
opencv-python
|
||||
accelerate
|
||||
compel
|
||||
peft
|
||||
sentencepiece
|
||||
optimum-quanto
|
||||
8
backend/python/faster-whisper/requirements-cublas12.txt
Normal file
8
backend/python/faster-whisper/requirements-cublas12.txt
Normal file
@@ -0,0 +1,8 @@
|
||||
torch==2.4.1
|
||||
faster-whisper
|
||||
opencv-python
|
||||
accelerate
|
||||
compel
|
||||
peft
|
||||
sentencepiece
|
||||
optimum-quanto
|
||||
3
backend/python/faster-whisper/requirements-hipblas.txt
Normal file
3
backend/python/faster-whisper/requirements-hipblas.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||
torch
|
||||
faster-whisper
|
||||
@@ -1,8 +1,6 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch==2.3.110+xpu
|
||||
torch==2.3.1+cxx11.abi
|
||||
torchaudio==2.3.1+cxx11.abi
|
||||
oneccl_bind_pt==2.3.100+xpu
|
||||
optimum[openvino]
|
||||
transformers
|
||||
accelerate
|
||||
faster-whisper
|
||||
@@ -1,3 +1,3 @@
|
||||
grpcio==1.69.0
|
||||
protobuf
|
||||
certifi
|
||||
grpcio-tools
|
||||
@@ -1,29 +0,0 @@
|
||||
.PHONY: mamba
|
||||
mamba: protogen
|
||||
bash install.sh
|
||||
|
||||
.PHONY: run
|
||||
run: protogen
|
||||
@echo "Running mamba..."
|
||||
bash run.sh
|
||||
@echo "mamba run."
|
||||
|
||||
.PHONY: test
|
||||
test: protogen
|
||||
@echo "Testing mamba..."
|
||||
bash test.sh
|
||||
@echo "mamba tested."
|
||||
|
||||
.PHONY: protogen
|
||||
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
.PHONY: protogen-clean
|
||||
protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
$(RM) -r venv __pycache__
|
||||
@@ -1,5 +0,0 @@
|
||||
# Creating a separate environment for the mamba project
|
||||
|
||||
```
|
||||
make mamba
|
||||
```
|
||||
@@ -1,179 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
from concurrent import futures
|
||||
import time
|
||||
import argparse
|
||||
import signal
|
||||
import sys
|
||||
import os
|
||||
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
|
||||
import torch
|
||||
from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||
from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel
|
||||
|
||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||
|
||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||
MAMBA_CHAT= os.environ.get('MAMBA_CHAT', '1') == '1'
|
||||
|
||||
# Implement the BackendServicer class with the service methods
|
||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    gRPC servicer implementing the Backend service for Mamba models.
    """

    def generate(self,prompt, max_new_tokens):
        """
        Generate up to max_new_tokens tokens for prompt via self.generator.

        NOTE(review): self.generator is never assigned in this class; this
        helper looks unused here - confirm before relying on it.

        Args:
            prompt (str): The prompt to generate text from.
            max_new_tokens (int): The maximum number of new tokens to generate.

        Returns:
            str: The generated text.
        """
        self.generator.end_beam_search()

        # Tokenizing the input
        prompt_ids = self.generator.tokenizer.encode(prompt)

        self.generator.gen_begin_reuse(prompt_ids)
        start = self.generator.sequence[0].shape[0]
        leading_space = False
        text = ''
        for step in range(max_new_tokens):
            token = self.generator.gen_single_token()
            if step == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'):
                leading_space = True

            text = self.generator.tokenizer.decode(self.generator.sequence[0][start:])
            if leading_space:
                text = ' ' + text

            if token.item() == self.generator.tokenizer.eos_token_id:
                break
        return text

    def Health(self, request, context):
        """
        Health check.

        Args:
            request: The health check request.
            context: The gRPC context.

        Returns:
            backend_pb2.Reply: A reply whose message is b"OK".
        """
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """
        Load the tokenizer and the Mamba model named by the request.

        The tokenizer comes from request.Tokenizer, or request.Model when the
        former is empty. The model is loaded on "cuda" in float16.

        Args:
            request: The load model request.
            context: The gRPC context.

        Returns:
            backend_pb2.Result: success=False with the error text on failure.
        """
        try:
            tokenizer_name = request.Tokenizer
            if tokenizer_name == "":
                tokenizer_name = request.Model

            tok = AutoTokenizer.from_pretrained(tokenizer_name)
            if MAMBA_CHAT:
                tok.eos_token = "<|endoftext|>"
                tok.pad_token = tok.eos_token
            self.tokenizer = tok
            self.model = MambaLMHeadModel.from_pretrained(request.Model, device="cuda", dtype=torch.float16)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def Predict(self, request, context):
        """
        Generate text for request.Prompt.

        TopP defaults to 0.9 and the token limit to 2000 when the request
        leaves them at 0. The prompt is stripped from the decoded output.

        Args:
            request: The predict request.
            context: The gRPC context.

        Returns:
            backend_pb2.Reply: The generated text, UTF-8 encoded.
        """
        if request.TopP == 0:
            request.TopP = 0.9

        max_tokens = 2000 if request.Tokens == 0 else request.Tokens

        encoded = self.tokenizer(request.Prompt, return_tensors="pt")
        input_ids = encoded.input_ids.to(device="cuda")
        output = self.model.generate(
            input_ids=input_ids,
            max_length=max_tokens,
            temperature=request.Temperature,
            top_p=request.TopP,
            eos_token_id=self.tokenizer.eos_token_id,
        )

        generated_text = self.tokenizer.batch_decode(output)[0]

        # Remove prompt from response if present
        if request.Prompt in generated_text:
            generated_text = generated_text.replace(request.Prompt, "")

        return backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))

    def PredictStream(self, request, context):
        """
        Streaming variant of Predict: yields the single Predict reply.

        Args:
            request: The predict stream request.
            context: The gRPC context.

        Yields:
            backend_pb2.Reply: The complete Predict result.
        """
        yield self.Predict(request, context)
|
||||
|
||||
def serve(address):
    """
    Start the gRPC server on `address` and block until it is interrupted.

    SIGINT and SIGTERM stop the server and exit the process.
    """
    executor = futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)
    server = grpc.server(executor)
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)

    def signal_handler(sig, frame):
        # Stop immediately (no grace period) and terminate the process.
        print("Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, signal_handler)

    # Keep the main thread alive; the server runs on worker threads.
    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Run the gRPC server.")
|
||||
parser.add_argument(
|
||||
"--addr", default="localhost:50051", help="The address to bind the server to."
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
serve(args.addr)
|
||||
@@ -1,9 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
LIMIT_TARGETS="cublas"
|
||||
EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
installRequirements
|
||||
@@ -1,2 +0,0 @@
|
||||
causal-conv1d==1.4.0
|
||||
mamba-ssm==2.2.2
|
||||
@@ -1,2 +0,0 @@
|
||||
torch==2.4.1
|
||||
transformers
|
||||
@@ -1,3 +0,0 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||
torch==2.4.1+cu118
|
||||
transformers
|
||||
@@ -1,2 +0,0 @@
|
||||
torch==2.4.1
|
||||
transformers
|
||||
@@ -1,6 +0,0 @@
|
||||
# mamba does not specify its build dependencies per PEP517, so we need to disable build isolation
|
||||
# this also means that we need to install the basic build dependencies into the venv ourselves
|
||||
# https://github.com/Dao-AILab/causal-conv1d/issues/24
|
||||
packaging
|
||||
setuptools
|
||||
wheel
|
||||
@@ -1,6 +0,0 @@
|
||||
#!/bin/bash
|
||||
LIMIT_TARGETS="cublas"
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
startBackend $@
|
||||
@@ -1,76 +0,0 @@
|
||||
import unittest
|
||||
import subprocess
|
||||
import time
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
|
||||
import unittest
|
||||
import subprocess
|
||||
import time
|
||||
import grpc
|
||||
import backend_pb2_grpc
|
||||
import backend_pb2
|
||||
|
||||
class TestBackendServicer(unittest.TestCase):
    """
    Tests for the backend gRPC service.

    unittest runs setUp before and tearDown after every test, so each test gets
    its own backend process. The tests must not call them again, otherwise a
    second backend is spawned on the same port and the first one is leaked.
    """
    def setUp(self):
        # Start the backend and give it time to come up.
        self.service = subprocess.Popen(["python", "backend.py", "--addr", "localhost:50051"])
        time.sleep(10)

    def tearDown(self) -> None:
        self.service.terminate()
        self.service.wait()

    def test_server_startup(self):
        """
        This method tests if the server answers the health check
        """
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.Health(backend_pb2.HealthMessage())
                self.assertEqual(response.message, b'OK')
        except Exception as err:
            print(err)
            self.fail("Server failed to start")

    def test_load_model(self):
        """
        This method tests if the model is loaded successfully
        """
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m"))
                self.assertTrue(response.success)
                self.assertEqual(response.message, "Model loaded successfully")
        except Exception as err:
            print(err)
            self.fail("LoadModel service failed")

    def test_text(self):
        """
        This method tests if text is generated successfully for a prompt
        """
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m"))
                self.assertTrue(response.success)
                req = backend_pb2.PredictOptions(Prompt="The capital of France is")
                resp = stub.Predict(req)
                self.assertIsNotNone(resp.message)
        except Exception as err:
            print(err)
            self.fail("text service failed")
|
||||
@@ -1,158 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Extra gRPC server for OpenVoice models.
|
||||
"""
|
||||
from concurrent import futures
|
||||
|
||||
import argparse
|
||||
import signal
|
||||
import sys
|
||||
import os
|
||||
import torch
|
||||
from openvoice import se_extractor
|
||||
from openvoice.api import ToneColorConverter
|
||||
from melo.api import TTS
|
||||
|
||||
import time
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
|
||||
|
||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||
|
||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||
|
||||
# Implement the BackendServicer class with the service methods
|
||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
"""
|
||||
A gRPC servicer for the backend service.
|
||||
|
||||
This class implements the gRPC methods for the backend service, including Health, LoadModel, and TTS.
|
||||
"""
|
||||
def Health(self, request, context):
|
||||
"""
|
||||
A gRPC method that returns the health status of the backend service.
|
||||
|
||||
Args:
|
||||
request: A HealthRequest object that contains the request parameters.
|
||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
||||
|
||||
Returns:
|
||||
A Reply object that contains the health status of the backend service.
|
||||
"""
|
||||
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
||||
|
||||
def LoadModel(self, request, context):
|
||||
"""
|
||||
A gRPC method that loads a model into memory.
|
||||
|
||||
Args:
|
||||
request: A LoadModelRequest object that contains the request parameters.
|
||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
||||
|
||||
Returns:
|
||||
A Result object that contains the result of the LoadModel operation.
|
||||
"""
|
||||
model_name = request.Model
|
||||
try:
|
||||
|
||||
self.clonedVoice = False
|
||||
# Assume directory from request.ModelFile.
|
||||
# Only if request.LoraAdapter it's not an absolute path
|
||||
if request.AudioPath and request.ModelFile != "" and not os.path.isabs(request.AudioPath):
|
||||
# get base path of modelFile
|
||||
modelFileBase = os.path.dirname(request.ModelFile)
|
||||
request.AudioPath = os.path.join(modelFileBase, request.AudioPath)
|
||||
if request.AudioPath != "":
|
||||
self.clonedVoice = True
|
||||
|
||||
self.modelpath = request.ModelFile
|
||||
self.speaker = request.Type
|
||||
self.ClonedVoicePath = request.AudioPath
|
||||
|
||||
ckpt_converter = request.Model+'/converter'
|
||||
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
||||
self.device = device
|
||||
self.tone_color_converter = None
|
||||
if self.clonedVoice:
|
||||
self.tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
|
||||
self.tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
|
||||
|
||||
except Exception as err:
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
|
||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
||||
|
||||
def TTS(self, request, context):
|
||||
model_name = request.model
|
||||
if model_name == "":
|
||||
return backend_pb2.Result(success=False, message="request.model is required")
|
||||
try:
|
||||
# Speed is adjustable
|
||||
speed = 1.0
|
||||
voice = "EN"
|
||||
if request.voice:
|
||||
voice = request.voice
|
||||
model = TTS(language=voice, device=self.device)
|
||||
speaker_ids = model.hps.data.spk2id
|
||||
speaker_key = self.speaker
|
||||
modelpath = self.modelpath
|
||||
for s in speaker_ids.keys():
|
||||
print(f"Speaker: {s} - ID: {speaker_ids[s]}")
|
||||
speaker_id = speaker_ids[speaker_key]
|
||||
speaker_key = speaker_key.lower().replace('_', '-')
|
||||
source_se = torch.load(f'{modelpath}/base_speakers/ses/{speaker_key}.pth', map_location=self.device)
|
||||
model.tts_to_file(request.text, speaker_id, request.dst, speed=speed)
|
||||
if self.clonedVoice:
|
||||
reference_speaker = self.ClonedVoicePath
|
||||
target_se, audio_name = se_extractor.get_se(reference_speaker, self.tone_color_converter, vad=False)
|
||||
# Run the tone color converter
|
||||
encode_message = "@MyShell"
|
||||
self.tone_color_converter.convert(
|
||||
audio_src_path=request.dst,
|
||||
src_se=source_se,
|
||||
tgt_se=target_se,
|
||||
output_path=request.dst,
|
||||
message=encode_message)
|
||||
|
||||
print("[OpenVoice] TTS generated!", file=sys.stderr)
|
||||
print("[OpenVoice] TTS saved to", request.dst, file=sys.stderr)
|
||||
print(request, file=sys.stderr)
|
||||
except Exception as err:
|
||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||
return backend_pb2.Result(success=True)
|
||||
|
||||
def serve(address):
    """
    Start the gRPC backend on `address` and block until the process is terminated.

    Args:
        address: The host:port string passed to add_insecure_port.
    """
    pool = futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)
    server = grpc.server(pool)
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("[OpenVoice] Server started. Listening on: " + address, file=sys.stderr)

    def _shutdown(signum, _frame):
        # Stop the server immediately and leave the process.
        print("[OpenVoice] Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    # Both SIGINT and SIGTERM trigger the same shutdown path.
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, _shutdown)

    # Keep the main thread alive; the server runs on the executor threads.
    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)
|
||||
|
||||
if __name__ == "__main__":
    # Read the listen address from the command line, log the parsed
    # arguments on stderr and hand the address to serve().
    cli = argparse.ArgumentParser(description="Run the gRPC server.")
    cli.add_argument("--addr", default="localhost:50051", help="The address to bind the server to.")
    args = cli.parse_args()
    print(f"[OpenVoice] startup: {args}", file=sys.stderr)
    serve(args.addr)
|
||||
@@ -1,7 +0,0 @@
|
||||
torch==2.4.1
|
||||
git+https://github.com/myshell-ai/MeloTTS.git
|
||||
git+https://github.com/myshell-ai/OpenVoice.git
|
||||
whisper-timestamped
|
||||
pydub==0.25.1
|
||||
wavmark==0.0.3
|
||||
eng_to_ipa==0.0.2
|
||||
@@ -1,8 +0,0 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||
torch==2.4.1+cu118
|
||||
git+https://github.com/myshell-ai/MeloTTS.git
|
||||
git+https://github.com/myshell-ai/OpenVoice.git
|
||||
whisper-timestamped
|
||||
pydub==0.25.1
|
||||
wavmark==0.0.3
|
||||
eng_to_ipa==0.0.2
|
||||
@@ -1,7 +0,0 @@
|
||||
torch==2.4.1
|
||||
git+https://github.com/myshell-ai/MeloTTS.git
|
||||
git+https://github.com/myshell-ai/OpenVoice.git
|
||||
whisper-timestamped
|
||||
pydub==0.25.1
|
||||
wavmark==0.0.3
|
||||
eng_to_ipa==0.0.2
|
||||
@@ -1,8 +0,0 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||
torch==2.4.1+rocm6.0
|
||||
git+https://github.com/myshell-ai/MeloTTS.git
|
||||
git+https://github.com/myshell-ai/OpenVoice.git
|
||||
whisper-timestamped
|
||||
pydub==0.25.1
|
||||
wavmark==0.0.3
|
||||
eng_to_ipa==0.0.2
|
||||
@@ -1,24 +0,0 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch==2.3.110+xpu
|
||||
torch==2.3.1+cxx11.abi
|
||||
torchaudio==2.3.1+cxx11.abi
|
||||
oneccl_bind_pt==2.3.100+xpu
|
||||
optimum[openvino]
|
||||
grpcio==1.69.0
|
||||
protobuf
|
||||
librosa==0.9.1
|
||||
faster-whisper==0.9.0
|
||||
pydub==0.25.1
|
||||
wavmark==0.0.3
|
||||
eng_to_ipa==0.0.2
|
||||
inflect==7.0.0
|
||||
unidecode==1.3.7
|
||||
whisper-timestamped==1.14.2
|
||||
openai
|
||||
python-dotenv
|
||||
pypinyin==0.50.0
|
||||
cn2an==0.5.22
|
||||
jieba==0.42.1
|
||||
langid==1.1.6
|
||||
git+https://github.com/myshell-ai/MeloTTS.git
|
||||
git+https://github.com/myshell-ai/OpenVoice.git
|
||||
@@ -1,17 +0,0 @@
|
||||
grpcio==1.69.0
|
||||
protobuf
|
||||
librosa
|
||||
faster-whisper
|
||||
inflect
|
||||
unidecode
|
||||
openai
|
||||
python-dotenv
|
||||
pypinyin
|
||||
cn2an==0.5.22
|
||||
numpy==1.22.0
|
||||
networkx==2.8.8
|
||||
jieba==0.42.1
|
||||
gradio==5.9.1
|
||||
langid==1.1.6
|
||||
llvmlite==0.43.0
|
||||
setuptools
|
||||
@@ -1,82 +0,0 @@
|
||||
"""
|
||||
A test script to test the gRPC service
|
||||
"""
|
||||
import unittest
|
||||
import subprocess
|
||||
import time
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
|
||||
|
||||
class TestBackendServicer(unittest.TestCase):
    """
    Tests the gRPC backend through a server started in a subprocess.

    unittest runs setUp before and tearDown after every test method, so the
    tests must not call them directly: doing so would start a second server
    on the same port and lose the handle of the first one.
    """

    def setUp(self):
        """
        Start the backend server in a subprocess and give it time to come up.
        """
        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
        time.sleep(30)

    def tearDown(self) -> None:
        """
        Terminate the server subprocess and wait for it to exit.
        """
        self.service.terminate()
        self.service.wait()

    def test_server_startup(self):
        """
        The Health RPC answers b'OK' once the server is running.
        """
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.Health(backend_pb2.HealthMessage())
        except grpc.RpcError as err:
            self.fail(f"Server failed to start: {err}")
        # Asserted outside the try block so a wrong reply is reported as such.
        self.assertEqual(response.message, b'OK')

    def test_load_model(self):
        """
        LoadModel succeeds for the checkpoints_v2 directory.
        """
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="checkpoints_v2",
                                                                   Type="en-us"))
        except grpc.RpcError as err:
            self.fail(f"LoadModel service failed: {err}")
        self.assertTrue(response.success)
        self.assertEqual(response.message, "Model loaded successfully")

    def test_tts(self):
        """
        The TTS RPC returns a response after a model has been loaded.
        """
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="dingzhen"))
                self.assertTrue(response.success)
                # NOTE(review): the request sets neither model nor dst, and only
                # the presence of a response is checked - confirm this is intended.
                tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story", voice="EN")
                tts_response = stub.TTS(tts_request)
        except grpc.RpcError as err:
            self.fail(f"TTS service failed: {err}")
        self.assertIsNotNone(tts_response)
|
||||
@@ -1,12 +0,0 @@
|
||||
#!/bin/bash
set -e

# Load the shared backend helpers; quoted so a checkout path with spaces works.
source "$(dirname "$0")/../common/libbackend.sh"

# Download checkpoints if not present
if [ ! -d "checkpoints_v2" ]; then
    wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
    unzip checkpoints_v2.zip
fi

runUnittests
|
||||
@@ -1,44 +0,0 @@
|
||||
# Build, run and test targets for the parler-tts backend.
export CONDA_ENV_PATH = "parler.yml"
SKIP_CONDA?=0
ifeq ($(BUILD_TYPE), cublas)
export CONDA_ENV_PATH = "parler-nvidia.yml"
endif

# Intel GPU are supposed to have dependencies installed in the main python
# environment, so we skip conda installation for SYCL builds.
# https://github.com/intel/intel-extension-for-pytorch/issues/538
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
export SKIP_CONDA=1
endif

.PHONY: parler-tts
parler-tts:
	@echo "Installing $(CONDA_ENV_PATH)..."
	bash install.sh $(CONDA_ENV_PATH)
	$(MAKE) protogen

.PHONY: run
run: protogen
	@echo "Running parler-tts..."
	bash run.sh
	@echo "parler-tts run."

.PHONY: test
test: protogen
	@echo "Testing parler-tts..."
	bash test.sh
	@echo "parler-tts tested."

.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py

.PHONY: protogen-clean
protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py

# Both generated gRPC modules are produced by the same script.
backend_pb2_grpc.py backend_pb2.py:
	bash protogen.sh

.PHONY: clean
clean: protogen-clean
	$(RM) -r venv __pycache__
|
||||
@@ -1,125 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Extra gRPC server for Parler-TTS (ParlerTTSForConditionalGeneration) models.
|
||||
"""
|
||||
from concurrent import futures
|
||||
|
||||
import argparse
|
||||
import signal
|
||||
import sys
|
||||
import os
|
||||
|
||||
import time
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
|
||||
from scipy.io.wavfile import write as write_wav
|
||||
|
||||
from parler_tts import ParlerTTSForConditionalGeneration
|
||||
from transformers import AutoTokenizer
|
||||
import soundfile as sf
|
||||
import torch
|
||||
|
||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||
|
||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||
|
||||
# Implement the BackendServicer class with the service methods
|
||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    gRPC servicer of the parler-tts backend.

    Exposes the Health, LoadModel and TTS methods of the backend service.
    """

    def Health(self, request, context):
        """
        Liveness probe.

        Args:
            request: The incoming health-check message (not inspected).
            context: The grpc.ServicerContext of the RPC (not used).

        Returns:
            A backend_pb2.Reply carrying the bytes b"OK".
        """
        payload = bytes("OK", 'utf-8')
        return backend_pb2.Reply(message=payload)

    def LoadModel(self, request, context):
        """
        Load the Parler-TTS model and its tokenizer named by request.Model.

        The model is moved to "cuda:0" when CUDA is available, otherwise it
        stays on the CPU.

        Args:
            request: The load request; only its Model field is read.
            context: The grpc.ServicerContext of the RPC (not used).

        Returns:
            A backend_pb2.Result with success=True on success, or success=False
            and the exception text in message when loading raises.
        """
        name = request.Model
        if torch.cuda.is_available():
            target = "cuda:0"
        else:
            target = "cpu"

        try:
            loaded = ParlerTTSForConditionalGeneration.from_pretrained(name)
            self.model = loaded.to(target)
            self.tokenizer = AutoTokenizer.from_pretrained(name)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def TTS(self, request, context):
        """
        Generate speech for request.text and write it to request.dst.

        request.voice is used as the speaker description; a built-in
        description is used when it is empty.

        Args:
            request: The TTS request. Reads model (must be non-empty), voice,
                text and dst.
            context: The grpc.ServicerContext of the RPC (not used).

        Returns:
            A backend_pb2.Result with success=True on success, or success=False
            and an explanatory message otherwise.
        """
        if request.model == "":
            return backend_pb2.Result(success=False, message="request.model is required")

        description = request.voice or "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."

        try:
            device = "cuda:0" if torch.cuda.is_available() else "cpu"

            def encode(text):
                # Tokenize to PyTorch tensors and move the ids to the chosen device.
                return self.tokenizer(text, return_tensors="pt").input_ids.to(device)

            description_ids = encode(description)
            text_ids = encode(request.text)

            generated = self.model.generate(input_ids=description_ids, prompt_input_ids=text_ids)
            samples = generated.cpu().numpy().squeeze()
            print("[parler-tts] TTS generated!", file=sys.stderr)
            sf.write(request.dst, samples, self.model.config.sampling_rate)
            print("[parler-tts] TTS saved to", request.dst, file=sys.stderr)
            print("[parler-tts] TTS for", file=sys.stderr)
            print(request, file=sys.stderr)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

        return backend_pb2.Result(success=True)
|
||||
|
||||
|
||||
def serve(address):
    """
    Start the gRPC backend on `address` and block until the process is terminated.

    Args:
        address: The host:port string passed to add_insecure_port.
    """
    executor = futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)
    server = grpc.server(executor)
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("[parler-tts] Server started. Listening on: " + address, file=sys.stderr)

    def _on_terminate(signum, _frame):
        # Stop the server immediately and leave the process.
        print("[parler-tts] Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    # SIGINT and SIGTERM share the same shutdown path.
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, _on_terminate)

    # Park the main thread; requests are served by the executor threads.
    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)
|
||||
|
||||
if __name__ == "__main__":
    # Read the listen address from the command line, log the parsed
    # arguments on stderr and hand the address to serve().
    cli = argparse.ArgumentParser(description="Run the gRPC server.")
    cli.add_argument("--addr", default="localhost:50051", help="The address to bind the server to.")
    args = cli.parse_args()
    print(f"[parler-tts] startup: {args}", file=sys.stderr)
    serve(args.addr)
|
||||
@@ -1,28 +0,0 @@
|
||||
#!/bin/bash
set -e

# Load the shared backend helpers; quoted so a checkout path with spaces works.
source "$(dirname "$0")/../common/libbackend.sh"

# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi

installRequirements

# https://github.com/descriptinc/audiotools/issues/101
# incompatible protobuf versions.
PYDIR=python3.10
pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"

if [ ! -d "${pyenv}" ]; then
    echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
    exit 1
fi

# --fail makes curl exit non-zero on an HTTP error, so with `set -e` the install
# aborts instead of saving an error page as builder.py.
curl -fL https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o "${pyenv}/builder.py"
|
||||
@@ -1,4 +0,0 @@
|
||||
git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17
|
||||
llvmlite==0.43.0
|
||||
numba==0.60.0
|
||||
grpcio-tools==1.42.0
|
||||
@@ -1,3 +0,0 @@
|
||||
transformers
|
||||
accelerate
|
||||
torch==2.4.1
|
||||
@@ -1,5 +0,0 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||
torch==2.4.1+cu118
|
||||
torchaudio==2.4.1+cu118
|
||||
transformers
|
||||
accelerate
|
||||
@@ -1,4 +0,0 @@
|
||||
torch==2.4.1
|
||||
torchaudio==2.4.1
|
||||
transformers
|
||||
accelerate
|
||||
@@ -1,5 +0,0 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||
torch==2.3.0+rocm6.0
|
||||
torchaudio==2.3.0+rocm6.0
|
||||
transformers
|
||||
accelerate
|
||||
@@ -1,4 +0,0 @@
|
||||
grpcio==1.69.0
|
||||
certifi
|
||||
llvmlite==0.43.0
|
||||
setuptools
|
||||
@@ -1,4 +0,0 @@
|
||||
#!/bin/bash
# Load the shared backend helpers; quoted so a checkout path with spaces works.
source "$(dirname "$0")/../common/libbackend.sh"

# "$@" forwards every argument unchanged, even ones that contain spaces.
startBackend "$@"
|
||||
@@ -1,81 +0,0 @@
|
||||
"""
|
||||
A test script to test the gRPC service
|
||||
"""
|
||||
import unittest
|
||||
import subprocess
|
||||
import time
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
|
||||
import grpc
|
||||
|
||||
|
||||
class TestBackendServicer(unittest.TestCase):
    """
    Tests the gRPC backend through a server started in a subprocess.

    unittest runs setUp before and tearDown after every test method, so the
    tests must not call them directly: doing so would start a second server
    on the same port and lose the handle of the first one.
    """

    def setUp(self):
        """
        Start the backend server in a subprocess and give it time to come up.
        """
        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
        time.sleep(10)

    def tearDown(self) -> None:
        """
        Terminate the server subprocess and wait for it to exit.
        """
        self.service.terminate()
        self.service.wait()

    def test_server_startup(self):
        """
        The Health RPC answers b'OK' once the server is running.
        """
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.Health(backend_pb2.HealthMessage())
        except grpc.RpcError as err:
            self.fail(f"Server failed to start: {err}")
        # Asserted outside the try block so a wrong reply is reported as such.
        self.assertEqual(response.message, b'OK')

    def test_load_model(self):
        """
        LoadModel succeeds for the parler_tts_mini_v0.1 model.
        """
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1"))
        except grpc.RpcError as err:
            self.fail(f"LoadModel service failed: {err}")
        self.assertTrue(response.success)
        self.assertEqual(response.message, "Model loaded successfully")

    def test_tts(self):
        """
        The TTS RPC returns a response after a model has been loaded.
        """
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1"))
                self.assertTrue(response.success)
                # NOTE(review): the request sets neither model nor dst, and only
                # the presence of a response is checked - confirm this is intended.
                tts_request = backend_pb2.TTSRequest(text="Hey, how are you doing today?")
                tts_response = stub.TTS(tts_request)
        except grpc.RpcError as err:
            self.fail(f"TTS service failed: {err}")
        self.assertIsNotNone(tts_response)
|
||||
@@ -1,6 +0,0 @@
|
||||
#!/bin/bash
set -e

# Load the shared backend helpers; quoted so a checkout path with spaces works.
source "$(dirname "$0")/../common/libbackend.sh"

runUnittests
|
||||
@@ -21,7 +21,7 @@ import torch.cuda
|
||||
|
||||
|
||||
XPU=os.environ.get("XPU", "0") == "1"
|
||||
from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria
|
||||
from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria, MambaConfig, MambaForCausalLM
|
||||
from transformers import AutoProcessor, MusicgenForConditionalGeneration
|
||||
from scipy.io import wavfile
|
||||
import outetts
|
||||
@@ -245,6 +245,10 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
autoTokenizer = False
|
||||
self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
|
||||
self.SentenceTransformer = True
|
||||
elif request.Type == "Mamba":
|
||||
autoTokenizer = False
|
||||
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
self.model = MambaForCausalLM.from_pretrained(model_name)
|
||||
else:
|
||||
print("Automodel", file=sys.stderr)
|
||||
self.model = AutoModel.from_pretrained(model_name,
|
||||
|
||||
@@ -515,7 +515,7 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
|
||||
}
|
||||
}
|
||||
if (u & FLAG_IMAGE) == FLAG_IMAGE {
|
||||
imageBackends := []string{"diffusers", "stablediffusion"}
|
||||
imageBackends := []string{"diffusers", "stablediffusion", "stablediffusion-ggml"}
|
||||
if !slices.Contains(imageBackends, c.Backend) {
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -48,5 +48,66 @@ var _ = Describe("Test cases for config related functions", func() {
|
||||
// configs should include the whisper-1 model's api.config
|
||||
Expect(loadedModelNames).To(ContainElements("whisper-1"))
|
||||
})
|
||||
|
||||
It("Test new loadconfig", func() {
|
||||
|
||||
bcl := NewBackendConfigLoader(os.Getenv("MODELS_PATH"))
|
||||
err := bcl.LoadBackendConfigsFromPath(os.Getenv("MODELS_PATH"))
|
||||
Expect(err).To(BeNil())
|
||||
configs := bcl.GetAllBackendConfigs()
|
||||
loadedModelNames := []string{}
|
||||
for _, v := range configs {
|
||||
loadedModelNames = append(loadedModelNames, v.Name)
|
||||
}
|
||||
Expect(configs).ToNot(BeNil())
|
||||
totalModels := len(loadedModelNames)
|
||||
|
||||
Expect(loadedModelNames).To(ContainElements("code-search-ada-code-001"))
|
||||
|
||||
// configs should include the text-embedding-ada-002 model's api.config
|
||||
Expect(loadedModelNames).To(ContainElements("text-embedding-ada-002"))
|
||||
|
||||
// configs should include the rwkv_test model's api.config
|
||||
Expect(loadedModelNames).To(ContainElements("rwkv_test"))
|
||||
|
||||
// configs should include the whisper-1 model's api.config
|
||||
Expect(loadedModelNames).To(ContainElements("whisper-1"))
|
||||
|
||||
// create a temp directory and store a temporary model
|
||||
tmpdir, err := os.MkdirTemp("", "test")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
defer os.RemoveAll(tmpdir)
|
||||
|
||||
// create a temporary model
|
||||
model := `name: "test-model"
|
||||
description: "test model"
|
||||
options:
|
||||
- foo
|
||||
- bar
|
||||
- baz
|
||||
`
|
||||
modelFile := tmpdir + "/test-model.yaml"
|
||||
err = os.WriteFile(modelFile, []byte(model), 0644)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
err = bcl.LoadBackendConfigsFromPath(tmpdir)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
configs = bcl.GetAllBackendConfigs()
|
||||
Expect(len(configs)).ToNot(Equal(totalModels))
|
||||
|
||||
loadedModelNames = []string{}
|
||||
var testModel BackendConfig
|
||||
for _, v := range configs {
|
||||
loadedModelNames = append(loadedModelNames, v.Name)
|
||||
if v.Name == "test-model" {
|
||||
testModel = v
|
||||
}
|
||||
}
|
||||
Expect(loadedModelNames).To(ContainElements("test-model"))
|
||||
Expect(testModel.Description).To(Equal("test model"))
|
||||
Expect(testModel.Options).To(ContainElements("foo", "bar", "baz"))
|
||||
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -687,6 +687,10 @@ var _ = Describe("API test", func() {
|
||||
Name: "model-gallery",
|
||||
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/index.yaml",
|
||||
},
|
||||
{
|
||||
Name: "localai",
|
||||
URL: "https://raw.githubusercontent.com/mudler/LocalAI/refs/heads/master/gallery/index.yaml",
|
||||
},
|
||||
}
|
||||
|
||||
application, err := application.New(
|
||||
@@ -764,10 +768,8 @@ var _ = Describe("API test", func() {
|
||||
}
|
||||
|
||||
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||
ID: "model-gallery@stablediffusion",
|
||||
Overrides: map[string]interface{}{
|
||||
"parameters": map[string]interface{}{"model": "stablediffusion_assets"},
|
||||
},
|
||||
ID: "localai@sd-1.5-ggml",
|
||||
Name: "stablediffusion",
|
||||
})
|
||||
|
||||
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
|
||||
@@ -778,14 +780,14 @@ var _ = Describe("API test", func() {
|
||||
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
|
||||
fmt.Println(response)
|
||||
return response["processed"].(bool)
|
||||
}, "360s", "10s").Should(Equal(true))
|
||||
}, "1200s", "10s").Should(Equal(true))
|
||||
|
||||
resp, err := http.Post(
|
||||
"http://127.0.0.1:9090/v1/images/generations",
|
||||
"application/json",
|
||||
bytes.NewBuffer([]byte(`{
|
||||
"prompt": "floating hair, portrait, ((loli)), ((one girl)), cute face, hidden hands, asymmetrical bangs, beautiful detailed eyes, eye shadow, hair ornament, ribbons, bowties, buttons, pleated skirt, (((masterpiece))), ((best quality)), colorful|((part of the head)), ((((mutated hands and fingers)))), deformed, blurry, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus, long neck, long body, Octane renderer, lowres, bad anatomy, bad hands, text",
|
||||
"mode": 2, "seed":9000,
|
||||
"prompt": "a lovely cat",
|
||||
"step": 1, "seed":9000,
|
||||
"size": "256x256", "n":2}`)))
|
||||
// The response should contain an URL
|
||||
Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
|
||||
@@ -794,6 +796,7 @@ var _ = Describe("API test", func() {
|
||||
|
||||
imgUrlResp := &schema.OpenAIResponse{}
|
||||
err = json.Unmarshal(dat, imgUrlResp)
|
||||
Expect(err).ToNot(HaveOccurred(), fmt.Sprint(dat))
|
||||
Expect(imgUrlResp.Data).ToNot(Or(BeNil(), BeZero()))
|
||||
imgUrl := imgUrlResp.Data[0].URL
|
||||
Expect(imgUrl).To(ContainSubstring("http://127.0.0.1:9090/"), imgUrl)
|
||||
|
||||
@@ -28,7 +28,7 @@ func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ct
|
||||
}
|
||||
}
|
||||
|
||||
// BackendMonitorEndpoint shuts down the specified backend
|
||||
// BackendShutdownEndpoint shuts down the specified backend
|
||||
// @Summary Backend monitor endpoint
|
||||
// @Param request body schema.BackendMonitorRequest true "Backend statistics request"
|
||||
// @Router /backend/shutdown [post]
|
||||
|
||||
@@ -72,7 +72,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
|
||||
}
|
||||
|
||||
if m == "" {
|
||||
m = model.StableDiffusionBackend
|
||||
m = "stablediffusion"
|
||||
}
|
||||
log.Debug().Msgf("Loading model: %+v", m)
|
||||
|
||||
@@ -129,9 +129,9 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
|
||||
|
||||
switch config.Backend {
|
||||
case "stablediffusion":
|
||||
config.Backend = model.StableDiffusionBackend
|
||||
config.Backend = model.StableDiffusionGGMLBackend
|
||||
case "":
|
||||
config.Backend = model.StableDiffusionBackend
|
||||
config.Backend = model.StableDiffusionGGMLBackend
|
||||
}
|
||||
|
||||
if !strings.Contains(input.Size, "x") {
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/google/uuid"
|
||||
@@ -296,6 +297,14 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If a quality was defined as number, convert it to step
|
||||
if input.Quality != "" {
|
||||
q, err := strconv.Atoi(input.Quality)
|
||||
if err == nil {
|
||||
config.Step = q
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.BackendConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.BackendConfig, *schema.OpenAIRequest, error) {
|
||||
|
||||
@@ -191,8 +191,9 @@ type OpenAIRequest struct {
|
||||
Stream bool `json:"stream"`
|
||||
|
||||
// Image (not supported by OpenAI)
|
||||
Mode int `json:"mode"`
|
||||
Step int `json:"step"`
|
||||
Mode int `json:"mode"`
|
||||
Quality string `json:"quality"`
|
||||
Step int `json:"step"`
|
||||
|
||||
// A grammar to constrain the LLM output
|
||||
Grammar string `json:"grammar" yaml:"grammar"`
|
||||
|
||||
2
docs/themes/hugo-theme-relearn
vendored
2
docs/themes/hugo-theme-relearn
vendored
Submodule docs/themes/hugo-theme-relearn updated: 80e448e5bd...8dad5ee419
@@ -5219,6 +5219,23 @@
|
||||
- filename: Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf
|
||||
sha256: 268390e07edd407ad93ea21a868b7ae995b5950e01cad0db9e1802ae5049d405
|
||||
uri: huggingface://bartowski/Dolphin3.0-Llama3.1-8B-GGUF/Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
name: "deepseek-r1-distill-llama-8b"
|
||||
icon: "https://avatars.githubusercontent.com/u/148330874"
|
||||
urls:
|
||||
- https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B
|
||||
- https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF
|
||||
description: |
|
||||
DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks.
|
||||
Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing.
|
||||
By incorporating cold-start data prior to the RL phase, DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks.
|
||||
overrides:
|
||||
parameters:
|
||||
model: deepseek-r1-distill-llama-8b-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: deepseek-r1-distill-llama-8b-Q4_K_M.gguf
|
||||
sha256: f8eba201522ab44b79bc54166126bfaf836111ff4cbf2d13c59c3b57da10573b
|
||||
uri: huggingface://unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf
|
||||
- &deepseek ## Deepseek
|
||||
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
|
||||
name: "deepseek-coder-v2-lite-instruct"
|
||||
@@ -5284,6 +5301,86 @@
|
||||
- filename: archangel_sft_pythia2-8b.Q4_K_M.gguf
|
||||
sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8
|
||||
uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf
|
||||
- &deepseek-r1 ## Start DeepSeek-R1
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
name: "deepseek-r1-distill-qwen-1.5b"
|
||||
icon: "https://avatars.githubusercontent.com/u/148330874"
|
||||
urls:
|
||||
- https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5b
|
||||
- https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF
|
||||
description: |
|
||||
DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks.
|
||||
Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing.
|
||||
By incorporating cold-start data prior to the RL phase, DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks.
|
||||
overrides:
|
||||
parameters:
|
||||
model: DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf
|
||||
sha256: 1741e5b2d062b07acf048bf0d2c514dadf2a48f94e2b4aa0cfe069af3838ee2f
|
||||
uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf
|
||||
- !!merge <<: *deepseek-r1
|
||||
name: "deepseek-r1-distill-qwen-7b"
|
||||
urls:
|
||||
- https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
|
||||
- https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF
|
||||
overrides:
|
||||
parameters:
|
||||
model: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
|
||||
sha256: 731ece8d06dc7eda6f6572997feb9ee1258db0784827e642909d9b565641937b
|
||||
uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
|
||||
- !!merge <<: *deepseek-r1
|
||||
name: "deepseek-r1-distill-qwen-14b"
|
||||
urls:
|
||||
- https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
|
||||
- https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-14B-GGUF
|
||||
overrides:
|
||||
parameters:
|
||||
model: DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf
|
||||
sha256: 0b319bd0572f2730bfe11cc751defe82045fad5085b4e60591ac2cd2d9633181
|
||||
uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-14B-GGUF/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf
|
||||
- !!merge <<: *deepseek-r1
|
||||
name: "deepseek-r1-distill-qwen-32b"
|
||||
urls:
|
||||
- https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
|
||||
- https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF
|
||||
overrides:
|
||||
parameters:
|
||||
model: DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf
|
||||
sha256: bed9b0f551f5b95bf9da5888a48f0f87c37ad6b72519c4cbd775f54ac0b9fc62
|
||||
uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF/DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf
|
||||
- !!merge <<: *deepseek-r1
|
||||
name: "deepseek-r1-distill-llama-8b"
|
||||
icon: "https://avatars.githubusercontent.com/u/148330874"
|
||||
urls:
|
||||
- https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B
|
||||
- https://huggingface.co/bartowski/DeepSeek-R1-Distill-Llama-8B-GGUF
|
||||
overrides:
|
||||
parameters:
|
||||
model: DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf
|
||||
sha256: 87bcba20b4846d8dadf753d3ff48f9285d131fc95e3e0e7e934d4f20bc896f5d
|
||||
uri: huggingface://bartowski/DeepSeek-R1-Distill-Llama-8B-GGUF/DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf
|
||||
- !!merge <<: *deepseek-r1
|
||||
name: "deepseek-r1-distill-llama-70b"
|
||||
icon: "https://avatars.githubusercontent.com/u/148330874"
|
||||
urls:
|
||||
- https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B
|
||||
- https://huggingface.co/bartowski/DeepSeek-R1-Distill-Llama-70B-GGUF
|
||||
overrides:
|
||||
parameters:
|
||||
model: DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf
|
||||
sha256: 181a82a1d6d2fa24fe4db83a68eee030384986bdbdd4773ba76424e3a6eb9fd8
|
||||
uri: huggingface://bartowski/DeepSeek-R1-Distill-Llama-70B-GGUF/DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf
|
||||
- &qwen2 ## Start QWEN2
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
name: "qwen2-7b-instruct"
|
||||
@@ -5617,6 +5714,32 @@
|
||||
- filename: marco-o1-uncensored.Q4_K_M.gguf
|
||||
sha256: ad0440270a7254098f90779744d3e5b34fe49b7baf97c819909ba9c5648cc0d9
|
||||
uri: huggingface://QuantFactory/marco-o1-uncensored-GGUF/marco-o1-uncensored.Q4_K_M.gguf
|
||||
- !!merge <<: *qwen2
|
||||
name: "minicpm-o-2_6"
|
||||
icon: https://avatars.githubusercontent.com/u/89920203
|
||||
urls:
|
||||
- https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf
|
||||
- https://huggingface.co/openbmb/MiniCPM-o-2_6
|
||||
description: |
|
||||
MiniCPM-o 2.6 is the latest and most capable model in the MiniCPM-o series. The model is built in an end-to-end fashion based on SigLip-400M, Whisper-medium-300M, ChatTTS-200M, and Qwen2.5-7B with a total of 8B parameters
|
||||
tags:
|
||||
- llm
|
||||
- multimodal
|
||||
- gguf
|
||||
- gpu
|
||||
- qwen2
|
||||
- cpu
|
||||
overrides:
|
||||
mmproj: minicpm-o-2_6-mmproj-f16.gguf
|
||||
parameters:
|
||||
model: minicpm-o-2_6-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: minicpm-o-2_6-Q4_K_M.gguf
|
||||
sha256: 4f635fc0c0bb88d50ccd9cf1f1e5892b5cb085ff88fe0d8e1148fd9a8a836bc2
|
||||
uri: huggingface://openbmb/MiniCPM-o-2_6-gguf/Model-7.6B-Q4_K_M.gguf
|
||||
- filename: minicpm-o-2_6-mmproj-f16.gguf
|
||||
sha256: efa4f7d96aa0f838f2023fc8d28e519179b16f1106777fa9280b32628191aa3e
|
||||
uri: huggingface://openbmb/MiniCPM-o-2_6-gguf/mmproj-model-f16.gguf
|
||||
- !!merge <<: *qwen2
|
||||
name: "minicpm-v-2_6"
|
||||
license: apache-2.0
|
||||
@@ -11014,7 +11137,7 @@
|
||||
uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
|
||||
sha256: 879db523c30d3b9017143d56705015e15a2cb5628762c11d086fed9538abd7fd
|
||||
- name: stable-diffusion-3-medium
|
||||
icon: https://huggingface.co/leo009/stable-diffusion-3-medium/resolve/main/sd3demo.jpg
|
||||
icon: https://avatars.githubusercontent.com/u/100950301
|
||||
license: other
|
||||
description: |
|
||||
Stable Diffusion 3 Medium is a Multimodal Diffusion Transformer (MMDiT) text-to-image model that features greatly improved performance in image quality, typography, complex prompt understanding, and resource-efficiency.
|
||||
@@ -11028,6 +11151,63 @@
|
||||
- sd-3
|
||||
- gpu
|
||||
url: "github:mudler/LocalAI/gallery/stablediffusion3.yaml@master"
|
||||
- name: sd-1.5-ggml
|
||||
icon: https://avatars.githubusercontent.com/u/37351293
|
||||
license: creativeml-openrail-m
|
||||
url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master"
|
||||
description: |
|
||||
Stable Diffusion 1.5
|
||||
urls:
|
||||
- https://huggingface.co/second-state/stable-diffusion-v1-5-GGUF
|
||||
tags:
|
||||
- text-to-image
|
||||
- stablediffusion
|
||||
- gpu
|
||||
- cpu
|
||||
overrides:
|
||||
options:
|
||||
- "sampler:euler"
|
||||
parameters:
|
||||
model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf
|
||||
files:
|
||||
- filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
|
||||
sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f"
|
||||
uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
|
||||
- name: sd-3.5-medium-ggml
|
||||
license: stabilityai-ai-community
|
||||
url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master"
|
||||
description: |
|
||||
Stable Diffusion 3.5 Medium is a Multimodal Diffusion Transformer (MMDiT) text-to-image model that features improved performance in image quality, typography, complex prompt understanding, and resource-efficiency.
|
||||
urls:
|
||||
- https://huggingface.co/stabilityai/stable-diffusion-3.5-medium
|
||||
- https://huggingface.co/second-state/stable-diffusion-3.5-medium-GGUF
|
||||
tags:
|
||||
- text-to-image
|
||||
- stablediffusion
|
||||
- gpu
|
||||
- cpu
|
||||
icon: https://avatars.githubusercontent.com/u/100950301
|
||||
overrides:
|
||||
options:
|
||||
- "clip_l_path:clip_l-Q4_0.gguf"
|
||||
- "clip_g_path:clip_g-Q4_0.gguf"
|
||||
- "t5xxl_path:t5xxl-Q4_0.gguf"
|
||||
- "sampler:euler"
|
||||
parameters:
|
||||
model: sd3.5_medium-Q4_0.gguf
|
||||
files:
|
||||
- filename: "sd3.5_medium-Q4_0.gguf"
|
||||
sha256: "3bb8c5e9ab0a841117089ed4ed81d885bb85161df2a766b812f829bc55b31adf"
|
||||
uri: "huggingface://second-state/stable-diffusion-3.5-medium-GGUF/sd3.5_medium-Q4_0.gguf"
|
||||
- filename: clip_g-Q4_0.gguf
|
||||
sha256: c142411147e16b7c4b9cc1f5d977cbe596104435d76fde47172d3d35c5e58bb8
|
||||
uri: huggingface://second-state/stable-diffusion-3.5-medium-GGUF/clip_g-Q4_0.gguf
|
||||
- filename: clip_l-Q4_0.gguf
|
||||
sha256: f5ad88ae2ac924eb4ac0298b77afa304b5e6014fc0c4128f0e3df40fdfcc0f8a
|
||||
uri: huggingface://second-state/stable-diffusion-3.5-medium-GGUF/clip_l-Q4_0.gguf
|
||||
- filename: t5xxl-Q4_0.gguf
|
||||
sha256: 987ba47c158b890c274f78fd35324419f50941e846a49789f0977e9fe9d97ab7
|
||||
uri: huggingface://second-state/stable-diffusion-3.5-medium-GGUF/t5xxl-Q4_0.gguf
|
||||
- name: sd-3.5-large-ggml
|
||||
license: stabilityai-ai-community
|
||||
url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master"
|
||||
@@ -11038,10 +11218,10 @@
|
||||
- https://huggingface.co/second-state/stable-diffusion-3.5-large-GGUF
|
||||
tags:
|
||||
- text-to-image
|
||||
- flux
|
||||
- stablediffusion
|
||||
- gpu
|
||||
- cpu
|
||||
icon: https://huggingface.co/stabilityai/stable-diffusion-3.5-large/media/main/sd3.5_large_demo.png
|
||||
icon: https://avatars.githubusercontent.com/u/100950301
|
||||
overrides:
|
||||
parameters:
|
||||
model: sd3.5_large-Q4_0.gguf
|
||||
@@ -11060,6 +11240,7 @@
|
||||
uri: huggingface://second-state/stable-diffusion-3.5-large-GGUF/t5xxl-Q5_0.gguf
|
||||
- &flux
|
||||
name: flux.1-dev
|
||||
icon: https://avatars.githubusercontent.com/u/164064024
|
||||
license: flux-1-dev-non-commercial-license
|
||||
description: |
|
||||
FLUX.1 [dev] is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions. For more information, please read our blog post.
|
||||
@@ -11083,7 +11264,6 @@
|
||||
- !!merge <<: *flux
|
||||
name: flux.1-schnell
|
||||
license: apache-2
|
||||
icon: https://huggingface.co/black-forest-labs/FLUX.1-schnell/resolve/main/schnell_grid.jpeg
|
||||
description: |
|
||||
FLUX.1 [schnell] is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions. For more information, please read our blog post.
|
||||
Key Features
|
||||
@@ -11116,7 +11296,6 @@
|
||||
- flux
|
||||
- gpu
|
||||
- cpu
|
||||
icon: https://huggingface.co/black-forest-labs/FLUX.1-schnell/resolve/main/schnell_grid.jpeg
|
||||
overrides:
|
||||
parameters:
|
||||
model: flux1-dev-Q2_K.gguf
|
||||
@@ -11136,6 +11315,7 @@
|
||||
- &whisper ## Whisper
|
||||
url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master"
|
||||
name: "whisper-1"
|
||||
icon: https://avatars.githubusercontent.com/u/14957082
|
||||
license: "MIT"
|
||||
urls:
|
||||
- https://github.com/ggerganov/whisper.cpp
|
||||
@@ -11313,6 +11493,7 @@
|
||||
description: |
|
||||
Stable Diffusion in NCNN with C++, supporting txt2img and img2img
|
||||
name: stablediffusion-cpp
|
||||
icon: https://avatars.githubusercontent.com/u/100950301
|
||||
- &piper ## Piper TTS
|
||||
url: github:mudler/LocalAI/gallery/piper.yaml@master
|
||||
name: voice-en-us-kathleen-low
|
||||
@@ -11893,6 +12074,7 @@
|
||||
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh_CN-huayan-medium.tar.gz
|
||||
sha256: 0299a5e7f481ba853404e9f0e1515a94d5409585d76963fa4d30c64bd630aa99
|
||||
- name: "silero-vad"
|
||||
icon: https://github.com/snakers4/silero-models/raw/master/files/silero_logo.jpg
|
||||
url: github:mudler/LocalAI/gallery/virtual.yaml@master
|
||||
urls:
|
||||
- https://github.com/snakers4/silero-vad
|
||||
@@ -11912,6 +12094,7 @@
|
||||
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
|
||||
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
|
||||
- name: "bark-cpp-small"
|
||||
icon: https://avatars.githubusercontent.com/u/99442120
|
||||
url: github:mudler/LocalAI/gallery/virtual.yaml@master
|
||||
license: mit
|
||||
urls:
|
||||
|
||||
@@ -21,14 +21,16 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
HuggingFacePrefix = "huggingface://"
|
||||
OCIPrefix = "oci://"
|
||||
OllamaPrefix = "ollama://"
|
||||
HTTPPrefix = "http://"
|
||||
HTTPSPrefix = "https://"
|
||||
GithubURI = "github:"
|
||||
GithubURI2 = "github://"
|
||||
LocalPrefix = "file://"
|
||||
HuggingFacePrefix = "huggingface://"
|
||||
HuggingFacePrefix1 = "hf://"
|
||||
HuggingFacePrefix2 = "hf.co/"
|
||||
OCIPrefix = "oci://"
|
||||
OllamaPrefix = "ollama://"
|
||||
HTTPPrefix = "http://"
|
||||
HTTPSPrefix = "https://"
|
||||
GithubURI = "github:"
|
||||
GithubURI2 = "github://"
|
||||
LocalPrefix = "file://"
|
||||
)
|
||||
|
||||
type URI string
|
||||
@@ -127,6 +129,8 @@ func (u URI) LooksLikeURL() bool {
|
||||
return strings.HasPrefix(string(u), HTTPPrefix) ||
|
||||
strings.HasPrefix(string(u), HTTPSPrefix) ||
|
||||
strings.HasPrefix(string(u), HuggingFacePrefix) ||
|
||||
strings.HasPrefix(string(u), HuggingFacePrefix1) ||
|
||||
strings.HasPrefix(string(u), HuggingFacePrefix2) ||
|
||||
strings.HasPrefix(string(u), GithubURI) ||
|
||||
strings.HasPrefix(string(u), OllamaPrefix) ||
|
||||
strings.HasPrefix(string(u), OCIPrefix) ||
|
||||
@@ -170,8 +174,10 @@ func (s URI) ResolveURL() string {
|
||||
projectPath := strings.Join(repoPath[2:], "/")
|
||||
|
||||
return fmt.Sprintf("https://raw.githubusercontent.com/%s/%s/%s/%s", org, project, branch, projectPath)
|
||||
case strings.HasPrefix(string(s), HuggingFacePrefix):
|
||||
case strings.HasPrefix(string(s), HuggingFacePrefix) || strings.HasPrefix(string(s), HuggingFacePrefix1) || strings.HasPrefix(string(s), HuggingFacePrefix2):
|
||||
repository := strings.Replace(string(s), HuggingFacePrefix, "", 1)
|
||||
repository = strings.Replace(repository, HuggingFacePrefix1, "", 1)
|
||||
repository = strings.Replace(repository, HuggingFacePrefix2, "", 1)
|
||||
// convert repository to a full URL.
|
||||
// e.g. TheBloke/Mixtral-8x7B-v0.1-GGUF/mixtral-8x7b-v0.1.Q2_K.gguf@main -> https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q2_K.gguf
|
||||
owner := strings.Split(repository, "/")[0]
|
||||
|
||||
@@ -34,7 +34,7 @@ type Tool struct {
|
||||
}
|
||||
type Tools []Tool
|
||||
|
||||
// ToJSONNameStructure converts a list of functions to a JSON structure that can be parsed to a grammar
|
||||
// ToJSONStructure converts a list of functions to a JSON structure that can be parsed to a grammar
|
||||
// This allows the LLM to return a response of the type: { "name": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }
|
||||
func (f Functions) ToJSONStructure(name, args string) JSONFunctionStructure {
|
||||
nameKey := defaultFunctionNameKey
|
||||
|
||||
@@ -29,11 +29,14 @@ var Aliases map[string]string = map[string]string{
|
||||
"langchain-huggingface": LCHuggingFaceBackend,
|
||||
"transformers-musicgen": TransformersBackend,
|
||||
"sentencetransformers": TransformersBackend,
|
||||
"mamba": TransformersBackend,
|
||||
"stablediffusion": StableDiffusionGGMLBackend,
|
||||
}
|
||||
|
||||
var TypeAlias map[string]string = map[string]string{
|
||||
"sentencetransformers": "SentenceTransformer",
|
||||
"huggingface-embeddings": "SentenceTransformer",
|
||||
"mamba": "Mamba",
|
||||
"transformers-musicgen": "MusicgenForConditionalGeneration",
|
||||
}
|
||||
|
||||
@@ -45,6 +48,7 @@ const (
|
||||
LLamaCPP = "llama-cpp"
|
||||
|
||||
LLamaCPPAVX2 = "llama-cpp-avx2"
|
||||
LLamaCPPAVX512 = "llama-cpp-avx512"
|
||||
LLamaCPPAVX = "llama-cpp-avx"
|
||||
LLamaCPPFallback = "llama-cpp-fallback"
|
||||
LLamaCPPCUDA = "llama-cpp-cuda"
|
||||
@@ -54,15 +58,27 @@ const (
|
||||
|
||||
LLamaCPPGRPC = "llama-cpp-grpc"
|
||||
|
||||
WhisperBackend = "whisper"
|
||||
StableDiffusionBackend = "stablediffusion"
|
||||
PiperBackend = "piper"
|
||||
LCHuggingFaceBackend = "huggingface"
|
||||
WhisperBackend = "whisper"
|
||||
StableDiffusionGGMLBackend = "stablediffusion-ggml"
|
||||
PiperBackend = "piper"
|
||||
LCHuggingFaceBackend = "huggingface"
|
||||
|
||||
TransformersBackend = "transformers"
|
||||
LocalStoreBackend = "local-store"
|
||||
)
|
||||
|
||||
// llamaCPPVariants lists the names of the llama.cpp backend variants.
// During backend auto-detection every directory entry name is checked against
// each of these with a substring match, and each variant found is recorded
// once under the generic llama-cpp backend.
// NOTE(review): the AVX name ("llama-cpp-avx") is a substring of the AVX2 and
// AVX512 names, so a substring match on an AVX2/AVX512 entry also reports the
// plain AVX variant — confirm this is intended.
var llamaCPPVariants = []string{
|
||||
LLamaCPPAVX2,
|
||||
LLamaCPPAVX512,
|
||||
LLamaCPPAVX,
|
||||
LLamaCPPFallback,
|
||||
LLamaCPPCUDA,
|
||||
LLamaCPPHipblas,
|
||||
LLamaCPPSycl16,
|
||||
LLamaCPPSycl32,
|
||||
LLamaCPPGRPC,
|
||||
}
|
||||
|
||||
// backendPath builds the path of a backend's gRPC binary inside the asset
// directory: <assetDir>/backend-assets/grpc/<backend>.
func backendPath(assetDir, backend string) string {
	elems := []string{assetDir, "backend-assets", "grpc", backend}
	return filepath.Join(elems...)
}
|
||||
@@ -104,40 +120,14 @@ ENTRY:
|
||||
if AutoDetect {
|
||||
// if we find the llama.cpp variants, show them as a single backend (llama-cpp) as later we are going to pick that up
|
||||
// when starting the service
|
||||
foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda, foundLCPPHipblas, foundSycl16, foundSycl32 := false, false, false, false, false, false, false, false
|
||||
foundVariants := map[string]bool{}
|
||||
if _, ok := backends[LLamaCPP]; !ok {
|
||||
for _, e := range entry {
|
||||
if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 {
|
||||
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPAVX2)
|
||||
foundLCPPAVX2 = true
|
||||
}
|
||||
if strings.Contains(e.Name(), LLamaCPPAVX) && !foundLCPPAVX {
|
||||
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPAVX)
|
||||
foundLCPPAVX = true
|
||||
}
|
||||
if strings.Contains(e.Name(), LLamaCPPFallback) && !foundLCPPFallback {
|
||||
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPFallback)
|
||||
foundLCPPFallback = true
|
||||
}
|
||||
if strings.Contains(e.Name(), LLamaCPPGRPC) && !foundLCPPGRPC {
|
||||
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPGRPC)
|
||||
foundLCPPGRPC = true
|
||||
}
|
||||
if strings.Contains(e.Name(), LLamaCPPCUDA) && !foundLCPPCuda {
|
||||
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPCUDA)
|
||||
foundLCPPCuda = true
|
||||
}
|
||||
if strings.Contains(e.Name(), LLamaCPPHipblas) && !foundLCPPHipblas {
|
||||
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPHipblas)
|
||||
foundLCPPHipblas = true
|
||||
}
|
||||
if strings.Contains(e.Name(), LLamaCPPSycl16) && !foundSycl16 {
|
||||
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPSycl16)
|
||||
foundSycl16 = true
|
||||
}
|
||||
if strings.Contains(e.Name(), LLamaCPPSycl32) && !foundSycl32 {
|
||||
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPSycl32)
|
||||
foundSycl32 = true
|
||||
for _, v := range llamaCPPVariants {
|
||||
if strings.Contains(e.Name(), v) && !foundVariants[v] {
|
||||
backends[LLamaCPP] = append(backends[LLamaCPP], v)
|
||||
foundVariants[v] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -280,6 +270,12 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
|
||||
log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
|
||||
selectedProcess = p
|
||||
}
|
||||
} else if xsysinfo.HasCPUCaps(cpuid.AVX512F) {
|
||||
p := backendPath(assetDir, LLamaCPPAVX512)
|
||||
if _, err := os.Stat(p); err == nil {
|
||||
log.Info().Msgf("[%s] attempting to load with AVX512 variant", backend)
|
||||
selectedProcess = p
|
||||
}
|
||||
} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
|
||||
p := backendPath(assetDir, LLamaCPPAVX)
|
||||
if _, err := os.Stat(p); err == nil {
|
||||
|
||||
@@ -1,35 +0,0 @@
|
||||
//go:build stablediffusion
|
||||
// +build stablediffusion
|
||||
|
||||
package stablediffusion
|
||||
|
||||
import (
|
||||
stableDiffusion "github.com/mudler/go-stable-diffusion"
|
||||
)
|
||||
|
||||
func GenerateImage(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst, asset_dir string) error {
|
||||
if height > 512 || width > 512 {
|
||||
return stableDiffusion.GenerateImageUpscaled(
|
||||
height,
|
||||
width,
|
||||
step,
|
||||
seed,
|
||||
positive_prompt,
|
||||
negative_prompt,
|
||||
dst,
|
||||
asset_dir,
|
||||
)
|
||||
}
|
||||
return stableDiffusion.GenerateImage(
|
||||
height,
|
||||
width,
|
||||
mode,
|
||||
step,
|
||||
seed,
|
||||
positive_prompt,
|
||||
negative_prompt,
|
||||
dst,
|
||||
"",
|
||||
asset_dir,
|
||||
)
|
||||
}
|
||||
@@ -1,10 +0,0 @@
|
||||
//go:build !stablediffusion
|
||||
// +build !stablediffusion
|
||||
|
||||
package stablediffusion
|
||||
|
||||
import "fmt"
|
||||
|
||||
// GenerateImage is the stand-in used when the binary is built without the
// stablediffusion tag: it ignores every argument and always returns an error
// saying so.
func GenerateImage(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst, asset_dir string) error {
	errNotBuilt := fmt.Errorf("This version of LocalAI was built without the stablediffusion tag")
	return errNotBuilt
}
|
||||
@@ -1,20 +0,0 @@
|
||||
package stablediffusion
|
||||
|
||||
import "os"
|
||||
|
||||
// StableDiffusion holds the asset directory handed to image generation calls.
type StableDiffusion struct {
	assetDir string
}

// New returns a StableDiffusion bound to assetDir.
// It fails with the os.Stat error when assetDir cannot be stat'ed.
func New(assetDir string) (*StableDiffusion, error) {
	_, statErr := os.Stat(assetDir)
	if statErr != nil {
		return nil, statErr
	}

	sd := new(StableDiffusion)
	sd.assetDir = assetDir
	return sd, nil
}
|
||||
|
||||
func (s *StableDiffusion) GenerateImage(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst string) error {
|
||||
return GenerateImage(height, width, mode, step, seed, positive_prompt, negative_prompt, dst, s.assetDir)
|
||||
}
|
||||
@@ -1645,6 +1645,9 @@ const docTemplate = `{
|
||||
"prompt": {
|
||||
"description": "Prompt is read only by completion/image API calls"
|
||||
},
|
||||
"quality": {
|
||||
"type": "string"
|
||||
},
|
||||
"repeat_last_n": {
|
||||
"type": "integer"
|
||||
},
|
||||
|
||||
@@ -1638,6 +1638,9 @@
|
||||
"prompt": {
|
||||
"description": "Prompt is read only by completion/image API calls"
|
||||
},
|
||||
"quality": {
|
||||
"type": "string"
|
||||
},
|
||||
"repeat_last_n": {
|
||||
"type": "integer"
|
||||
},
|
||||
|
||||
@@ -570,6 +570,8 @@ definitions:
|
||||
type: number
|
||||
prompt:
|
||||
description: Prompt is read only by completion/image API calls
|
||||
quality:
|
||||
type: string
|
||||
repeat_last_n:
|
||||
type: integer
|
||||
repeat_penalty:
|
||||
|
||||
@@ -54,7 +54,7 @@ var _ = BeforeSuite(func() {
|
||||
Eventually(func() error {
|
||||
_, err := client.ListModels(context.TODO())
|
||||
return err
|
||||
}, "20m").ShouldNot(HaveOccurred())
|
||||
}, "50m").ShouldNot(HaveOccurred())
|
||||
})
|
||||
|
||||
var _ = AfterSuite(func() {
|
||||
|
||||
@@ -123,8 +123,9 @@ var _ = Describe("E2E test", func() {
|
||||
It("correctly", func() {
|
||||
resp, err := client.CreateImage(context.TODO(),
|
||||
openai.ImageRequest{
|
||||
Prompt: "test",
|
||||
Size: openai.CreateImageSize512x512,
|
||||
Prompt: "test",
|
||||
Quality: "1",
|
||||
Size: openai.CreateImageSize256x256,
|
||||
},
|
||||
)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
@@ -135,7 +136,8 @@ var _ = Describe("E2E test", func() {
|
||||
resp, err := client.CreateImage(context.TODO(),
|
||||
openai.ImageRequest{
|
||||
Prompt: "test",
|
||||
Size: openai.CreateImageSize512x512,
|
||||
Size: openai.CreateImageSize256x256,
|
||||
Quality: "1",
|
||||
ResponseFormat: openai.CreateImageResponseFormatURL,
|
||||
},
|
||||
)
|
||||
@@ -147,7 +149,8 @@ var _ = Describe("E2E test", func() {
|
||||
resp, err := client.CreateImage(context.TODO(),
|
||||
openai.ImageRequest{
|
||||
Prompt: "test",
|
||||
Size: openai.CreateImageSize512x512,
|
||||
Size: openai.CreateImageSize256x256,
|
||||
Quality: "1",
|
||||
ResponseFormat: openai.CreateImageResponseFormatB64JSON,
|
||||
},
|
||||
)
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"embed"
|
||||
"math"
|
||||
"math/rand"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
@@ -22,6 +23,19 @@ import (
|
||||
//go:embed backend-assets/*
|
||||
var backendAssets embed.FS
|
||||
|
||||
// normalize rescales every vector in vecs, in place, to unit Euclidean length.
//
// Vectors whose norm is zero (empty or all-zero) are left untouched: dividing
// by a zero norm would otherwise fill them with NaN.
func normalize(vecs [][]float32) {
	for _, vec := range vecs {
		sumSq := float64(0)
		for _, x := range vec {
			sumSq += float64(x * x)
		}

		norm := math.Sqrt(sumSq)
		if norm == 0 {
			continue
		}

		// vec shares its backing array with vecs[i], so this updates the
		// caller's data.
		for j, x := range vec {
			vec[j] = x / float32(norm)
		}
	}
}
|
||||
|
||||
var _ = Describe("Integration tests for the stores backend(s) and internal APIs", Label("stores"), func() {
|
||||
Context("Embedded Store get,set and delete", func() {
|
||||
var sl *model.ModelLoader
|
||||
@@ -192,17 +206,8 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs"
|
||||
// set 3 vectors that are at varying angles to {0.5, 0.5, 0.5}
|
||||
keys := [][]float32{{0.1, 0.3, 0.5}, {0.5, 0.5, 0.5}, {0.6, 0.6, -0.6}, {0.7, -0.7, -0.7}}
|
||||
vals := [][]byte{[]byte("test0"), []byte("test1"), []byte("test2"), []byte("test3")}
|
||||
// normalize the keys
|
||||
for i, k := range keys {
|
||||
norm := float64(0)
|
||||
for _, x := range k {
|
||||
norm += float64(x * x)
|
||||
}
|
||||
norm = math.Sqrt(norm)
|
||||
for j, x := range k {
|
||||
keys[i][j] = x / float32(norm)
|
||||
}
|
||||
}
|
||||
|
||||
normalize(keys)
|
||||
|
||||
err := store.SetCols(context.Background(), sc, keys, vals)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
@@ -225,5 +230,121 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs"
|
||||
Expect(ks[1]).To(Equal(keys[1]))
|
||||
Expect(vals[1]).To(Equal(vals[1]))
|
||||
})
|
||||
|
||||
It("It produces the correct cosine similarities for orthogonal and opposite unit vectors", func() {
	// Three axis unit vectors plus the negated x axis; values are named
	// after the vector they are stored under.
	keys := [][]float32{{1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0}, {-1.0, 0.0, 0.0}}
	vals := [][]byte{[]byte("x"), []byte("y"), []byte("z"), []byte("-x")}

	err := store.SetCols(context.Background(), sc, keys, vals)
	Expect(err).ToNot(HaveOccurred())

	// Querying with x: identical -> 1, the two orthogonal axes -> 0,
	// the opposite vector -> -1.
	_, _, sims, err := store.Find(context.Background(), sc, keys[0], 4)
	Expect(err).ToNot(HaveOccurred())
	Expect(sims).To(Equal([]float32{1.0, 0.0, 0.0, -1.0}))
})
|
||||
|
||||
It("It produces the correct cosine similarities for orthogonal and opposite vectors", func() {
	// Same idea as the unit-vector case, but with keys that are not unit length.
	keys := [][]float32{{1.0, 0.0, 1.0}, {0.0, 2.0, 0.0}, {0.0, 0.0, -1.0}, {-1.0, 0.0, -1.0}}
	vals := [][]byte{[]byte("x"), []byte("y"), []byte("z"), []byte("-z")}

	err := store.SetCols(context.Background(), sc, keys, vals)
	Expect(err).ToNot(HaveOccurred())

	// Cosines against keys[0] = (1, 0, 1):
	//   itself        ->  1
	//   (0, 2, 0)     ->  0
	//   (0, 0, -1)    -> -1/sqrt(2) ~= -0.7
	//   (-1, 0, -1)   -> -1
	_, _, sims, err := store.Find(context.Background(), sc, keys[0], 4)
	Expect(err).ToNot(HaveOccurred())
	Expect(sims[0]).To(BeNumerically("~", 1, 0.1))
	Expect(sims[1]).To(BeNumerically("~", 0, 0.1))
	Expect(sims[2]).To(BeNumerically("~", -0.7, 0.1))
	Expect(sims[3]).To(BeNumerically("~", -1, 0.1))
})
|
||||
|
||||
// expectTriangleEq asserts that the angles implied by the store's cosine
// similarities obey the triangle inequality for every triplet (u, v, w) of
// returned entries. keys are used as queries and vals[i] is the value stored
// under keys[i]; the values double as names in the lookup table.
expectTriangleEq := func(keys [][]float32, vals [][]byte) {
	sims := map[string]map[string]float32{}

	// angle turns a cosine similarity into an angle in radians. The input is
	// clamped to [-1, 1] first: float32 rounding can push a similarity
	// marginally outside that range, where math.Acos returns NaN and every
	// comparison against it would fail.
	angle := func(sim float32) float64 {
		c := float64(sim)
		if c > 1 {
			c = 1
		} else if c < -1 {
			c = -1
		}
		return math.Acos(c)
	}

	// compare every key vector pair and store the similarities in a lookup table
	// that uses the values as keys
	// NOTE(review): only the 9 nearest entries are requested per query, so with
	// more than 9 stored keys some sims[v][w] pairs may be absent and read as 0
	// below — confirm this is acceptable.
	for i, k := range keys {
		_, valsk, simsk, err := store.Find(context.Background(), sc, k, 9)
		Expect(err).ToNot(HaveOccurred())

		for j, v := range valsk {
			p := string(vals[i])
			q := string(v)

			if sims[p] == nil {
				sims[p] = map[string]float32{}
			}

			//log.Debug().Strs("vals", []string{p, q}).Float32("similarity", simsk[j]).Send()

			sims[p][q] = simsk[j]
		}
	}

	// Check that the triangle inequality holds for every combination of the triplet
	// u, v and w
	for _, simsu := range sims {
		for w, simw := range simsu {
			// acos(u,w) <= ...
			uws := angle(simw)

			// ... acos(u,v) + acos(v,w)
			for v := range simsu {
				uvws := angle(simsu[v]) + angle(sims[v][w])

				//log.Debug().Str("u", u).Str("v", v).Str("w", w).Send()
				//log.Debug().Float32("uw", simw).Float32("uv", simsu[v]).Float32("vw", sims[v][w]).Send()
				Expect(uws).To(BeNumerically("<=", uvws))
			}
		}
	}
}
|
||||
|
||||
It("It obeys the triangle inequality for normalized values", func() {
	// The six axis vectors are already unit length; the last three keys are
	// not and get normalized below.
	keys := [][]float32{
		{1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0},
		{-1.0, 0.0, 0.0}, {0.0, -1.0, 0.0}, {0.0, 0.0, -1.0},
		{2.0, 3.0, 4.0}, {9.0, 7.0, 1.0}, {0.0, -1.2, 2.3},
	}
	vals := [][]byte{
		[]byte("x"), []byte("y"), []byte("z"),
		[]byte("-x"), []byte("-y"), []byte("-z"),
		[]byte("u"), []byte("v"), []byte("w"),
	}

	normalize(keys[6:])

	err := store.SetCols(context.Background(), sc, keys, vals)
	Expect(err).ToNot(HaveOccurred())

	expectTriangleEq(keys, vals)
})
|
||||
|
||||
It("It obeys the triangle inequality", func() {
	// Fixed seed so the generated keys are the same on every run.
	rnd := rand.New(rand.NewSource(151))
	keys := make([][]float32, 20)
	vals := make([][]byte, 20)

	// 20 keys of 768 components each; the keys are not normalized.
	for i := range keys {
		k := make([]float32, 768)

		for j := range k {
			k[j] = rnd.Float32()
		}

		keys[i] = k
	}

	// Values are the single letters 'a', 'b', 'c', ... in key order.
	c := byte('a')
	for i := range vals {
		vals[i] = []byte{c}
		c++
	}

	err := store.SetCols(context.Background(), sc, keys, vals)
	Expect(err).ToNot(HaveOccurred())

	expectTriangleEq(keys, vals)
})
|
||||
})
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user