mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-24 16:51:44 -04:00
Compare commits
80 Commits
chromem_st
...
extra-l4t
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
27d7ada8dd | ||
|
|
464686aee6 | ||
|
|
bfa3d4ccff | ||
|
|
6a91288c8c | ||
|
|
96cb407ee0 | ||
|
|
5a19094d3a | ||
|
|
e3b943ffcb | ||
|
|
df30d6a482 | ||
|
|
c3c27b7e3d | ||
|
|
431716d4d6 | ||
|
|
d290fd159f | ||
|
|
051faaf771 | ||
|
|
41a2dfb0d9 | ||
|
|
ed0094c3d0 | ||
|
|
52fadeded1 | ||
|
|
a37fa8d9c4 | ||
|
|
03974a4dd4 | ||
|
|
1d6afbd65d | ||
|
|
d79f02ea09 | ||
|
|
ba2f426e3e | ||
|
|
732042e5c6 | ||
|
|
f1763aabf2 | ||
|
|
e0d90b173b | ||
|
|
ff07612bfa | ||
|
|
7badaf78a0 | ||
|
|
af41436f1b | ||
|
|
cd5489ce47 | ||
|
|
60ec2cf751 | ||
|
|
244f4b564f | ||
|
|
f1d6d65417 | ||
|
|
72e52c4f6a | ||
|
|
1656e1a88e | ||
|
|
7f62b418a4 | ||
|
|
1f4e66d638 | ||
|
|
a37b2c765c | ||
|
|
b4b67e00bd | ||
|
|
91e1ff5a95 | ||
|
|
d9204ea3b5 | ||
|
|
3d0fbcb4f7 | ||
|
|
03f3df9a82 | ||
|
|
fff35d5528 | ||
|
|
539e94db73 | ||
|
|
0f4f62cf3c | ||
|
|
e7cffd7afa | ||
|
|
26d790a2b6 | ||
|
|
5cf838c08d | ||
|
|
4db8f5cbce | ||
|
|
3b6b37a81b | ||
|
|
8f5aa2d9de | ||
|
|
a6bc8aa7c7 | ||
|
|
4ab107bc1a | ||
|
|
4c3710a531 | ||
|
|
901b06284a | ||
|
|
8eef5a2c5e | ||
|
|
e9cace137b | ||
|
|
9409c99738 | ||
|
|
4d44ebc2f2 | ||
|
|
9a1182fa01 | ||
|
|
66e9ef3f33 | ||
|
|
8282414583 | ||
|
|
d1d7ce83d4 | ||
|
|
5177837ab0 | ||
|
|
f9e368b7c4 | ||
|
|
eef80b9880 | ||
|
|
073eaec729 | ||
|
|
318225f631 | ||
|
|
89429a439b | ||
|
|
200fe358f0 | ||
|
|
e426ab7c23 | ||
|
|
715071b68d | ||
|
|
a05737c7e4 | ||
|
|
e8eb0b2c50 | ||
|
|
e15d29aba2 | ||
|
|
10675ac28e | ||
|
|
0ec25b8b07 | ||
|
|
e81ceff681 | ||
|
|
6831719e1e | ||
|
|
b264a91b3f | ||
|
|
1a08948e63 | ||
|
|
14a1e02f44 |
@@ -7,7 +7,7 @@ services:
|
|||||||
args:
|
args:
|
||||||
- FFMPEG=true
|
- FFMPEG=true
|
||||||
- IMAGE_TYPE=extras
|
- IMAGE_TYPE=extras
|
||||||
- GO_TAGS=stablediffusion p2p tts
|
- GO_TAGS=p2p tts
|
||||||
env_file:
|
env_file:
|
||||||
- ../.env
|
- ../.env
|
||||||
ports:
|
ports:
|
||||||
|
|||||||
6
.env
6
.env
@@ -38,12 +38,12 @@
|
|||||||
## Uncomment and set to true to enable rebuilding from source
|
## Uncomment and set to true to enable rebuilding from source
|
||||||
# REBUILD=true
|
# REBUILD=true
|
||||||
|
|
||||||
## Enable go tags, available: stablediffusion, tts
|
## Enable go tags, available: p2p, tts
|
||||||
## stablediffusion: image generation with stablediffusion
|
## p2p: enable distributed inferencing
|
||||||
## tts: enables text-to-speech with go-piper
|
## tts: enables text-to-speech with go-piper
|
||||||
## (requires REBUILD=true)
|
## (requires REBUILD=true)
|
||||||
#
|
#
|
||||||
# GO_TAGS=stablediffusion
|
# GO_TAGS=p2p
|
||||||
|
|
||||||
## Path where to store generated images
|
## Path where to store generated images
|
||||||
# LOCALAI_IMAGE_PATH=/tmp/generated/images
|
# LOCALAI_IMAGE_PATH=/tmp/generated/images
|
||||||
|
|||||||
2
.github/workflows/dependabot_auto.yml
vendored
2
.github/workflows/dependabot_auto.yml
vendored
@@ -14,7 +14,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Dependabot metadata
|
- name: Dependabot metadata
|
||||||
id: metadata
|
id: metadata
|
||||||
uses: dependabot/fetch-metadata@v2.2.0
|
uses: dependabot/fetch-metadata@v2.3.0
|
||||||
with:
|
with:
|
||||||
github-token: "${{ secrets.GITHUB_TOKEN }}"
|
github-token: "${{ secrets.GITHUB_TOKEN }}"
|
||||||
skip-commit-verification: true
|
skip-commit-verification: true
|
||||||
|
|||||||
4
.github/workflows/notify-models.yaml
vendored
4
.github/workflows/notify-models.yaml
vendored
@@ -18,7 +18,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
|
||||||
# Check the PR diff using the current branch and the base branch of the PR
|
# Check the PR diff using the current branch and the base branch of the PR
|
||||||
- uses: GrantBirki/git-diff-action@v2.7.0
|
- uses: GrantBirki/git-diff-action@v2.8.0
|
||||||
id: git-diff-action
|
id: git-diff-action
|
||||||
with:
|
with:
|
||||||
json_diff_file_output: diff.json
|
json_diff_file_output: diff.json
|
||||||
@@ -99,7 +99,7 @@ jobs:
|
|||||||
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
|
docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
|
||||||
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
|
until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
|
||||||
# Check the PR diff using the current branch and the base branch of the PR
|
# Check the PR diff using the current branch and the base branch of the PR
|
||||||
- uses: GrantBirki/git-diff-action@v2.7.0
|
- uses: GrantBirki/git-diff-action@v2.8.0
|
||||||
id: git-diff-action
|
id: git-diff-action
|
||||||
with:
|
with:
|
||||||
json_diff_file_output: diff.json
|
json_diff_file_output: diff.json
|
||||||
|
|||||||
35
.github/workflows/release.yaml
vendored
35
.github/workflows/release.yaml
vendored
@@ -237,40 +237,7 @@ jobs:
|
|||||||
detached: true
|
detached: true
|
||||||
connect-timeout-seconds: 180
|
connect-timeout-seconds: 180
|
||||||
limit-access-to-actor: true
|
limit-access-to-actor: true
|
||||||
build-stablediffusion:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- uses: actions/setup-go@v5
|
|
||||||
with:
|
|
||||||
go-version: '1.21.x'
|
|
||||||
cache: false
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
|
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
|
||||||
- name: Build stablediffusion
|
|
||||||
run: |
|
|
||||||
export PATH=$PATH:$GOPATH/bin
|
|
||||||
make backend-assets/grpc/stablediffusion
|
|
||||||
mkdir -p release && cp backend-assets/grpc/stablediffusion release
|
|
||||||
env:
|
|
||||||
GO_TAGS: stablediffusion
|
|
||||||
- uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
name: stablediffusion
|
|
||||||
path: release/
|
|
||||||
- name: Release
|
|
||||||
uses: softprops/action-gh-release@v2
|
|
||||||
if: startsWith(github.ref, 'refs/tags/')
|
|
||||||
with:
|
|
||||||
files: |
|
|
||||||
release/*
|
|
||||||
|
|
||||||
build-macOS-x86_64:
|
build-macOS-x86_64:
|
||||||
runs-on: macos-13
|
runs-on: macos-13
|
||||||
|
|||||||
51
.github/workflows/test-extra.yml
vendored
51
.github/workflows/test-extra.yml
vendored
@@ -78,57 +78,6 @@ jobs:
|
|||||||
make --jobs=5 --output-sync=target -C backend/python/diffusers
|
make --jobs=5 --output-sync=target -C backend/python/diffusers
|
||||||
make --jobs=5 --output-sync=target -C backend/python/diffusers test
|
make --jobs=5 --output-sync=target -C backend/python/diffusers test
|
||||||
|
|
||||||
tests-parler-tts:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install build-essential ffmpeg
|
|
||||||
# Install UV
|
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
|
||||||
sudo apt-get install -y libopencv-dev
|
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
|
||||||
|
|
||||||
- name: Test parler-tts
|
|
||||||
run: |
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts test
|
|
||||||
- name: Setup tmate session if tests fail
|
|
||||||
if: ${{ failure() }}
|
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
|
||||||
with:
|
|
||||||
detached: true
|
|
||||||
connect-timeout-seconds: 180
|
|
||||||
limit-access-to-actor: true
|
|
||||||
|
|
||||||
tests-openvoice:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Clone
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
- name: Dependencies
|
|
||||||
run: |
|
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install build-essential ffmpeg
|
|
||||||
# Install UV
|
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
|
||||||
sudo apt-get install -y libopencv-dev
|
|
||||||
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
|
||||||
|
|
||||||
- name: Test openvoice
|
|
||||||
run: |
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/openvoice
|
|
||||||
make --jobs=5 --output-sync=target -C backend/python/openvoice test
|
|
||||||
|
|
||||||
# tests-transformers-musicgen:
|
# tests-transformers-musicgen:
|
||||||
# runs-on: ubuntu-latest
|
# runs-on: ubuntu-latest
|
||||||
# steps:
|
# steps:
|
||||||
|
|||||||
6
.github/workflows/test.yml
vendored
6
.github/workflows/test.yml
vendored
@@ -105,9 +105,7 @@ jobs:
|
|||||||
# Pre-build piper before we start tests in order to have shared libraries in place
|
# Pre-build piper before we start tests in order to have shared libraries in place
|
||||||
make sources/go-piper && \
|
make sources/go-piper && \
|
||||||
GO_TAGS="tts" make -C sources/go-piper piper.o && \
|
GO_TAGS="tts" make -C sources/go-piper piper.o && \
|
||||||
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
|
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/
|
||||||
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
|
|
||||||
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
|
||||||
env:
|
env:
|
||||||
CUDA_VERSION: 12-4
|
CUDA_VERSION: 12-4
|
||||||
- name: Cache grpc
|
- name: Cache grpc
|
||||||
@@ -129,7 +127,7 @@ jobs:
|
|||||||
cd grpc && cd cmake/build && sudo make --jobs 5 install
|
cd grpc && cd cmake/build && sudo make --jobs 5 install
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
|
PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
|
||||||
- name: Setup tmate session if tests fail
|
- name: Setup tmate session if tests fail
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
uses: mxschmitt/action-tmate@v3.19
|
||||||
|
|||||||
2
.vscode/launch.json
vendored
2
.vscode/launch.json
vendored
@@ -26,7 +26,7 @@
|
|||||||
"LOCALAI_P2P": "true",
|
"LOCALAI_P2P": "true",
|
||||||
"LOCALAI_FEDERATED": "true"
|
"LOCALAI_FEDERATED": "true"
|
||||||
},
|
},
|
||||||
"buildFlags": ["-tags", "stablediffusion p2p tts", "-v"],
|
"buildFlags": ["-tags", "p2p tts", "-v"],
|
||||||
"envFile": "${workspaceFolder}/.env",
|
"envFile": "${workspaceFolder}/.env",
|
||||||
"cwd": "${workspaceRoot}"
|
"cwd": "${workspaceRoot}"
|
||||||
}
|
}
|
||||||
|
|||||||
55
Dockerfile
55
Dockerfile
@@ -15,8 +15,7 @@ ARG TARGETARCH
|
|||||||
ARG TARGETVARIANT
|
ARG TARGETVARIANT
|
||||||
|
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
|
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
|
||||||
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
@@ -69,14 +68,10 @@ ENV PATH=/opt/rocm/bin:${PATH}
|
|||||||
# OpenBLAS requirements and stable diffusion
|
# OpenBLAS requirements and stable diffusion
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
libopenblas-dev \
|
libopenblas-dev && \
|
||||||
libopencv-dev && \
|
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Set up OpenCV
|
|
||||||
RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
|
|
||||||
|
|
||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
|
|
||||||
###################################
|
###################################
|
||||||
@@ -251,7 +246,7 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
|
|||||||
|
|
||||||
FROM requirements-drivers AS builder-base
|
FROM requirements-drivers AS builder-base
|
||||||
|
|
||||||
ARG GO_TAGS="stablediffusion tts p2p"
|
ARG GO_TAGS="tts p2p"
|
||||||
ARG GRPC_BACKENDS
|
ARG GRPC_BACKENDS
|
||||||
ARG MAKEFLAGS
|
ARG MAKEFLAGS
|
||||||
ARG LD_FLAGS="-s -w"
|
ARG LD_FLAGS="-s -w"
|
||||||
@@ -285,35 +280,12 @@ RUN <<EOT bash
|
|||||||
fi
|
fi
|
||||||
EOT
|
EOT
|
||||||
|
|
||||||
|
|
||||||
###################################
|
|
||||||
###################################
|
|
||||||
|
|
||||||
# This first portion of builder holds the layers specifically used to build backend-assets/grpc/stablediffusion
|
|
||||||
# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
|
|
||||||
FROM builder-base AS builder-sd
|
|
||||||
|
|
||||||
# stablediffusion does not tolerate a newer version of abseil, copy only over enough elements to build it
|
|
||||||
COPY Makefile .
|
|
||||||
COPY go.mod .
|
|
||||||
COPY go.sum .
|
|
||||||
COPY backend/backend.proto ./backend/backend.proto
|
|
||||||
COPY backend/go/image/stablediffusion ./backend/go/image/stablediffusion
|
|
||||||
COPY pkg/grpc ./pkg/grpc
|
|
||||||
COPY pkg/stablediffusion ./pkg/stablediffusion
|
|
||||||
RUN git init
|
|
||||||
RUN make sources/go-stable-diffusion
|
|
||||||
RUN touch prepare-sources
|
|
||||||
|
|
||||||
# Actually build the backend
|
|
||||||
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make backend-assets/grpc/stablediffusion
|
|
||||||
|
|
||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
|
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
|
||||||
# Adjustments to the build process should likely be made here.
|
# Adjustments to the build process should likely be made here.
|
||||||
FROM builder-sd AS builder
|
FROM builder-base AS builder
|
||||||
|
|
||||||
# Install the pre-built GRPC
|
# Install the pre-built GRPC
|
||||||
COPY --from=grpc /opt/grpc /usr/local
|
COPY --from=grpc /opt/grpc /usr/local
|
||||||
@@ -331,7 +303,7 @@ RUN make prepare
|
|||||||
## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
|
## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
|
||||||
## (both will use CUDA or hipblas for the actual computation)
|
## (both will use CUDA or hipblas for the actual computation)
|
||||||
RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
||||||
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
|
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
|
||||||
else \
|
else \
|
||||||
make build; \
|
make build; \
|
||||||
fi
|
fi
|
||||||
@@ -353,8 +325,6 @@ ARG FFMPEG
|
|||||||
|
|
||||||
COPY --from=grpc /opt/grpc /usr/local
|
COPY --from=grpc /opt/grpc /usr/local
|
||||||
|
|
||||||
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion /build/backend-assets/grpc/stablediffusion
|
|
||||||
|
|
||||||
COPY .devcontainer-scripts /.devcontainer-scripts
|
COPY .devcontainer-scripts /.devcontainer-scripts
|
||||||
|
|
||||||
# Add FFmpeg
|
# Add FFmpeg
|
||||||
@@ -384,12 +354,14 @@ FROM requirements-drivers
|
|||||||
|
|
||||||
ARG FFMPEG
|
ARG FFMPEG
|
||||||
ARG BUILD_TYPE
|
ARG BUILD_TYPE
|
||||||
|
ARG BUILD_PLATFORM
|
||||||
ARG TARGETARCH
|
ARG TARGETARCH
|
||||||
ARG IMAGE_TYPE=extras
|
ARG IMAGE_TYPE=extras
|
||||||
ARG EXTRA_BACKENDS
|
ARG EXTRA_BACKENDS
|
||||||
ARG MAKEFLAGS
|
ARG MAKEFLAGS
|
||||||
|
|
||||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||||
|
ENV BUILD_PLATFORM=${BUILD_PLATFORM}
|
||||||
ENV REBUILD=false
|
ENV REBUILD=false
|
||||||
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
|
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
|
||||||
ENV MAKEFLAGS=${MAKEFLAGS}
|
ENV MAKEFLAGS=${MAKEFLAGS}
|
||||||
@@ -427,9 +399,6 @@ COPY --from=builder /build/local-ai ./
|
|||||||
# Copy shared libraries for piper
|
# Copy shared libraries for piper
|
||||||
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
|
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
|
||||||
|
|
||||||
# do not let stablediffusion rebuild (requires an older version of absl)
|
|
||||||
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
|
|
||||||
|
|
||||||
# Change the shell to bash so we can use [[ tests below
|
# Change the shell to bash so we can use [[ tests below
|
||||||
SHELL ["/bin/bash", "-c"]
|
SHELL ["/bin/bash", "-c"]
|
||||||
# We try to strike a balance between individual layer size (as that affects total push time) and total image size
|
# We try to strike a balance between individual layer size (as that affects total push time) and total image size
|
||||||
@@ -443,8 +412,8 @@ RUN if [[ ( "${IMAGE_TYPE}" == "extras ")]]; then \
|
|||||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/coqui \
|
make -C backend/python/coqui \
|
||||||
; fi && \
|
; fi && \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "parler-tts" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
if [[ ( "${EXTRA_BACKENDS}" =~ "faster-whisper" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/parler-tts \
|
make -C backend/python/faster-whisper \
|
||||||
; fi && \
|
; fi && \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/diffusers \
|
make -C backend/python/diffusers \
|
||||||
@@ -453,9 +422,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG
|
|||||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/kokoro \
|
make -C backend/python/kokoro \
|
||||||
; fi && \
|
; fi && \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
|
||||||
make -C backend/python/openvoice \
|
|
||||||
; fi && \
|
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/exllama2 \
|
make -C backend/python/exllama2 \
|
||||||
; fi && \
|
; fi && \
|
||||||
@@ -474,9 +440,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE
|
|||||||
; fi && \
|
; fi && \
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||||
make -C backend/python/rerankers \
|
make -C backend/python/rerankers \
|
||||||
; fi && \
|
|
||||||
if [[ ( "${EXTRA_BACKENDS}" =~ "mamba" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
|
||||||
make -C backend/python/mamba \
|
|
||||||
; fi
|
; fi
|
||||||
|
|
||||||
# Make sure the models directory exists
|
# Make sure the models directory exists
|
||||||
|
|||||||
88
Makefile
88
Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
|
|||||||
# llama.cpp versions
|
# llama.cpp versions
|
||||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||||
CPPLLAMA_VERSION?=92bc493917d43b83e592349e138b54c90b1c3ea7
|
CPPLLAMA_VERSION?=5598f475be3e31430fbe17ebb85654ec90dc201e
|
||||||
|
|
||||||
# whisper.cpp version
|
# whisper.cpp version
|
||||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||||
@@ -18,10 +18,6 @@ WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
|
|||||||
PIPER_REPO?=https://github.com/mudler/go-piper
|
PIPER_REPO?=https://github.com/mudler/go-piper
|
||||||
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
|
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
|
||||||
|
|
||||||
# stablediffusion version
|
|
||||||
STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
|
|
||||||
STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
|
|
||||||
|
|
||||||
# bark.cpp
|
# bark.cpp
|
||||||
BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
|
BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
|
||||||
BARKCPP_VERSION?=v1.0.0
|
BARKCPP_VERSION?=v1.0.0
|
||||||
@@ -179,11 +175,6 @@ ifeq ($(STATIC),true)
|
|||||||
LD_FLAGS+=-linkmode external -extldflags -static
|
LD_FLAGS+=-linkmode external -extldflags -static
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
|
|
||||||
# OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
|
|
||||||
OPTIONAL_GRPC+=backend-assets/grpc/stablediffusion
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
||||||
# OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
|
# OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
|
||||||
# OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
|
# OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
|
||||||
@@ -195,6 +186,7 @@ endif
|
|||||||
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
|
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
||||||
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
||||||
@@ -273,19 +265,6 @@ sources/go-piper:
|
|||||||
sources/go-piper/libpiper_binding.a: sources/go-piper
|
sources/go-piper/libpiper_binding.a: sources/go-piper
|
||||||
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
|
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
|
||||||
|
|
||||||
## stable diffusion (onnx)
|
|
||||||
sources/go-stable-diffusion:
|
|
||||||
mkdir -p sources/go-stable-diffusion
|
|
||||||
cd sources/go-stable-diffusion && \
|
|
||||||
git init && \
|
|
||||||
git remote add origin $(STABLEDIFFUSION_REPO) && \
|
|
||||||
git fetch origin && \
|
|
||||||
git checkout $(STABLEDIFFUSION_VERSION) && \
|
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
|
||||||
|
|
||||||
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
|
|
||||||
CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
|
|
||||||
|
|
||||||
## stablediffusion (ggml)
|
## stablediffusion (ggml)
|
||||||
sources/stablediffusion-ggml.cpp:
|
sources/stablediffusion-ggml.cpp:
|
||||||
git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
|
git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
|
||||||
@@ -331,20 +310,18 @@ sources/whisper.cpp:
|
|||||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
||||||
|
|
||||||
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion backend/cpp/llama/llama.cpp
|
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
|
||||||
|
|
||||||
replace:
|
replace:
|
||||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
||||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
|
||||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
||||||
|
|
||||||
dropreplace:
|
dropreplace:
|
||||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
||||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
||||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
||||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
|
|
||||||
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
|
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
|
||||||
|
|
||||||
prepare-sources: get-sources replace
|
prepare-sources: get-sources replace
|
||||||
@@ -355,7 +332,6 @@ rebuild: ## Rebuilds the project
|
|||||||
$(GOCMD) clean -cache
|
$(GOCMD) clean -cache
|
||||||
$(MAKE) -C sources/go-llama.cpp clean
|
$(MAKE) -C sources/go-llama.cpp clean
|
||||||
$(MAKE) -C sources/whisper.cpp clean
|
$(MAKE) -C sources/whisper.cpp clean
|
||||||
$(MAKE) -C sources/go-stable-diffusion clean
|
|
||||||
$(MAKE) -C sources/go-piper clean
|
$(MAKE) -C sources/go-piper clean
|
||||||
$(MAKE) build
|
$(MAKE) build
|
||||||
|
|
||||||
@@ -470,7 +446,7 @@ prepare-test: grpcs
|
|||||||
|
|
||||||
test: prepare test-models/testmodel.ggml grpcs
|
test: prepare test-models/testmodel.ggml grpcs
|
||||||
@echo 'Running tests'
|
@echo 'Running tests'
|
||||||
export GO_TAGS="tts stablediffusion debug"
|
export GO_TAGS="tts debug"
|
||||||
$(MAKE) prepare-test
|
$(MAKE) prepare-test
|
||||||
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||||
@@ -558,10 +534,10 @@ protogen-go-clean:
|
|||||||
$(RM) bin/*
|
$(RM) bin/*
|
||||||
|
|
||||||
.PHONY: protogen-python
|
.PHONY: protogen-python
|
||||||
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen
|
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
|
||||||
|
|
||||||
.PHONY: protogen-python-clean
|
.PHONY: protogen-python-clean
|
||||||
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean
|
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean
|
||||||
|
|
||||||
.PHONY: autogptq-protogen
|
.PHONY: autogptq-protogen
|
||||||
autogptq-protogen:
|
autogptq-protogen:
|
||||||
@@ -595,6 +571,14 @@ diffusers-protogen:
|
|||||||
diffusers-protogen-clean:
|
diffusers-protogen-clean:
|
||||||
$(MAKE) -C backend/python/diffusers protogen-clean
|
$(MAKE) -C backend/python/diffusers protogen-clean
|
||||||
|
|
||||||
|
.PHONY: faster-whisper-protogen
|
||||||
|
faster-whisper-protogen:
|
||||||
|
$(MAKE) -C backend/python/faster-whisper protogen
|
||||||
|
|
||||||
|
.PHONY: faster-whisper-protogen-clean
|
||||||
|
faster-whisper-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/faster-whisper protogen-clean
|
||||||
|
|
||||||
.PHONY: exllama2-protogen
|
.PHONY: exllama2-protogen
|
||||||
exllama2-protogen:
|
exllama2-protogen:
|
||||||
$(MAKE) -C backend/python/exllama2 protogen
|
$(MAKE) -C backend/python/exllama2 protogen
|
||||||
@@ -603,14 +587,6 @@ exllama2-protogen:
|
|||||||
exllama2-protogen-clean:
|
exllama2-protogen-clean:
|
||||||
$(MAKE) -C backend/python/exllama2 protogen-clean
|
$(MAKE) -C backend/python/exllama2 protogen-clean
|
||||||
|
|
||||||
.PHONY: mamba-protogen
|
|
||||||
mamba-protogen:
|
|
||||||
$(MAKE) -C backend/python/mamba protogen
|
|
||||||
|
|
||||||
.PHONY: mamba-protogen-clean
|
|
||||||
mamba-protogen-clean:
|
|
||||||
$(MAKE) -C backend/python/mamba protogen-clean
|
|
||||||
|
|
||||||
.PHONY: rerankers-protogen
|
.PHONY: rerankers-protogen
|
||||||
rerankers-protogen:
|
rerankers-protogen:
|
||||||
$(MAKE) -C backend/python/rerankers protogen
|
$(MAKE) -C backend/python/rerankers protogen
|
||||||
@@ -627,14 +603,6 @@ transformers-protogen:
|
|||||||
transformers-protogen-clean:
|
transformers-protogen-clean:
|
||||||
$(MAKE) -C backend/python/transformers protogen-clean
|
$(MAKE) -C backend/python/transformers protogen-clean
|
||||||
|
|
||||||
.PHONY: parler-tts-protogen
|
|
||||||
parler-tts-protogen:
|
|
||||||
$(MAKE) -C backend/python/parler-tts protogen
|
|
||||||
|
|
||||||
.PHONY: parler-tts-protogen-clean
|
|
||||||
parler-tts-protogen-clean:
|
|
||||||
$(MAKE) -C backend/python/parler-tts protogen-clean
|
|
||||||
|
|
||||||
.PHONY: kokoro-protogen
|
.PHONY: kokoro-protogen
|
||||||
kokoro-protogen:
|
kokoro-protogen:
|
||||||
$(MAKE) -C backend/python/kokoro protogen
|
$(MAKE) -C backend/python/kokoro protogen
|
||||||
@@ -643,14 +611,6 @@ kokoro-protogen:
|
|||||||
kokoro-protogen-clean:
|
kokoro-protogen-clean:
|
||||||
$(MAKE) -C backend/python/kokoro protogen-clean
|
$(MAKE) -C backend/python/kokoro protogen-clean
|
||||||
|
|
||||||
.PHONY: openvoice-protogen
|
|
||||||
openvoice-protogen:
|
|
||||||
$(MAKE) -C backend/python/openvoice protogen
|
|
||||||
|
|
||||||
.PHONY: openvoice-protogen-clean
|
|
||||||
openvoice-protogen-clean:
|
|
||||||
$(MAKE) -C backend/python/openvoice protogen-clean
|
|
||||||
|
|
||||||
.PHONY: vllm-protogen
|
.PHONY: vllm-protogen
|
||||||
vllm-protogen:
|
vllm-protogen:
|
||||||
$(MAKE) -C backend/python/vllm protogen
|
$(MAKE) -C backend/python/vllm protogen
|
||||||
@@ -666,13 +626,11 @@ prepare-extra-conda-environments: protogen-python
|
|||||||
$(MAKE) -C backend/python/bark
|
$(MAKE) -C backend/python/bark
|
||||||
$(MAKE) -C backend/python/coqui
|
$(MAKE) -C backend/python/coqui
|
||||||
$(MAKE) -C backend/python/diffusers
|
$(MAKE) -C backend/python/diffusers
|
||||||
|
$(MAKE) -C backend/python/faster-whisper
|
||||||
$(MAKE) -C backend/python/vllm
|
$(MAKE) -C backend/python/vllm
|
||||||
$(MAKE) -C backend/python/mamba
|
|
||||||
$(MAKE) -C backend/python/rerankers
|
$(MAKE) -C backend/python/rerankers
|
||||||
$(MAKE) -C backend/python/transformers
|
$(MAKE) -C backend/python/transformers
|
||||||
$(MAKE) -C backend/python/parler-tts
|
|
||||||
$(MAKE) -C backend/python/kokoro
|
$(MAKE) -C backend/python/kokoro
|
||||||
$(MAKE) -C backend/python/openvoice
|
|
||||||
$(MAKE) -C backend/python/exllama2
|
$(MAKE) -C backend/python/exllama2
|
||||||
|
|
||||||
prepare-test-extra: protogen-python
|
prepare-test-extra: protogen-python
|
||||||
@@ -742,6 +700,13 @@ backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.
|
|||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
|
||||||
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
|
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
|
||||||
|
|
||||||
|
backend-assets/grpc/llama-cpp-avx512: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
|
cp -rf backend/cpp/llama backend/cpp/llama-avx512
|
||||||
|
$(MAKE) -C backend/cpp/llama-avx512 purge
|
||||||
|
$(info ${GREEN}I llama-cpp build info:avx512${RESET})
|
||||||
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx512" build-llama-cpp-grpc-server
|
||||||
|
cp -rfv backend/cpp/llama-avx512/grpc-server backend-assets/grpc/llama-cpp-avx512
|
||||||
|
|
||||||
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
|
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
cp -rf backend/cpp/llama backend/cpp/llama-avx
|
cp -rf backend/cpp/llama backend/cpp/llama-avx
|
||||||
$(MAKE) -C backend/cpp/llama-avx purge
|
$(MAKE) -C backend/cpp/llama-avx purge
|
||||||
@@ -816,13 +781,6 @@ ifneq ($(UPX),)
|
|||||||
$(UPX) backend-assets/grpc/piper
|
$(UPX) backend-assets/grpc/piper
|
||||||
endif
|
endif
|
||||||
|
|
||||||
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
|
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
|
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
|
|
||||||
ifneq ($(UPX),)
|
|
||||||
$(UPX) backend-assets/grpc/stablediffusion
|
|
||||||
endif
|
|
||||||
|
|
||||||
backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
|
backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
|
||||||
@@ -903,7 +861,7 @@ swagger:
|
|||||||
|
|
||||||
.PHONY: gen-assets
|
.PHONY: gen-assets
|
||||||
gen-assets:
|
gen-assets:
|
||||||
$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets
|
$(GOCMD) run core/dependencies_manager/manager.go webui_static.yaml core/http/static/assets
|
||||||
|
|
||||||
## Documentation
|
## Documentation
|
||||||
docs/layouts/_default:
|
docs/layouts/_default:
|
||||||
|
|||||||
@@ -39,7 +39,7 @@
|
|||||||
</p>
|
</p>
|
||||||
|
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<a href="https://trendshift.io/repositories/1484" target="_blank"><img src="https://trendshift.io/api/badge/repositories/1484" alt="go-skynet%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
<a href="https://trendshift.io/repositories/5539" target="_blank"><img src="https://trendshift.io/api/badge/repositories/5539" alt="mudler%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
||||||
|
|||||||
@@ -1,56 +1,17 @@
|
|||||||
name: stablediffusion
|
name: stablediffusion
|
||||||
backend: stablediffusion
|
backend: stablediffusion-ggml
|
||||||
|
cfg_scale: 4.5
|
||||||
|
|
||||||
|
options:
|
||||||
|
- sampler:euler
|
||||||
parameters:
|
parameters:
|
||||||
model: stablediffusion_assets
|
model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf
|
||||||
|
step: 25
|
||||||
license: "BSD-3"
|
|
||||||
urls:
|
|
||||||
- https://github.com/EdVince/Stable-Diffusion-NCNN
|
|
||||||
- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE
|
|
||||||
|
|
||||||
description: |
|
|
||||||
Stable Diffusion in NCNN with c++, supported txt2img and img2img
|
|
||||||
|
|
||||||
download_files:
|
download_files:
|
||||||
- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
|
- filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
|
||||||
sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
|
sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f"
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
|
uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
|
||||||
- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
|
|
||||||
sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
|
|
||||||
- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
|
|
||||||
sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
|
|
||||||
- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
|
|
||||||
sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
|
|
||||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin"
|
|
||||||
- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin"
|
|
||||||
sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
|
|
||||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin"
|
|
||||||
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
|
|
||||||
sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
|
|
||||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin"
|
|
||||||
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
|
|
||||||
sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
|
|
||||||
- filename: "stablediffusion_assets/log_sigmas.bin"
|
|
||||||
sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
|
|
||||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
|
|
||||||
- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
|
|
||||||
sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
|
|
||||||
- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
|
|
||||||
sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
|
|
||||||
- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
|
|
||||||
sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
|
|
||||||
- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
|
|
||||||
sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
|
|
||||||
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
|
|
||||||
- filename: "stablediffusion_assets/vocab.txt"
|
|
||||||
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
|
|
||||||
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
|
|
||||||
|
|
||||||
usage: |
|
usage: |
|
||||||
curl http://localhost:8080/v1/images/generations \
|
curl http://localhost:8080/v1/images/generations \
|
||||||
|
|||||||
@@ -21,7 +21,8 @@ service Backend {
|
|||||||
rpc Status(HealthMessage) returns (StatusResponse) {}
|
rpc Status(HealthMessage) returns (StatusResponse) {}
|
||||||
|
|
||||||
rpc StoresSet(StoresSetOptions) returns (Result) {}
|
rpc StoresSet(StoresSetOptions) returns (Result) {}
|
||||||
rpc StoresReset(StoresResetOptions) returns (Result) {}
|
rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
|
||||||
|
rpc StoresGet(StoresGetOptions) returns (StoresGetResult) {}
|
||||||
rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
|
rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
|
||||||
|
|
||||||
rpc Rerank(RerankRequest) returns (RerankResult) {}
|
rpc Rerank(RerankRequest) returns (RerankResult) {}
|
||||||
@@ -77,10 +78,19 @@ message StoresSetOptions {
|
|||||||
repeated StoresValue Values = 2;
|
repeated StoresValue Values = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
message StoresResetOptions {
|
message StoresDeleteOptions {
|
||||||
repeated StoresKey Keys = 1;
|
repeated StoresKey Keys = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
message StoresGetOptions {
|
||||||
|
repeated StoresKey Keys = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message StoresGetResult {
|
||||||
|
repeated StoresKey Keys = 1;
|
||||||
|
repeated StoresValue Values = 2;
|
||||||
|
}
|
||||||
|
|
||||||
message StoresFindOptions {
|
message StoresFindOptions {
|
||||||
StoresKey Key = 1;
|
StoresKey Key = 1;
|
||||||
int32 TopK = 2;
|
int32 TopK = 2;
|
||||||
@@ -153,6 +163,11 @@ message Reply {
|
|||||||
double timing_token_generation = 5;
|
double timing_token_generation = 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
message GrammarTrigger {
|
||||||
|
string word = 1;
|
||||||
|
bool at_start = 2;
|
||||||
|
}
|
||||||
|
|
||||||
message ModelOptions {
|
message ModelOptions {
|
||||||
string Model = 1;
|
string Model = 1;
|
||||||
int32 ContextSize = 2;
|
int32 ContextSize = 2;
|
||||||
@@ -237,6 +252,8 @@ message ModelOptions {
|
|||||||
|
|
||||||
string CacheTypeKey = 63;
|
string CacheTypeKey = 63;
|
||||||
string CacheTypeValue = 64;
|
string CacheTypeValue = 64;
|
||||||
|
|
||||||
|
repeated GrammarTrigger GrammarTriggers = 65;
|
||||||
}
|
}
|
||||||
|
|
||||||
message Result {
|
message Result {
|
||||||
|
|||||||
@@ -468,6 +468,9 @@ struct llama_server_context
|
|||||||
bool add_bos_token = true;
|
bool add_bos_token = true;
|
||||||
bool has_eos_token = true;
|
bool has_eos_token = true;
|
||||||
|
|
||||||
|
bool grammar_lazy = false;
|
||||||
|
std::vector<common_grammar_trigger> grammar_trigger_words;
|
||||||
|
|
||||||
int32_t n_ctx; // total context for all clients / slots
|
int32_t n_ctx; // total context for all clients / slots
|
||||||
|
|
||||||
// system prompt
|
// system prompt
|
||||||
@@ -706,6 +709,8 @@ struct llama_server_context
|
|||||||
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
||||||
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
||||||
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
||||||
|
slot->sparams.grammar_trigger_words = grammar_trigger_words;
|
||||||
|
slot->sparams.grammar_lazy = grammar_lazy;
|
||||||
|
|
||||||
if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) {
|
if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) {
|
||||||
// Might be better to reject the request with a 400 ?
|
// Might be better to reject the request with a 400 ?
|
||||||
@@ -2374,6 +2379,21 @@ static void params_parse(const backend::ModelOptions* request,
|
|||||||
if ( request->ropefreqscale() != 0.0f ) {
|
if ( request->ropefreqscale() != 0.0f ) {
|
||||||
params.rope_freq_scale = request->ropefreqscale();
|
params.rope_freq_scale = request->ropefreqscale();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (request->grammartriggers_size() > 0) {
|
||||||
|
LOG_INFO("configuring grammar triggers", {});
|
||||||
|
llama.grammar_lazy = true;
|
||||||
|
for (int i = 0; i < request->grammartriggers_size(); i++) {
|
||||||
|
common_grammar_trigger trigger;
|
||||||
|
trigger.word = request->grammartriggers(i).word();
|
||||||
|
trigger.at_start = request->grammartriggers(i).at_start();
|
||||||
|
llama.grammar_trigger_words.push_back(trigger);
|
||||||
|
LOG_INFO("grammar trigger", {
|
||||||
|
{ "word", trigger.word },
|
||||||
|
{ "at_start", trigger.at_start }
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -2522,6 +2542,18 @@ public:
|
|||||||
return grpc::Status::OK;
|
return grpc::Status::OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
grpc::Status TokenizeString(ServerContext* context, const backend::PredictOptions* request, backend::TokenizationResponse* response){
|
||||||
|
json data = parse_options(false, request, llama);
|
||||||
|
|
||||||
|
std::vector<llama_token> tokens = llama.tokenize(data["prompt"],false);
|
||||||
|
|
||||||
|
for (int i=0 ; i< tokens.size(); i++){
|
||||||
|
response->add_tokens(tokens[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return grpc::Status::OK;
|
||||||
|
}
|
||||||
|
|
||||||
grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
|
grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
|
||||||
llama_client_slot* active_slot = llama.get_active_slot();
|
llama_client_slot* active_slot = llama.get_active_slot();
|
||||||
|
|
||||||
|
|||||||
@@ -1,21 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
|
||||||
|
|
||||||
import (
|
|
||||||
"flag"
|
|
||||||
|
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
flag.Parse()
|
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, &Image{}); err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,33 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// This is a wrapper to statisfy the GRPC service interface
|
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
|
||||||
import (
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
"github.com/mudler/LocalAI/pkg/stablediffusion"
|
|
||||||
)
|
|
||||||
|
|
||||||
type Image struct {
|
|
||||||
base.SingleThread
|
|
||||||
stablediffusion *stablediffusion.StableDiffusion
|
|
||||||
}
|
|
||||||
|
|
||||||
func (image *Image) Load(opts *pb.ModelOptions) error {
|
|
||||||
var err error
|
|
||||||
// Note: the Model here is a path to a directory containing the model files
|
|
||||||
image.stablediffusion, err = stablediffusion.New(opts.ModelFile)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
|
|
||||||
return image.stablediffusion.GenerateImage(
|
|
||||||
int(opts.Height),
|
|
||||||
int(opts.Width),
|
|
||||||
int(opts.Mode),
|
|
||||||
int(opts.Step),
|
|
||||||
int(opts.Seed),
|
|
||||||
opts.PositivePrompt,
|
|
||||||
opts.NegativePrompt,
|
|
||||||
opts.Dst)
|
|
||||||
}
|
|
||||||
@@ -4,36 +4,101 @@ package main
|
|||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||||
import (
|
import (
|
||||||
"container/heap"
|
"container/heap"
|
||||||
"context"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
"runtime"
|
"slices"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
chromem "github.com/philippgille/chromem-go"
|
|
||||||
|
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Store struct {
|
type Store struct {
|
||||||
base.SingleThread
|
base.SingleThread
|
||||||
*chromem.DB
|
|
||||||
*chromem.Collection
|
// The sorted keys
|
||||||
|
keys [][]float32
|
||||||
|
// The sorted values
|
||||||
|
values [][]byte
|
||||||
|
|
||||||
|
// If for every K it holds that ||k||^2 = 1, then we can use the normalized distance functions
|
||||||
|
// TODO: Should we normalize incoming keys if they are not instead?
|
||||||
|
keysAreNormalized bool
|
||||||
|
// The first key decides the length of the keys
|
||||||
|
keyLen int
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Only used for sorting using Go's builtin implementation. The interfaces are columnar because
|
||||||
|
// that's theoretically best for memory layout and cache locality, but this isn't optimized yet.
|
||||||
|
type Pair struct {
|
||||||
|
Key []float32
|
||||||
|
Value []byte
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewStore() *Store {
|
func NewStore() *Store {
|
||||||
return &Store{}
|
return &Store{
|
||||||
|
keys: make([][]float32, 0),
|
||||||
|
values: make([][]byte, 0),
|
||||||
|
keysAreNormalized: true,
|
||||||
|
keyLen: -1,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func compareSlices(k1, k2 []float32) int {
|
||||||
|
assert(len(k1) == len(k2), fmt.Sprintf("compareSlices: len(k1) = %d, len(k2) = %d", len(k1), len(k2)))
|
||||||
|
|
||||||
|
return slices.Compare(k1, k2)
|
||||||
|
}
|
||||||
|
|
||||||
|
func hasKey(unsortedSlice [][]float32, target []float32) bool {
|
||||||
|
return slices.ContainsFunc(unsortedSlice, func(k []float32) bool {
|
||||||
|
return compareSlices(k, target) == 0
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func findInSortedSlice(sortedSlice [][]float32, target []float32) (int, bool) {
|
||||||
|
return slices.BinarySearchFunc(sortedSlice, target, func(k, t []float32) int {
|
||||||
|
return compareSlices(k, t)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func isSortedPairs(kvs []Pair) bool {
|
||||||
|
for i := 1; i < len(kvs); i++ {
|
||||||
|
if compareSlices(kvs[i-1].Key, kvs[i].Key) > 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func isSortedKeys(keys [][]float32) bool {
|
||||||
|
for i := 1; i < len(keys); i++ {
|
||||||
|
if compareSlices(keys[i-1], keys[i]) > 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func sortIntoKeySlicese(keys []*pb.StoresKey) [][]float32 {
|
||||||
|
ks := make([][]float32, len(keys))
|
||||||
|
|
||||||
|
for i, k := range keys {
|
||||||
|
ks[i] = k.Floats
|
||||||
|
}
|
||||||
|
|
||||||
|
slices.SortFunc(ks, compareSlices)
|
||||||
|
|
||||||
|
assert(len(ks) == len(keys), fmt.Sprintf("len(ks) = %d, len(keys) = %d", len(ks), len(keys)))
|
||||||
|
assert(isSortedKeys(ks), "keys are not sorted")
|
||||||
|
|
||||||
|
return ks
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Store) Load(opts *pb.ModelOptions) error {
|
func (s *Store) Load(opts *pb.ModelOptions) error {
|
||||||
db := chromem.NewDB()
|
|
||||||
collection, err := db.CreateCollection("all-documents", nil, nil)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
s.DB = db
|
|
||||||
s.Collection = collection
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -46,25 +111,156 @@ func (s *Store) StoresSet(opts *pb.StoresSetOptions) error {
|
|||||||
if len(opts.Keys) != len(opts.Values) {
|
if len(opts.Keys) != len(opts.Values) {
|
||||||
return fmt.Errorf("len(keys) = %d, len(values) = %d", len(opts.Keys), len(opts.Values))
|
return fmt.Errorf("len(keys) = %d, len(values) = %d", len(opts.Keys), len(opts.Values))
|
||||||
}
|
}
|
||||||
docs := []chromem.Document{}
|
|
||||||
|
if s.keyLen == -1 {
|
||||||
|
s.keyLen = len(opts.Keys[0].Floats)
|
||||||
|
} else {
|
||||||
|
if len(opts.Keys[0].Floats) != s.keyLen {
|
||||||
|
return fmt.Errorf("Try to add key with length %d when existing length is %d", len(opts.Keys[0].Floats), s.keyLen)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
kvs := make([]Pair, len(opts.Keys))
|
||||||
|
|
||||||
for i, k := range opts.Keys {
|
for i, k := range opts.Keys {
|
||||||
docs = append(docs, chromem.Document{
|
if s.keysAreNormalized && !isNormalized(k.Floats) {
|
||||||
ID: k.String(),
|
s.keysAreNormalized = false
|
||||||
Content: opts.Values[i].String(),
|
var sample []float32
|
||||||
})
|
if len(s.keys) > 5 {
|
||||||
|
sample = k.Floats[:5]
|
||||||
|
} else {
|
||||||
|
sample = k.Floats
|
||||||
|
}
|
||||||
|
log.Debug().Msgf("Key is not normalized: %v", sample)
|
||||||
|
}
|
||||||
|
|
||||||
|
kvs[i] = Pair{
|
||||||
|
Key: k.Floats,
|
||||||
|
Value: opts.Values[i].Bytes,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return s.Collection.AddDocuments(context.Background(), docs, runtime.NumCPU())
|
slices.SortFunc(kvs, func(a, b Pair) int {
|
||||||
|
return compareSlices(a.Key, b.Key)
|
||||||
|
})
|
||||||
|
|
||||||
|
assert(len(kvs) == len(opts.Keys), fmt.Sprintf("len(kvs) = %d, len(opts.Keys) = %d", len(kvs), len(opts.Keys)))
|
||||||
|
assert(isSortedPairs(kvs), "keys are not sorted")
|
||||||
|
|
||||||
|
l := len(kvs) + len(s.keys)
|
||||||
|
merge_ks := make([][]float32, 0, l)
|
||||||
|
merge_vs := make([][]byte, 0, l)
|
||||||
|
|
||||||
|
i, j := 0, 0
|
||||||
|
for {
|
||||||
|
if i+j >= l {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
if i >= len(kvs) {
|
||||||
|
merge_ks = append(merge_ks, s.keys[j])
|
||||||
|
merge_vs = append(merge_vs, s.values[j])
|
||||||
|
j++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if j >= len(s.keys) {
|
||||||
|
merge_ks = append(merge_ks, kvs[i].Key)
|
||||||
|
merge_vs = append(merge_vs, kvs[i].Value)
|
||||||
|
i++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
c := compareSlices(kvs[i].Key, s.keys[j])
|
||||||
|
if c < 0 {
|
||||||
|
merge_ks = append(merge_ks, kvs[i].Key)
|
||||||
|
merge_vs = append(merge_vs, kvs[i].Value)
|
||||||
|
i++
|
||||||
|
} else if c > 0 {
|
||||||
|
merge_ks = append(merge_ks, s.keys[j])
|
||||||
|
merge_vs = append(merge_vs, s.values[j])
|
||||||
|
j++
|
||||||
|
} else {
|
||||||
|
merge_ks = append(merge_ks, kvs[i].Key)
|
||||||
|
merge_vs = append(merge_vs, kvs[i].Value)
|
||||||
|
i++
|
||||||
|
j++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(len(merge_ks) == l, fmt.Sprintf("len(merge_ks) = %d, l = %d", len(merge_ks), l))
|
||||||
|
assert(isSortedKeys(merge_ks), "merge keys are not sorted")
|
||||||
|
|
||||||
|
s.keys = merge_ks
|
||||||
|
s.values = merge_vs
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Store) StoresReset(opts *pb.StoresResetOptions) error {
|
func (s *Store) StoresDelete(opts *pb.StoresDeleteOptions) error {
|
||||||
err := s.DB.DeleteCollection("all-documents")
|
if len(opts.Keys) == 0 {
|
||||||
if err != nil {
|
return fmt.Errorf("no keys to delete")
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
s.Collection, err = s.CreateCollection("all-documents", nil, nil)
|
|
||||||
return err
|
if len(opts.Keys) == 0 {
|
||||||
|
return fmt.Errorf("no keys to add")
|
||||||
|
}
|
||||||
|
|
||||||
|
if s.keyLen == -1 {
|
||||||
|
s.keyLen = len(opts.Keys[0].Floats)
|
||||||
|
} else {
|
||||||
|
if len(opts.Keys[0].Floats) != s.keyLen {
|
||||||
|
return fmt.Errorf("Trying to delete key with length %d when existing length is %d", len(opts.Keys[0].Floats), s.keyLen)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ks := sortIntoKeySlicese(opts.Keys)
|
||||||
|
|
||||||
|
l := len(s.keys) - len(ks)
|
||||||
|
merge_ks := make([][]float32, 0, l)
|
||||||
|
merge_vs := make([][]byte, 0, l)
|
||||||
|
|
||||||
|
tail_ks := s.keys
|
||||||
|
tail_vs := s.values
|
||||||
|
for _, k := range ks {
|
||||||
|
j, found := findInSortedSlice(tail_ks, k)
|
||||||
|
|
||||||
|
if found {
|
||||||
|
merge_ks = append(merge_ks, tail_ks[:j]...)
|
||||||
|
merge_vs = append(merge_vs, tail_vs[:j]...)
|
||||||
|
tail_ks = tail_ks[j+1:]
|
||||||
|
tail_vs = tail_vs[j+1:]
|
||||||
|
} else {
|
||||||
|
assert(!hasKey(s.keys, k), fmt.Sprintf("Key exists, but was not found: t=%d, %v", len(tail_ks), k))
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debug().Msgf("Delete: found = %v, t = %d, j = %d, len(merge_ks) = %d, len(merge_vs) = %d", found, len(tail_ks), j, len(merge_ks), len(merge_vs))
|
||||||
|
}
|
||||||
|
|
||||||
|
merge_ks = append(merge_ks, tail_ks...)
|
||||||
|
merge_vs = append(merge_vs, tail_vs...)
|
||||||
|
|
||||||
|
assert(len(merge_ks) <= len(s.keys), fmt.Sprintf("len(merge_ks) = %d, len(s.keys) = %d", len(merge_ks), len(s.keys)))
|
||||||
|
|
||||||
|
s.keys = merge_ks
|
||||||
|
s.values = merge_vs
|
||||||
|
|
||||||
|
assert(len(s.keys) >= l, fmt.Sprintf("len(s.keys) = %d, l = %d", len(s.keys), l))
|
||||||
|
assert(isSortedKeys(s.keys), "keys are not sorted")
|
||||||
|
assert(func() bool {
|
||||||
|
for _, k := range ks {
|
||||||
|
if _, found := findInSortedSlice(s.keys, k); found {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}(), "Keys to delete still present")
|
||||||
|
|
||||||
|
if len(s.keys) != l {
|
||||||
|
log.Debug().Msgf("Delete: Some keys not found: len(s.keys) = %d, l = %d", len(s.keys), l)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error) {
|
func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error) {
|
||||||
@@ -115,12 +311,16 @@ func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error)
|
|||||||
}
|
}
|
||||||
|
|
||||||
func isNormalized(k []float32) bool {
|
func isNormalized(k []float32) bool {
|
||||||
var sum float32
|
var sum float64
|
||||||
|
|
||||||
for _, v := range k {
|
for _, v := range k {
|
||||||
sum += v
|
v64 := float64(v)
|
||||||
|
sum += v64*v64
|
||||||
}
|
}
|
||||||
|
|
||||||
return sum == 1.0
|
s := math.Sqrt(sum)
|
||||||
|
|
||||||
|
return s >= 0.99 && s <= 1.01
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: This we could replace with handwritten SIMD code
|
// TODO: This we could replace with handwritten SIMD code
|
||||||
@@ -132,7 +332,7 @@ func normalizedCosineSimilarity(k1, k2 []float32) float32 {
|
|||||||
dot += k1[i] * k2[i]
|
dot += k1[i] * k2[i]
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(dot >= -1 && dot <= 1, fmt.Sprintf("dot = %f", dot))
|
assert(dot >= -1.01 && dot <= 1.01, fmt.Sprintf("dot = %f", dot))
|
||||||
|
|
||||||
// 2.0 * (1.0 - dot) would be the Euclidean distance
|
// 2.0 * (1.0 - dot) would be the Euclidean distance
|
||||||
return dot
|
return dot
|
||||||
@@ -222,7 +422,7 @@ func cosineSimilarity(k1, k2 []float32, mag1 float64) float32 {
|
|||||||
|
|
||||||
sim := float32(dot / (mag1 * math.Sqrt(mag2)))
|
sim := float32(dot / (mag1 * math.Sqrt(mag2)))
|
||||||
|
|
||||||
assert(sim >= -1 && sim <= 1, fmt.Sprintf("sim = %f", sim))
|
assert(sim >= -1.01 && sim <= 1.01, fmt.Sprintf("sim = %f", sim))
|
||||||
|
|
||||||
return sim
|
return sim
|
||||||
}
|
}
|
||||||
|
|||||||
2
backend/python/autogptq/requirements-l4t.txt
Normal file
2
backend/python/autogptq/requirements-l4t.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
|
||||||
|
torch
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
accelerate
|
accelerate
|
||||||
auto-gptq==0.7.1
|
auto-gptq==0.7.1
|
||||||
grpcio==1.69.0
|
grpcio==1.70.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
transformers
|
transformers
|
||||||
5
backend/python/bark/requirements-l4t.txt
Normal file
5
backend/python/bark/requirements-l4t.txt
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
|
||||||
|
torch
|
||||||
|
torchaudio
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
bark==0.1.5
|
bark==0.1.5
|
||||||
grpcio==1.69.0
|
grpcio==1.70.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -132,11 +132,16 @@ function installRequirements() {
|
|||||||
declare -a requirementFiles=(
|
declare -a requirementFiles=(
|
||||||
"${EDIR}/requirements-install.txt"
|
"${EDIR}/requirements-install.txt"
|
||||||
"${EDIR}/requirements.txt"
|
"${EDIR}/requirements.txt"
|
||||||
"${EDIR}/requirements-${BUILD_TYPE}.txt"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
|
if [ -n "${BUILD_PLATFORM}" ]; then
|
||||||
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}.txt")
|
requirementFiles+=("${EDIR}/requirements-${BUILD_PLATFORM}.txt")
|
||||||
|
else
|
||||||
|
requirementFiles+=("${EDIR}/requirements-${BUILD_TYPE}.txt")
|
||||||
|
|
||||||
|
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
|
||||||
|
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}.txt")
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements
|
# if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements
|
||||||
@@ -146,8 +151,14 @@ function installRequirements() {
|
|||||||
|
|
||||||
requirementFiles+=("${EDIR}/requirements-after.txt")
|
requirementFiles+=("${EDIR}/requirements-after.txt")
|
||||||
|
|
||||||
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
|
if [ -n "${BUILD_PLATFORM}" ]; then
|
||||||
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}-after.txt")
|
requirementFiles+=("${EDIR}/requirements-${BUILD_PLATFORM}-after.txt")
|
||||||
|
else
|
||||||
|
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
|
||||||
|
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}-after.txt")
|
||||||
|
else
|
||||||
|
requirementFiles+=("${EDIR}/requirements-${BUILD_TYPE}-after.txt")
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
for reqFile in ${requirementFiles[@]}; do
|
for reqFile in ${requirementFiles[@]}; do
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.70.0
|
||||||
protobuf
|
protobuf
|
||||||
grpcio-tools
|
grpcio-tools
|
||||||
6
backend/python/coqui/requirements-l4t.txt
Normal file
6
backend/python/coqui/requirements-l4t.txt
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
|
||||||
|
torch
|
||||||
|
torchaudio
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
|
coqui-tts
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.70.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
packaging==24.1
|
packaging==24.1
|
||||||
10
backend/python/diffusers/requirements-l4t.txt
Normal file
10
backend/python/diffusers/requirements-l4t.txt
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
|
||||||
|
torch
|
||||||
|
diffusers
|
||||||
|
opencv-python
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
|
compel
|
||||||
|
peft
|
||||||
|
sentencepiece
|
||||||
|
optimum-quanto
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
setuptools
|
setuptools
|
||||||
grpcio==1.69.0
|
grpcio==1.70.0
|
||||||
pillow
|
pillow
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
|
|||||||
4
backend/python/exllama2/requirements-l4t.txt
Normal file
4
backend/python/exllama2/requirements-l4t.txt
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
|
||||||
|
torch
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.70.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
wheel
|
wheel
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
.DEFAULT_GOAL := install
|
.DEFAULT_GOAL := install
|
||||||
|
|
||||||
.PHONY: install
|
.PHONY: install
|
||||||
install: protogen
|
install:
|
||||||
bash install.sh
|
bash install.sh
|
||||||
|
$(MAKE) protogen
|
||||||
|
|
||||||
.PHONY: protogen
|
.PHONY: protogen
|
||||||
protogen: backend_pb2_grpc.py backend_pb2.py
|
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||||
@@ -12,14 +13,8 @@ protogen-clean:
|
|||||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
backend_pb2_grpc.py backend_pb2.py:
|
backend_pb2_grpc.py backend_pb2.py:
|
||||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
bash protogen.sh
|
||||||
|
|
||||||
.PHONY: clean
|
.PHONY: clean
|
||||||
clean: protogen-clean
|
clean: protogen-clean
|
||||||
rm -rf venv __pycache__
|
rm -rf venv __pycache__
|
||||||
|
|
||||||
.PHONY: test
|
|
||||||
test: protogen
|
|
||||||
@echo "Testing openvoice..."
|
|
||||||
bash test.sh
|
|
||||||
@echo "openvoice tested."
|
|
||||||
94
backend/python/faster-whisper/backend.py
Executable file
94
backend/python/faster-whisper/backend.py
Executable file
@@ -0,0 +1,94 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
This is an extra gRPC server of LocalAI for Bark TTS
|
||||||
|
"""
|
||||||
|
from concurrent import futures
|
||||||
|
import time
|
||||||
|
import argparse
|
||||||
|
import signal
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import backend_pb2
|
||||||
|
import backend_pb2_grpc
|
||||||
|
|
||||||
|
from faster_whisper import WhisperModel
|
||||||
|
|
||||||
|
import grpc
|
||||||
|
|
||||||
|
|
||||||
|
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||||
|
|
||||||
|
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
||||||
|
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||||
|
COQUI_LANGUAGE = os.environ.get('COQUI_LANGUAGE', None)
|
||||||
|
|
||||||
|
# Implement the BackendServicer class with the service methods
|
||||||
|
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||||
|
"""
|
||||||
|
BackendServicer is the class that implements the gRPC service
|
||||||
|
"""
|
||||||
|
def Health(self, request, context):
|
||||||
|
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
||||||
|
def LoadModel(self, request, context):
|
||||||
|
device = "cpu"
|
||||||
|
# Get device
|
||||||
|
# device = "cuda" if request.CUDA else "cpu"
|
||||||
|
if request.CUDA:
|
||||||
|
device = "cuda"
|
||||||
|
|
||||||
|
try:
|
||||||
|
print("Preparing models, please wait", file=sys.stderr)
|
||||||
|
self.model = WhisperModel(request.Model, device=device, compute_type="float16")
|
||||||
|
except Exception as err:
|
||||||
|
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||||
|
# Implement your logic here for the LoadModel service
|
||||||
|
# Replace this with your desired response
|
||||||
|
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
||||||
|
|
||||||
|
def AudioTranscription(self, request, context):
|
||||||
|
resultSegments = []
|
||||||
|
text = ""
|
||||||
|
try:
|
||||||
|
segments, info = self.model.transcribe(request.dst, beam_size=5, condition_on_previous_text=False)
|
||||||
|
id = 0
|
||||||
|
for segment in segments:
|
||||||
|
print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
|
||||||
|
resultSegments.append(backend_pb2.TranscriptSegment(id=id, start=segment.start, end=segment.end, text=segment.text))
|
||||||
|
text += segment.text
|
||||||
|
id += 1
|
||||||
|
except Exception as err:
|
||||||
|
print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr)
|
||||||
|
|
||||||
|
return backend_pb2.TranscriptResult(segments=resultSegments, text=text)
|
||||||
|
|
||||||
|
def serve(address):
|
||||||
|
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||||
|
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||||
|
server.add_insecure_port(address)
|
||||||
|
server.start()
|
||||||
|
print("Server started. Listening on: " + address, file=sys.stderr)
|
||||||
|
|
||||||
|
# Define the signal handler function
|
||||||
|
def signal_handler(sig, frame):
|
||||||
|
print("Received termination signal. Shutting down...")
|
||||||
|
server.stop(0)
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
|
# Set the signal handlers for SIGINT and SIGTERM
|
||||||
|
signal.signal(signal.SIGINT, signal_handler)
|
||||||
|
signal.signal(signal.SIGTERM, signal_handler)
|
||||||
|
|
||||||
|
try:
|
||||||
|
while True:
|
||||||
|
time.sleep(_ONE_DAY_IN_SECONDS)
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
server.stop(0)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
parser = argparse.ArgumentParser(description="Run the gRPC server.")
|
||||||
|
parser.add_argument(
|
||||||
|
"--addr", default="localhost:50051", help="The address to bind the server to."
|
||||||
|
)
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
serve(args.addr)
|
||||||
@@ -12,5 +12,3 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
installRequirements
|
installRequirements
|
||||||
|
|
||||||
python -m unidic download
|
|
||||||
0
backend/python/parler-tts/protogen.sh → backend/python/faster-whisper/protogen.sh
Executable file → Normal file
0
backend/python/parler-tts/protogen.sh → backend/python/faster-whisper/protogen.sh
Executable file → Normal file
8
backend/python/faster-whisper/requirements-cpu.txt
Normal file
8
backend/python/faster-whisper/requirements-cpu.txt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
faster-whisper
|
||||||
|
opencv-python
|
||||||
|
accelerate
|
||||||
|
compel
|
||||||
|
peft
|
||||||
|
sentencepiece
|
||||||
|
torch==2.4.1
|
||||||
|
optimum-quanto
|
||||||
9
backend/python/faster-whisper/requirements-cublas11.txt
Normal file
9
backend/python/faster-whisper/requirements-cublas11.txt
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||||
|
torch==2.4.1+cu118
|
||||||
|
faster-whisper
|
||||||
|
opencv-python
|
||||||
|
accelerate
|
||||||
|
compel
|
||||||
|
peft
|
||||||
|
sentencepiece
|
||||||
|
optimum-quanto
|
||||||
8
backend/python/faster-whisper/requirements-cublas12.txt
Normal file
8
backend/python/faster-whisper/requirements-cublas12.txt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
torch==2.4.1
|
||||||
|
faster-whisper
|
||||||
|
opencv-python
|
||||||
|
accelerate
|
||||||
|
compel
|
||||||
|
peft
|
||||||
|
sentencepiece
|
||||||
|
optimum-quanto
|
||||||
3
backend/python/faster-whisper/requirements-hipblas.txt
Normal file
3
backend/python/faster-whisper/requirements-hipblas.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||||
|
torch
|
||||||
|
faster-whisper
|
||||||
@@ -1,8 +1,6 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||||
intel-extension-for-pytorch==2.3.110+xpu
|
intel-extension-for-pytorch==2.3.110+xpu
|
||||||
torch==2.3.1+cxx11.abi
|
torch==2.3.1+cxx11.abi
|
||||||
torchaudio==2.3.1+cxx11.abi
|
|
||||||
oneccl_bind_pt==2.3.100+xpu
|
oneccl_bind_pt==2.3.100+xpu
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
transformers
|
faster-whisper
|
||||||
accelerate
|
|
||||||
9
backend/python/faster-whisper/requirements-l4t.txt
Normal file
9
backend/python/faster-whisper/requirements-l4t.txt
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
|
||||||
|
torch
|
||||||
|
faster-whisper
|
||||||
|
opencv-python
|
||||||
|
accelerate
|
||||||
|
compel
|
||||||
|
peft
|
||||||
|
sentencepiece
|
||||||
|
optimum-quanto
|
||||||
3
backend/python/faster-whisper/requirements.txt
Normal file
3
backend/python/faster-whisper/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
grpcio==1.70.0
|
||||||
|
protobuf
|
||||||
|
grpcio-tools
|
||||||
3
backend/python/kokoro/requirements-l4t.txt
Normal file
3
backend/python/kokoro/requirements-l4t.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
|
||||||
|
torch
|
||||||
|
transformers
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.70.0
|
||||||
protobuf
|
protobuf
|
||||||
phonemizer
|
phonemizer
|
||||||
scipy
|
scipy
|
||||||
|
|||||||
@@ -1,29 +0,0 @@
|
|||||||
.PHONY: mamba
|
|
||||||
mamba: protogen
|
|
||||||
bash install.sh
|
|
||||||
|
|
||||||
.PHONY: run
|
|
||||||
run: protogen
|
|
||||||
@echo "Running mamba..."
|
|
||||||
bash run.sh
|
|
||||||
@echo "mamba run."
|
|
||||||
|
|
||||||
.PHONY: test
|
|
||||||
test: protogen
|
|
||||||
@echo "Testing mamba..."
|
|
||||||
bash test.sh
|
|
||||||
@echo "mamba tested."
|
|
||||||
|
|
||||||
.PHONY: protogen
|
|
||||||
protogen: backend_pb2_grpc.py backend_pb2.py
|
|
||||||
|
|
||||||
.PHONY: protogen-clean
|
|
||||||
protogen-clean:
|
|
||||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
|
||||||
|
|
||||||
backend_pb2_grpc.py backend_pb2.py:
|
|
||||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
|
||||||
|
|
||||||
.PHONY: clean
|
|
||||||
clean: protogen-clean
|
|
||||||
$(RM) -r venv __pycache__
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
# Creating a separate environment for the mamba project
|
|
||||||
|
|
||||||
```
|
|
||||||
make mamba
|
|
||||||
```
|
|
||||||
@@ -1,179 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
from concurrent import futures
|
|
||||||
import time
|
|
||||||
import argparse
|
|
||||||
import signal
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
|
|
||||||
import backend_pb2
|
|
||||||
import backend_pb2_grpc
|
|
||||||
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
import torch
|
|
||||||
from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
||||||
from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel
|
|
||||||
|
|
||||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
|
||||||
|
|
||||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
|
||||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
|
||||||
MAMBA_CHAT= os.environ.get('MAMBA_CHAT', '1') == '1'
|
|
||||||
|
|
||||||
# Implement the BackendServicer class with the service methods
|
|
||||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|
||||||
"""
|
|
||||||
A gRPC servicer that implements the Backend service defined in backend.proto.
|
|
||||||
"""
|
|
||||||
def generate(self,prompt, max_new_tokens):
|
|
||||||
"""
|
|
||||||
Generates text based on the given prompt and maximum number of new tokens.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
prompt (str): The prompt to generate text from.
|
|
||||||
max_new_tokens (int): The maximum number of new tokens to generate.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: The generated text.
|
|
||||||
"""
|
|
||||||
self.generator.end_beam_search()
|
|
||||||
|
|
||||||
# Tokenizing the input
|
|
||||||
ids = self.generator.tokenizer.encode(prompt)
|
|
||||||
|
|
||||||
self.generator.gen_begin_reuse(ids)
|
|
||||||
initial_len = self.generator.sequence[0].shape[0]
|
|
||||||
has_leading_space = False
|
|
||||||
decoded_text = ''
|
|
||||||
for i in range(max_new_tokens):
|
|
||||||
token = self.generator.gen_single_token()
|
|
||||||
if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'):
|
|
||||||
has_leading_space = True
|
|
||||||
|
|
||||||
decoded_text = self.generator.tokenizer.decode(self.generator.sequence[0][initial_len:])
|
|
||||||
if has_leading_space:
|
|
||||||
decoded_text = ' ' + decoded_text
|
|
||||||
|
|
||||||
if token.item() == self.generator.tokenizer.eos_token_id:
|
|
||||||
break
|
|
||||||
return decoded_text
|
|
||||||
|
|
||||||
def Health(self, request, context):
|
|
||||||
"""
|
|
||||||
Returns a health check message.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
request: The health check request.
|
|
||||||
context: The gRPC context.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
backend_pb2.Reply: The health check reply.
|
|
||||||
"""
|
|
||||||
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
|
||||||
|
|
||||||
def LoadModel(self, request, context):
|
|
||||||
"""
|
|
||||||
Loads a language model.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
request: The load model request.
|
|
||||||
context: The gRPC context.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
backend_pb2.Result: The load model result.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
tokenizerModel = request.Tokenizer
|
|
||||||
if tokenizerModel == "":
|
|
||||||
tokenizerModel = request.Model
|
|
||||||
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained(tokenizerModel)
|
|
||||||
if MAMBA_CHAT:
|
|
||||||
tokenizer.eos_token = "<|endoftext|>"
|
|
||||||
tokenizer.pad_token = tokenizer.eos_token
|
|
||||||
self.tokenizer = tokenizer
|
|
||||||
self.model = MambaLMHeadModel.from_pretrained(request.Model, device="cuda", dtype=torch.float16)
|
|
||||||
except Exception as err:
|
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
|
||||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
|
||||||
|
|
||||||
def Predict(self, request, context):
|
|
||||||
"""
|
|
||||||
Generates text based on the given prompt and sampling parameters.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
request: The predict request.
|
|
||||||
context: The gRPC context.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
backend_pb2.Result: The predict result.
|
|
||||||
"""
|
|
||||||
if request.TopP == 0:
|
|
||||||
request.TopP = 0.9
|
|
||||||
|
|
||||||
max_tokens = request.Tokens
|
|
||||||
|
|
||||||
if request.Tokens == 0:
|
|
||||||
max_tokens = 2000
|
|
||||||
|
|
||||||
# encoded_input = self.tokenizer(request.Prompt)
|
|
||||||
tokens = self.tokenizer(request.Prompt, return_tensors="pt")
|
|
||||||
input_ids = tokens.input_ids.to(device="cuda")
|
|
||||||
out = self.model.generate(input_ids=input_ids, max_length=max_tokens, temperature=request.Temperature,
|
|
||||||
top_p=request.TopP, eos_token_id=self.tokenizer.eos_token_id)
|
|
||||||
|
|
||||||
decoded = self.tokenizer.batch_decode(out)
|
|
||||||
|
|
||||||
generated_text = decoded[0]
|
|
||||||
|
|
||||||
# Remove prompt from response if present
|
|
||||||
if request.Prompt in generated_text:
|
|
||||||
generated_text = generated_text.replace(request.Prompt, "")
|
|
||||||
|
|
||||||
return backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))
|
|
||||||
|
|
||||||
def PredictStream(self, request, context):
|
|
||||||
"""
|
|
||||||
Generates text based on the given prompt and sampling parameters, and streams the results.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
request: The predict stream request.
|
|
||||||
context: The gRPC context.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
backend_pb2.Result: The predict stream result.
|
|
||||||
"""
|
|
||||||
yield self.Predict(request, context)
|
|
||||||
|
|
||||||
def serve(address):
|
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
|
||||||
server.add_insecure_port(address)
|
|
||||||
server.start()
|
|
||||||
print("Server started. Listening on: " + address, file=sys.stderr)
|
|
||||||
|
|
||||||
# Define the signal handler function
|
|
||||||
def signal_handler(sig, frame):
|
|
||||||
print("Received termination signal. Shutting down...")
|
|
||||||
server.stop(0)
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
||||||
# Set the signal handlers for SIGINT and SIGTERM
|
|
||||||
signal.signal(signal.SIGINT, signal_handler)
|
|
||||||
signal.signal(signal.SIGTERM, signal_handler)
|
|
||||||
|
|
||||||
try:
|
|
||||||
while True:
|
|
||||||
time.sleep(_ONE_DAY_IN_SECONDS)
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
server.stop(0)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
parser = argparse.ArgumentParser(description="Run the gRPC server.")
|
|
||||||
parser.add_argument(
|
|
||||||
"--addr", default="localhost:50051", help="The address to bind the server to."
|
|
||||||
)
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
serve(args.addr)
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -e
|
|
||||||
|
|
||||||
LIMIT_TARGETS="cublas"
|
|
||||||
EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"
|
|
||||||
|
|
||||||
source $(dirname $0)/../common/libbackend.sh
|
|
||||||
|
|
||||||
installRequirements
|
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
causal-conv1d==1.4.0
|
|
||||||
mamba-ssm==2.2.2
|
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
torch==2.4.1
|
|
||||||
transformers
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
|
||||||
torch==2.4.1+cu118
|
|
||||||
transformers
|
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
torch==2.4.1
|
|
||||||
transformers
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
# mabma does not specify it's build dependencies per PEP517, so we need to disable build isolation
|
|
||||||
# this also means that we need to install the basic build dependencies into the venv ourselves
|
|
||||||
# https://github.com/Dao-AILab/causal-conv1d/issues/24
|
|
||||||
packaging
|
|
||||||
setuptools
|
|
||||||
wheel
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
grpcio==1.69.0
|
|
||||||
protobuf
|
|
||||||
certifi
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
LIMIT_TARGETS="cublas"
|
|
||||||
|
|
||||||
source $(dirname $0)/../common/libbackend.sh
|
|
||||||
|
|
||||||
startBackend $@
|
|
||||||
@@ -1,76 +0,0 @@
|
|||||||
import unittest
|
|
||||||
import subprocess
|
|
||||||
import time
|
|
||||||
import backend_pb2
|
|
||||||
import backend_pb2_grpc
|
|
||||||
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
import unittest
|
|
||||||
import subprocess
|
|
||||||
import time
|
|
||||||
import grpc
|
|
||||||
import backend_pb2_grpc
|
|
||||||
import backend_pb2
|
|
||||||
|
|
||||||
class TestBackendServicer(unittest.TestCase):
|
|
||||||
"""
|
|
||||||
TestBackendServicer is the class that tests the gRPC service.
|
|
||||||
|
|
||||||
This class contains methods to test the startup and shutdown of the gRPC service.
|
|
||||||
"""
|
|
||||||
def setUp(self):
|
|
||||||
self.service = subprocess.Popen(["python", "backend.py", "--addr", "localhost:50051"])
|
|
||||||
time.sleep(10)
|
|
||||||
|
|
||||||
def tearDown(self) -> None:
|
|
||||||
self.service.terminate()
|
|
||||||
self.service.wait()
|
|
||||||
|
|
||||||
def test_server_startup(self):
|
|
||||||
try:
|
|
||||||
self.setUp()
|
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
|
||||||
stub = backend_pb2_grpc.BackendStub(channel)
|
|
||||||
response = stub.Health(backend_pb2.HealthMessage())
|
|
||||||
self.assertEqual(response.message, b'OK')
|
|
||||||
except Exception as err:
|
|
||||||
print(err)
|
|
||||||
self.fail("Server failed to start")
|
|
||||||
finally:
|
|
||||||
self.tearDown()
|
|
||||||
def test_load_model(self):
|
|
||||||
"""
|
|
||||||
This method tests if the model is loaded successfully
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
self.setUp()
|
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
|
||||||
stub = backend_pb2_grpc.BackendStub(channel)
|
|
||||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m"))
|
|
||||||
self.assertTrue(response.success)
|
|
||||||
self.assertEqual(response.message, "Model loaded successfully")
|
|
||||||
except Exception as err:
|
|
||||||
print(err)
|
|
||||||
self.fail("LoadModel service failed")
|
|
||||||
finally:
|
|
||||||
self.tearDown()
|
|
||||||
|
|
||||||
def test_text(self):
|
|
||||||
"""
|
|
||||||
This method tests if the embeddings are generated successfully
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
self.setUp()
|
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
|
||||||
stub = backend_pb2_grpc.BackendStub(channel)
|
|
||||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m"))
|
|
||||||
self.assertTrue(response.success)
|
|
||||||
req = backend_pb2.PredictOptions(Prompt="The capital of France is")
|
|
||||||
resp = stub.Predict(req)
|
|
||||||
self.assertIsNotNone(resp.message)
|
|
||||||
except Exception as err:
|
|
||||||
print(err)
|
|
||||||
self.fail("text service failed")
|
|
||||||
finally:
|
|
||||||
self.tearDown()
|
|
||||||
@@ -1,158 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Extra gRPC server for OpenVoice models.
|
|
||||||
"""
|
|
||||||
from concurrent import futures
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import signal
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
import torch
|
|
||||||
from openvoice import se_extractor
|
|
||||||
from openvoice.api import ToneColorConverter
|
|
||||||
from melo.api import TTS
|
|
||||||
|
|
||||||
import time
|
|
||||||
import backend_pb2
|
|
||||||
import backend_pb2_grpc
|
|
||||||
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
|
|
||||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
|
||||||
|
|
||||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
|
||||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
|
||||||
|
|
||||||
# Implement the BackendServicer class with the service methods
|
|
||||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|
||||||
"""
|
|
||||||
A gRPC servicer for the backend service.
|
|
||||||
|
|
||||||
This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
|
|
||||||
"""
|
|
||||||
def Health(self, request, context):
|
|
||||||
"""
|
|
||||||
A gRPC method that returns the health status of the backend service.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
request: A HealthRequest object that contains the request parameters.
|
|
||||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A Reply object that contains the health status of the backend service.
|
|
||||||
"""
|
|
||||||
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
|
||||||
|
|
||||||
def LoadModel(self, request, context):
|
|
||||||
"""
|
|
||||||
A gRPC method that loads a model into memory.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
request: A LoadModelRequest object that contains the request parameters.
|
|
||||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A Result object that contains the result of the LoadModel operation.
|
|
||||||
"""
|
|
||||||
model_name = request.Model
|
|
||||||
try:
|
|
||||||
|
|
||||||
self.clonedVoice = False
|
|
||||||
# Assume directory from request.ModelFile.
|
|
||||||
# Only if request.LoraAdapter it's not an absolute path
|
|
||||||
if request.AudioPath and request.ModelFile != "" and not os.path.isabs(request.AudioPath):
|
|
||||||
# get base path of modelFile
|
|
||||||
modelFileBase = os.path.dirname(request.ModelFile)
|
|
||||||
request.AudioPath = os.path.join(modelFileBase, request.AudioPath)
|
|
||||||
if request.AudioPath != "":
|
|
||||||
self.clonedVoice = True
|
|
||||||
|
|
||||||
self.modelpath = request.ModelFile
|
|
||||||
self.speaker = request.Type
|
|
||||||
self.ClonedVoicePath = request.AudioPath
|
|
||||||
|
|
||||||
ckpt_converter = request.Model+'/converter'
|
|
||||||
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
|
||||||
self.device = device
|
|
||||||
self.tone_color_converter = None
|
|
||||||
if self.clonedVoice:
|
|
||||||
self.tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
|
|
||||||
self.tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
|
|
||||||
|
|
||||||
except Exception as err:
|
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
|
||||||
|
|
||||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
|
||||||
|
|
||||||
def TTS(self, request, context):
|
|
||||||
model_name = request.model
|
|
||||||
if model_name == "":
|
|
||||||
return backend_pb2.Result(success=False, message="request.model is required")
|
|
||||||
try:
|
|
||||||
# Speed is adjustable
|
|
||||||
speed = 1.0
|
|
||||||
voice = "EN"
|
|
||||||
if request.voice:
|
|
||||||
voice = request.voice
|
|
||||||
model = TTS(language=voice, device=self.device)
|
|
||||||
speaker_ids = model.hps.data.spk2id
|
|
||||||
speaker_key = self.speaker
|
|
||||||
modelpath = self.modelpath
|
|
||||||
for s in speaker_ids.keys():
|
|
||||||
print(f"Speaker: {s} - ID: {speaker_ids[s]}")
|
|
||||||
speaker_id = speaker_ids[speaker_key]
|
|
||||||
speaker_key = speaker_key.lower().replace('_', '-')
|
|
||||||
source_se = torch.load(f'{modelpath}/base_speakers/ses/{speaker_key}.pth', map_location=self.device)
|
|
||||||
model.tts_to_file(request.text, speaker_id, request.dst, speed=speed)
|
|
||||||
if self.clonedVoice:
|
|
||||||
reference_speaker = self.ClonedVoicePath
|
|
||||||
target_se, audio_name = se_extractor.get_se(reference_speaker, self.tone_color_converter, vad=False)
|
|
||||||
# Run the tone color converter
|
|
||||||
encode_message = "@MyShell"
|
|
||||||
self.tone_color_converter.convert(
|
|
||||||
audio_src_path=request.dst,
|
|
||||||
src_se=source_se,
|
|
||||||
tgt_se=target_se,
|
|
||||||
output_path=request.dst,
|
|
||||||
message=encode_message)
|
|
||||||
|
|
||||||
print("[OpenVoice] TTS generated!", file=sys.stderr)
|
|
||||||
print("[OpenVoice] TTS saved to", request.dst, file=sys.stderr)
|
|
||||||
print(request, file=sys.stderr)
|
|
||||||
except Exception as err:
|
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
|
||||||
return backend_pb2.Result(success=True)
|
|
||||||
|
|
||||||
def serve(address):
|
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
|
||||||
server.add_insecure_port(address)
|
|
||||||
server.start()
|
|
||||||
print("[OpenVoice] Server started. Listening on: " + address, file=sys.stderr)
|
|
||||||
|
|
||||||
# Define the signal handler function
|
|
||||||
def signal_handler(sig, frame):
|
|
||||||
print("[OpenVoice] Received termination signal. Shutting down...")
|
|
||||||
server.stop(0)
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
||||||
# Set the signal handlers for SIGINT and SIGTERM
|
|
||||||
signal.signal(signal.SIGINT, signal_handler)
|
|
||||||
signal.signal(signal.SIGTERM, signal_handler)
|
|
||||||
|
|
||||||
try:
|
|
||||||
while True:
|
|
||||||
time.sleep(_ONE_DAY_IN_SECONDS)
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
server.stop(0)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
parser = argparse.ArgumentParser(description="Run the gRPC server.")
|
|
||||||
parser.add_argument(
|
|
||||||
"--addr", default="localhost:50051", help="The address to bind the server to."
|
|
||||||
)
|
|
||||||
args = parser.parse_args()
|
|
||||||
print(f"[OpenVoice] startup: {args}", file=sys.stderr)
|
|
||||||
serve(args.addr)
|
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
torch==2.4.1
|
|
||||||
git+https://github.com/myshell-ai/MeloTTS.git
|
|
||||||
git+https://github.com/myshell-ai/OpenVoice.git
|
|
||||||
whisper-timestamped
|
|
||||||
pydub==0.25.1
|
|
||||||
wavmark==0.0.3
|
|
||||||
eng_to_ipa==0.0.2
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
|
||||||
torch==2.4.1+cu118
|
|
||||||
git+https://github.com/myshell-ai/MeloTTS.git
|
|
||||||
git+https://github.com/myshell-ai/OpenVoice.git
|
|
||||||
whisper-timestamped
|
|
||||||
pydub==0.25.1
|
|
||||||
wavmark==0.0.3
|
|
||||||
eng_to_ipa==0.0.2
|
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
torch==2.4.1
|
|
||||||
git+https://github.com/myshell-ai/MeloTTS.git
|
|
||||||
git+https://github.com/myshell-ai/OpenVoice.git
|
|
||||||
whisper-timestamped
|
|
||||||
pydub==0.25.1
|
|
||||||
wavmark==0.0.3
|
|
||||||
eng_to_ipa==0.0.2
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
|
||||||
torch==2.4.1+rocm6.0
|
|
||||||
git+https://github.com/myshell-ai/MeloTTS.git
|
|
||||||
git+https://github.com/myshell-ai/OpenVoice.git
|
|
||||||
whisper-timestamped
|
|
||||||
pydub==0.25.1
|
|
||||||
wavmark==0.0.3
|
|
||||||
eng_to_ipa==0.0.2
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
|
||||||
intel-extension-for-pytorch==2.3.110+xpu
|
|
||||||
torch==2.3.1+cxx11.abi
|
|
||||||
torchaudio==2.3.1+cxx11.abi
|
|
||||||
oneccl_bind_pt==2.3.100+xpu
|
|
||||||
optimum[openvino]
|
|
||||||
grpcio==1.69.0
|
|
||||||
protobuf
|
|
||||||
librosa==0.9.1
|
|
||||||
faster-whisper==0.9.0
|
|
||||||
pydub==0.25.1
|
|
||||||
wavmark==0.0.3
|
|
||||||
eng_to_ipa==0.0.2
|
|
||||||
inflect==7.0.0
|
|
||||||
unidecode==1.3.7
|
|
||||||
whisper-timestamped==1.14.2
|
|
||||||
openai
|
|
||||||
python-dotenv
|
|
||||||
pypinyin==0.50.0
|
|
||||||
cn2an==0.5.22
|
|
||||||
jieba==0.42.1
|
|
||||||
langid==1.1.6
|
|
||||||
git+https://github.com/myshell-ai/MeloTTS.git
|
|
||||||
git+https://github.com/myshell-ai/OpenVoice.git
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
grpcio==1.69.0
|
|
||||||
protobuf
|
|
||||||
librosa
|
|
||||||
faster-whisper
|
|
||||||
inflect
|
|
||||||
unidecode
|
|
||||||
openai
|
|
||||||
python-dotenv
|
|
||||||
pypinyin
|
|
||||||
cn2an==0.5.22
|
|
||||||
numpy==1.22.0
|
|
||||||
networkx==2.8.8
|
|
||||||
jieba==0.42.1
|
|
||||||
gradio==5.9.1
|
|
||||||
langid==1.1.6
|
|
||||||
llvmlite==0.43.0
|
|
||||||
setuptools
|
|
||||||
@@ -1,82 +0,0 @@
|
|||||||
"""
|
|
||||||
A test script to test the gRPC service
|
|
||||||
"""
|
|
||||||
import unittest
|
|
||||||
import subprocess
|
|
||||||
import time
|
|
||||||
import backend_pb2
|
|
||||||
import backend_pb2_grpc
|
|
||||||
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
|
|
||||||
class TestBackendServicer(unittest.TestCase):
|
|
||||||
"""
|
|
||||||
TestBackendServicer is the class that tests the gRPC service
|
|
||||||
"""
|
|
||||||
def setUp(self):
|
|
||||||
"""
|
|
||||||
This method sets up the gRPC service by starting the server
|
|
||||||
"""
|
|
||||||
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
|
|
||||||
time.sleep(30)
|
|
||||||
|
|
||||||
def tearDown(self) -> None:
|
|
||||||
"""
|
|
||||||
This method tears down the gRPC service by terminating the server
|
|
||||||
"""
|
|
||||||
self.service.terminate()
|
|
||||||
self.service.wait()
|
|
||||||
|
|
||||||
def test_server_startup(self):
|
|
||||||
"""
|
|
||||||
This method tests if the server starts up successfully
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
self.setUp()
|
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
|
||||||
stub = backend_pb2_grpc.BackendStub(channel)
|
|
||||||
response = stub.Health(backend_pb2.HealthMessage())
|
|
||||||
self.assertEqual(response.message, b'OK')
|
|
||||||
except Exception as err:
|
|
||||||
print(err)
|
|
||||||
self.fail("Server failed to start")
|
|
||||||
finally:
|
|
||||||
self.tearDown()
|
|
||||||
|
|
||||||
def test_load_model(self):
|
|
||||||
"""
|
|
||||||
This method tests if the model is loaded successfully
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
self.setUp()
|
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
|
||||||
stub = backend_pb2_grpc.BackendStub(channel)
|
|
||||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="checkpoints_v2",
|
|
||||||
Type="en-us"))
|
|
||||||
self.assertTrue(response.success)
|
|
||||||
self.assertEqual(response.message, "Model loaded successfully")
|
|
||||||
except Exception as err:
|
|
||||||
print(err)
|
|
||||||
self.fail("LoadModel service failed")
|
|
||||||
finally:
|
|
||||||
self.tearDown()
|
|
||||||
|
|
||||||
def test_tts(self):
|
|
||||||
"""
|
|
||||||
This method tests if the embeddings are generated successfully
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
self.setUp()
|
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
|
||||||
stub = backend_pb2_grpc.BackendStub(channel)
|
|
||||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="dingzhen"))
|
|
||||||
self.assertTrue(response.success)
|
|
||||||
tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story", voice="EN")
|
|
||||||
tts_response = stub.TTS(tts_request)
|
|
||||||
self.assertIsNotNone(tts_response)
|
|
||||||
except Exception as err:
|
|
||||||
print(err)
|
|
||||||
self.fail("TTS service failed")
|
|
||||||
finally:
|
|
||||||
self.tearDown()
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -e
|
|
||||||
|
|
||||||
source $(dirname $0)/../common/libbackend.sh
|
|
||||||
|
|
||||||
# Download checkpoints if not present
|
|
||||||
if [ ! -d "checkpoints_v2" ]; then
|
|
||||||
wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
|
|
||||||
unzip checkpoints_v2.zip
|
|
||||||
fi
|
|
||||||
|
|
||||||
runUnittests
|
|
||||||
@@ -1,44 +0,0 @@
|
|||||||
export CONDA_ENV_PATH = "parler.yml"
|
|
||||||
SKIP_CONDA?=0
|
|
||||||
ifeq ($(BUILD_TYPE), cublas)
|
|
||||||
export CONDA_ENV_PATH = "parler-nvidia.yml"
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Intel GPU are supposed to have dependencies installed in the main python
|
|
||||||
# environment, so we skip conda installation for SYCL builds.
|
|
||||||
# https://github.com/intel/intel-extension-for-pytorch/issues/538
|
|
||||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
|
||||||
export SKIP_CONDA=1
|
|
||||||
endif
|
|
||||||
|
|
||||||
.PHONY: parler-tts
|
|
||||||
parler-tts:
|
|
||||||
@echo "Installing $(CONDA_ENV_PATH)..."
|
|
||||||
bash install.sh $(CONDA_ENV_PATH)
|
|
||||||
$(MAKE) protogen
|
|
||||||
|
|
||||||
.PHONY: run
|
|
||||||
run: protogen
|
|
||||||
@echo "Running transformers..."
|
|
||||||
bash run.sh
|
|
||||||
@echo "transformers run."
|
|
||||||
|
|
||||||
.PHONY: test
|
|
||||||
test: protogen
|
|
||||||
@echo "Testing transformers..."
|
|
||||||
bash test.sh
|
|
||||||
@echo "transformers tested."
|
|
||||||
|
|
||||||
.PHONY: protogen
|
|
||||||
protogen: backend_pb2_grpc.py backend_pb2.py
|
|
||||||
|
|
||||||
.PHONY: protogen-clean
|
|
||||||
protogen-clean:
|
|
||||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
|
||||||
|
|
||||||
backend_pb2_grpc.py backend_pb2.py:
|
|
||||||
bash protogen.sh
|
|
||||||
|
|
||||||
.PHONY: clean
|
|
||||||
clean: protogen-clean
|
|
||||||
$(RM) -r venv __pycache__
|
|
||||||
@@ -1,125 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Extra gRPC server for MusicgenForConditionalGeneration models.
|
|
||||||
"""
|
|
||||||
from concurrent import futures
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import signal
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
|
|
||||||
import time
|
|
||||||
import backend_pb2
|
|
||||||
import backend_pb2_grpc
|
|
||||||
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
from scipy.io.wavfile import write as write_wav
|
|
||||||
|
|
||||||
from parler_tts import ParlerTTSForConditionalGeneration
|
|
||||||
from transformers import AutoTokenizer
|
|
||||||
import soundfile as sf
|
|
||||||
import torch
|
|
||||||
|
|
||||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
|
||||||
|
|
||||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
|
||||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
|
||||||
|
|
||||||
# Implement the BackendServicer class with the service methods
|
|
||||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|
||||||
"""
|
|
||||||
A gRPC servicer for the backend service.
|
|
||||||
|
|
||||||
This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
|
|
||||||
"""
|
|
||||||
def Health(self, request, context):
|
|
||||||
"""
|
|
||||||
A gRPC method that returns the health status of the backend service.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
request: A HealthRequest object that contains the request parameters.
|
|
||||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A Reply object that contains the health status of the backend service.
|
|
||||||
"""
|
|
||||||
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
|
||||||
|
|
||||||
def LoadModel(self, request, context):
|
|
||||||
"""
|
|
||||||
A gRPC method that loads a model into memory.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
request: A LoadModelRequest object that contains the request parameters.
|
|
||||||
context: A grpc.ServicerContext object that provides information about the RPC.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A Result object that contains the result of the LoadModel operation.
|
|
||||||
"""
|
|
||||||
model_name = request.Model
|
|
||||||
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
|
||||||
try:
|
|
||||||
self.model = ParlerTTSForConditionalGeneration.from_pretrained(model_name).to(device)
|
|
||||||
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
||||||
except Exception as err:
|
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
|
||||||
|
|
||||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
|
||||||
|
|
||||||
def TTS(self, request, context):
|
|
||||||
model_name = request.model
|
|
||||||
voice = request.voice
|
|
||||||
if voice == "":
|
|
||||||
voice = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
|
|
||||||
if model_name == "":
|
|
||||||
return backend_pb2.Result(success=False, message="request.model is required")
|
|
||||||
try:
|
|
||||||
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
|
||||||
input_ids = self.tokenizer(voice, return_tensors="pt").input_ids.to(device)
|
|
||||||
prompt_input_ids = self.tokenizer(request.text, return_tensors="pt").input_ids.to(device)
|
|
||||||
|
|
||||||
generation = self.model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
|
|
||||||
audio_arr = generation.cpu().numpy().squeeze()
|
|
||||||
print("[parler-tts] TTS generated!", file=sys.stderr)
|
|
||||||
sf.write(request.dst, audio_arr, self.model.config.sampling_rate)
|
|
||||||
print("[parler-tts] TTS saved to", request.dst, file=sys.stderr)
|
|
||||||
print("[parler-tts] TTS for", file=sys.stderr)
|
|
||||||
print(request, file=sys.stderr)
|
|
||||||
except Exception as err:
|
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
|
||||||
return backend_pb2.Result(success=True)
|
|
||||||
|
|
||||||
|
|
||||||
def serve(address):
|
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
|
||||||
server.add_insecure_port(address)
|
|
||||||
server.start()
|
|
||||||
print("[parler-tts] Server started. Listening on: " + address, file=sys.stderr)
|
|
||||||
|
|
||||||
# Define the signal handler function
|
|
||||||
def signal_handler(sig, frame):
|
|
||||||
print("[parler-tts] Received termination signal. Shutting down...")
|
|
||||||
server.stop(0)
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
||||||
# Set the signal handlers for SIGINT and SIGTERM
|
|
||||||
signal.signal(signal.SIGINT, signal_handler)
|
|
||||||
signal.signal(signal.SIGTERM, signal_handler)
|
|
||||||
|
|
||||||
try:
|
|
||||||
while True:
|
|
||||||
time.sleep(_ONE_DAY_IN_SECONDS)
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
server.stop(0)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
parser = argparse.ArgumentParser(description="Run the gRPC server.")
|
|
||||||
parser.add_argument(
|
|
||||||
"--addr", default="localhost:50051", help="The address to bind the server to."
|
|
||||||
)
|
|
||||||
args = parser.parse_args()
|
|
||||||
print(f"[parler-tts] startup: {args}", file=sys.stderr)
|
|
||||||
serve(args.addr)
|
|
||||||
@@ -1,28 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -e
|
|
||||||
|
|
||||||
source $(dirname $0)/../common/libbackend.sh
|
|
||||||
|
|
||||||
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
|
|
||||||
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
|
|
||||||
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
|
|
||||||
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
|
|
||||||
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
|
||||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
|
||||||
fi
|
|
||||||
|
|
||||||
|
|
||||||
installRequirements
|
|
||||||
|
|
||||||
|
|
||||||
# https://github.com/descriptinc/audiotools/issues/101
|
|
||||||
# incompatible protobuf versions.
|
|
||||||
PYDIR=python3.10
|
|
||||||
pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"
|
|
||||||
|
|
||||||
if [ ! -d ${pyenv} ]; then
|
|
||||||
echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17
|
|
||||||
llvmlite==0.43.0
|
|
||||||
numba==0.60.0
|
|
||||||
grpcio-tools==1.42.0
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
transformers
|
|
||||||
accelerate
|
|
||||||
torch==2.4.1
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
|
||||||
torch==2.4.1+cu118
|
|
||||||
torchaudio==2.4.1+cu118
|
|
||||||
transformers
|
|
||||||
accelerate
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
torch==2.4.1
|
|
||||||
torchaudio==2.4.1
|
|
||||||
transformers
|
|
||||||
accelerate
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
|
||||||
torch==2.3.0+rocm6.0
|
|
||||||
torchaudio==2.3.0+rocm6.0
|
|
||||||
transformers
|
|
||||||
accelerate
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
grpcio==1.69.0
|
|
||||||
certifi
|
|
||||||
llvmlite==0.43.0
|
|
||||||
setuptools
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
source $(dirname $0)/../common/libbackend.sh
|
|
||||||
|
|
||||||
startBackend $@
|
|
||||||
@@ -1,81 +0,0 @@
|
|||||||
"""
|
|
||||||
A test script to test the gRPC service
|
|
||||||
"""
|
|
||||||
import unittest
|
|
||||||
import subprocess
|
|
||||||
import time
|
|
||||||
import backend_pb2
|
|
||||||
import backend_pb2_grpc
|
|
||||||
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
|
|
||||||
class TestBackendServicer(unittest.TestCase):
|
|
||||||
"""
|
|
||||||
TestBackendServicer is the class that tests the gRPC service
|
|
||||||
"""
|
|
||||||
def setUp(self):
|
|
||||||
"""
|
|
||||||
This method sets up the gRPC service by starting the server
|
|
||||||
"""
|
|
||||||
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
|
|
||||||
time.sleep(10)
|
|
||||||
|
|
||||||
def tearDown(self) -> None:
|
|
||||||
"""
|
|
||||||
This method tears down the gRPC service by terminating the server
|
|
||||||
"""
|
|
||||||
self.service.terminate()
|
|
||||||
self.service.wait()
|
|
||||||
|
|
||||||
def test_server_startup(self):
|
|
||||||
"""
|
|
||||||
This method tests if the server starts up successfully
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
self.setUp()
|
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
|
||||||
stub = backend_pb2_grpc.BackendStub(channel)
|
|
||||||
response = stub.Health(backend_pb2.HealthMessage())
|
|
||||||
self.assertEqual(response.message, b'OK')
|
|
||||||
except Exception as err:
|
|
||||||
print(err)
|
|
||||||
self.fail("Server failed to start")
|
|
||||||
finally:
|
|
||||||
self.tearDown()
|
|
||||||
|
|
||||||
def test_load_model(self):
|
|
||||||
"""
|
|
||||||
This method tests if the model is loaded successfully
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
self.setUp()
|
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
|
||||||
stub = backend_pb2_grpc.BackendStub(channel)
|
|
||||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1"))
|
|
||||||
self.assertTrue(response.success)
|
|
||||||
self.assertEqual(response.message, "Model loaded successfully")
|
|
||||||
except Exception as err:
|
|
||||||
print(err)
|
|
||||||
self.fail("LoadModel service failed")
|
|
||||||
finally:
|
|
||||||
self.tearDown()
|
|
||||||
|
|
||||||
def test_tts(self):
|
|
||||||
"""
|
|
||||||
This method tests if the embeddings are generated successfully
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
self.setUp()
|
|
||||||
with grpc.insecure_channel("localhost:50051") as channel:
|
|
||||||
stub = backend_pb2_grpc.BackendStub(channel)
|
|
||||||
response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1"))
|
|
||||||
self.assertTrue(response.success)
|
|
||||||
tts_request = backend_pb2.TTSRequest(text="Hey, how are you doing today?")
|
|
||||||
tts_response = stub.TTS(tts_request)
|
|
||||||
self.assertIsNotNone(tts_response)
|
|
||||||
except Exception as err:
|
|
||||||
print(err)
|
|
||||||
self.fail("TTS service failed")
|
|
||||||
finally:
|
|
||||||
self.tearDown()
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -e
|
|
||||||
|
|
||||||
source $(dirname $0)/../common/libbackend.sh
|
|
||||||
|
|
||||||
runUnittests
|
|
||||||
5
backend/python/rerankers/requirements-l4t.txt
Normal file
5
backend/python/rerankers/requirements-l4t.txt
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
|
||||||
|
transformers
|
||||||
|
accelerate
|
||||||
|
torch
|
||||||
|
rerankers[transformers]
|
||||||
@@ -1,3 +1,3 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.70.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -21,7 +21,7 @@ import torch.cuda
|
|||||||
|
|
||||||
|
|
||||||
XPU=os.environ.get("XPU", "0") == "1"
|
XPU=os.environ.get("XPU", "0") == "1"
|
||||||
from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria
|
from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria, MambaConfig, MambaForCausalLM
|
||||||
from transformers import AutoProcessor, MusicgenForConditionalGeneration
|
from transformers import AutoProcessor, MusicgenForConditionalGeneration
|
||||||
from scipy.io import wavfile
|
from scipy.io import wavfile
|
||||||
import outetts
|
import outetts
|
||||||
@@ -245,6 +245,10 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
autoTokenizer = False
|
autoTokenizer = False
|
||||||
self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
|
self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
|
||||||
self.SentenceTransformer = True
|
self.SentenceTransformer = True
|
||||||
|
elif request.Type == "Mamba":
|
||||||
|
autoTokenizer = False
|
||||||
|
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||||
|
self.model = MambaForCausalLM.from_pretrained(model_name)
|
||||||
else:
|
else:
|
||||||
print("Automodel", file=sys.stderr)
|
print("Automodel", file=sys.stderr)
|
||||||
self.model = AutoModel.from_pretrained(model_name,
|
self.model = AutoModel.from_pretrained(model_name,
|
||||||
|
|||||||
@@ -5,4 +5,4 @@ accelerate
|
|||||||
transformers
|
transformers
|
||||||
bitsandbytes
|
bitsandbytes
|
||||||
outetts
|
outetts
|
||||||
sentence-transformers==3.3.1
|
sentence-transformers==3.4.1
|
||||||
@@ -6,4 +6,4 @@ accelerate
|
|||||||
transformers
|
transformers
|
||||||
bitsandbytes
|
bitsandbytes
|
||||||
outetts
|
outetts
|
||||||
sentence-transformers==3.3.1
|
sentence-transformers==3.4.1
|
||||||
|
|||||||
@@ -5,4 +5,4 @@ numba==0.60.0
|
|||||||
transformers
|
transformers
|
||||||
bitsandbytes
|
bitsandbytes
|
||||||
outetts
|
outetts
|
||||||
sentence-transformers==3.3.1
|
sentence-transformers==3.4.1
|
||||||
|
|||||||
@@ -7,4 +7,4 @@ numba==0.60.0
|
|||||||
bitsandbytes
|
bitsandbytes
|
||||||
outetts
|
outetts
|
||||||
bitsandbytes
|
bitsandbytes
|
||||||
sentence-transformers==3.3.1
|
sentence-transformers==3.4.1
|
||||||
|
|||||||
@@ -8,4 +8,4 @@ numba==0.60.0
|
|||||||
intel-extension-for-transformers
|
intel-extension-for-transformers
|
||||||
bitsandbytes
|
bitsandbytes
|
||||||
outetts
|
outetts
|
||||||
sentence-transformers==3.3.1
|
sentence-transformers==3.4.1
|
||||||
|
|||||||
9
backend/python/transformers/requirements-l4t.txt
Normal file
9
backend/python/transformers/requirements-l4t.txt
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
|
||||||
|
torch
|
||||||
|
accelerate
|
||||||
|
llvmlite==0.43.0
|
||||||
|
numba==0.60.0
|
||||||
|
transformers
|
||||||
|
bitsandbytes
|
||||||
|
outetts
|
||||||
|
sentence-transformers==3.4.1
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.70.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
setuptools
|
setuptools
|
||||||
|
|||||||
@@ -1 +0,0 @@
|
|||||||
vllm
|
|
||||||
@@ -1,3 +1,4 @@
|
|||||||
accelerate
|
accelerate
|
||||||
torch==2.4.1
|
torch==2.4.1
|
||||||
transformers
|
transformers
|
||||||
|
vllm
|
||||||
@@ -2,4 +2,5 @@
|
|||||||
accelerate
|
accelerate
|
||||||
torch==2.4.1+cu118
|
torch==2.4.1+cu118
|
||||||
transformers
|
transformers
|
||||||
bitsandbytes
|
bitsandbytes
|
||||||
|
vllm
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
accelerate
|
accelerate
|
||||||
torch==2.4.1
|
torch==2.4.1
|
||||||
transformers
|
transformers
|
||||||
bitsandbytes
|
bitsandbytes
|
||||||
|
vllm
|
||||||
@@ -2,4 +2,5 @@
|
|||||||
accelerate
|
accelerate
|
||||||
torch==2.4.1+rocm6.0
|
torch==2.4.1+rocm6.0
|
||||||
transformers
|
transformers
|
||||||
bitsandbytes
|
bitsandbytes
|
||||||
|
vllm
|
||||||
@@ -6,4 +6,5 @@ transformers
|
|||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools
|
setuptools
|
||||||
bitsandbytes
|
bitsandbytes
|
||||||
oneccl_bind_pt==2.3.100+xpu
|
oneccl_bind_pt==2.3.100+xpu
|
||||||
|
vllm
|
||||||
7
backend/python/vllm/requirements-l4t.txt
Normal file
7
backend/python/vllm/requirements-l4t.txt
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
--index-url https://pypi.jetson-ai-lab.dev/jp6/cu126/
|
||||||
|
accelerate
|
||||||
|
torch
|
||||||
|
vllm
|
||||||
|
transformers
|
||||||
|
bitsandbytes
|
||||||
|
flash-attn
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.70.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
setuptools
|
setuptools
|
||||||
@@ -62,7 +62,7 @@ func New(opts ...config.AppOption) (*Application, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
|
if err := pkgStartup.InstallModels(options.Galleries, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
|
||||||
log.Error().Err(err).Msg("error installing models")
|
log.Error().Err(err).Msg("error installing models")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -118,9 +118,19 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
|||||||
nGPULayers = *c.NGPULayers
|
nGPULayers = *c.NGPULayers
|
||||||
}
|
}
|
||||||
|
|
||||||
|
triggers := make([]*pb.GrammarTrigger, 0)
|
||||||
|
for _, t := range c.FunctionsConfig.GrammarConfig.GrammarTriggers {
|
||||||
|
triggers = append(triggers, &pb.GrammarTrigger{
|
||||||
|
Word: t.Word,
|
||||||
|
AtStart: t.AtStart,
|
||||||
|
})
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
return &pb.ModelOptions{
|
return &pb.ModelOptions{
|
||||||
CUDA: c.CUDA || c.Diffusers.CUDA,
|
CUDA: c.CUDA || c.Diffusers.CUDA,
|
||||||
SchedulerType: c.Diffusers.SchedulerType,
|
SchedulerType: c.Diffusers.SchedulerType,
|
||||||
|
GrammarTriggers: triggers,
|
||||||
PipelineType: c.Diffusers.PipelineType,
|
PipelineType: c.Diffusers.PipelineType,
|
||||||
CFGScale: c.CFGScale,
|
CFGScale: c.CFGScale,
|
||||||
LoraAdapter: c.LoraAdapter,
|
LoraAdapter: c.LoraAdapter,
|
||||||
|
|||||||
@@ -16,12 +16,7 @@ func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.Bac
|
|||||||
|
|
||||||
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
|
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
|
||||||
|
|
||||||
if backendConfig.Backend == "" {
|
inferenceModel, err = loader.Load(opts...)
|
||||||
inferenceModel, err = loader.Load(opts...)
|
|
||||||
} else {
|
|
||||||
opts = append(opts, model.WithBackendString(backendConfig.Backend))
|
|
||||||
inferenceModel, err = loader.Load(opts...)
|
|
||||||
}
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return schema.TokenizeResponse{}, err
|
return schema.TokenizeResponse{}, err
|
||||||
}
|
}
|
||||||
@@ -35,6 +30,10 @@ func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.Bac
|
|||||||
return schema.TokenizeResponse{}, err
|
return schema.TokenizeResponse{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if resp.Tokens == nil {
|
||||||
|
resp.Tokens = make([]int32, 0)
|
||||||
|
}
|
||||||
|
|
||||||
return schema.TokenizeResponse{
|
return schema.TokenizeResponse{
|
||||||
Tokens: resp.Tokens,
|
Tokens: resp.Tokens,
|
||||||
}, nil
|
}, nil
|
||||||
|
|||||||
@@ -100,7 +100,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
|
|||||||
log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
|
log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
|
||||||
}
|
}
|
||||||
|
|
||||||
err = startup.InstallModels(galleries, "", mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
|
err = startup.InstallModels(galleries, mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -32,7 +32,6 @@ type RunCMD struct {
|
|||||||
|
|
||||||
Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
|
Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
|
||||||
AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"`
|
AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"`
|
||||||
RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"`
|
|
||||||
PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"`
|
PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"`
|
||||||
Models []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"`
|
Models []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"`
|
||||||
PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"`
|
PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"`
|
||||||
@@ -90,7 +89,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
|||||||
config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval),
|
config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval),
|
||||||
config.WithF16(r.F16),
|
config.WithF16(r.F16),
|
||||||
config.WithStringGalleries(r.Galleries),
|
config.WithStringGalleries(r.Galleries),
|
||||||
config.WithModelLibraryURL(r.RemoteLibrary),
|
|
||||||
config.WithCors(r.CORS),
|
config.WithCors(r.CORS),
|
||||||
config.WithCorsAllowOrigins(r.CORSAllowOrigins),
|
config.WithCorsAllowOrigins(r.CORSAllowOrigins),
|
||||||
config.WithCsrf(r.CSRF),
|
config.WithCsrf(r.CSRF),
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user