mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-19 14:17:21 -04:00
Compare commits
128 Commits
v2.12.1
...
dave-gray1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2c97440be5 | ||
|
|
4ae4e44506 | ||
|
|
2ada13b1ad | ||
|
|
5d170e9264 | ||
|
|
1b0a64aa46 | ||
|
|
aa8e1c63d5 | ||
|
|
60690c9fc4 | ||
|
|
758b0c9042 | ||
|
|
48d0aa2f6d | ||
|
|
b664edde29 | ||
|
|
e16658b7ec | ||
|
|
d30280ed23 | ||
|
|
9dbd217c59 | ||
|
|
23eac98b3c | ||
|
|
4fffc47e77 | ||
|
|
d65214a234 | ||
|
|
2fb34b00b5 | ||
|
|
f718a391c0 | ||
|
|
ac56ac2b2d | ||
|
|
34c3f563fd | ||
|
|
d2bea6f9e3 | ||
|
|
a09fe1b9ba | ||
|
|
55778b35ff | ||
|
|
8b169f1dac | ||
|
|
d344daf129 | ||
|
|
3411e072ca | ||
|
|
8e36fe9b6f | ||
|
|
0d8bf91699 | ||
|
|
bd507678be | ||
|
|
b6f0e80d54 | ||
|
|
729378ca98 | ||
|
|
220958a87c | ||
|
|
f3f6535aad | ||
|
|
228bc4903f | ||
|
|
38c9abed8b | ||
|
|
66b002458d | ||
|
|
39814cab32 | ||
|
|
180cd4ccda | ||
|
|
284ad026b1 | ||
|
|
afa1bca1e3 | ||
|
|
03adc1f60d | ||
|
|
b319ed58b0 | ||
|
|
8d30b39811 | ||
|
|
1038f7469c | ||
|
|
b9e7708643 | ||
|
|
1e37101930 | ||
|
|
b2772509b4 | ||
|
|
27ec84827c | ||
|
|
852316c5a6 | ||
|
|
e9448005a5 | ||
|
|
bbea62b907 | ||
|
|
13012cfa70 | ||
|
|
8f2681f904 | ||
|
|
f9c75d4878 | ||
|
|
502c1eedaa | ||
|
|
e9f090257c | ||
|
|
af9e5a2d05 | ||
|
|
af8c705ecd | ||
|
|
5763dc1613 | ||
|
|
6b06d4e0af | ||
|
|
bcaa320f36 | ||
|
|
33c78d2228 | ||
|
|
df4a13a08b | ||
|
|
fdec8a9d00 | ||
|
|
0cc1ad2188 | ||
|
|
cdece3879f | ||
|
|
320d8a48d9 | ||
|
|
46609e936e | ||
|
|
b72c6cc9fc | ||
|
|
538a086309 | ||
|
|
c751a4ac06 | ||
|
|
e843d7df0e | ||
|
|
de3a1a0a8e | ||
|
|
57bd365d87 | ||
|
|
b739cbb86b | ||
|
|
4486db912b | ||
|
|
6b07ded119 | ||
|
|
d5699dbf4f | ||
|
|
0fdff26924 | ||
|
|
619f2517a4 | ||
|
|
b91820b7f8 | ||
|
|
4e74560649 | ||
|
|
95244ed6e7 | ||
|
|
f1f39eea3f | ||
|
|
eed5706994 | ||
|
|
1981154f49 | ||
|
|
a8ebf6f575 | ||
|
|
912d2dccfa | ||
|
|
fcb63aed8a | ||
|
|
0e549424e7 | ||
|
|
69d638268b | ||
|
|
18eea9088a | ||
|
|
fb105837ba | ||
|
|
7e52c8e21a | ||
|
|
d068839896 | ||
|
|
e0dee52a2a | ||
|
|
677e20756b | ||
|
|
b2785ff06e | ||
|
|
da82ce81b5 | ||
|
|
70c4f110a4 | ||
|
|
099bd54ff2 | ||
|
|
12c0d9443e | ||
|
|
cbda06fb96 | ||
|
|
b1a242251c | ||
|
|
fce606fc0f | ||
|
|
b606c7b768 | ||
|
|
0a6956b029 | ||
|
|
821cf0e3fd | ||
|
|
11a0418510 | ||
|
|
40781ac013 | ||
|
|
fdfd868953 | ||
|
|
0795975486 | ||
|
|
a49248d29f | ||
|
|
182fef339d | ||
|
|
c74dec7e38 | ||
|
|
b4548ad72d | ||
|
|
e152b07b74 | ||
|
|
0e44a4e664 | ||
|
|
24d7dadfed | ||
|
|
92005b9c02 | ||
|
|
636d487dc8 | ||
|
|
93f51d80d4 | ||
|
|
36da11a0ee | ||
|
|
d23e73b118 | ||
|
|
d692b2c32a | ||
|
|
7e2f8bb408 | ||
|
|
951e39d36c | ||
|
|
aeb3f835ae |
@@ -1,6 +1,11 @@
|
|||||||
.idea
|
.idea
|
||||||
|
.github
|
||||||
|
.vscode
|
||||||
models
|
models
|
||||||
examples/chatbot-ui/models
|
examples/chatbot-ui/models
|
||||||
examples/rwkv/models
|
examples/rwkv/models
|
||||||
examples/**/models
|
examples/**/models
|
||||||
Dockerfile*
|
Dockerfile*
|
||||||
|
|
||||||
|
# SonarQube
|
||||||
|
.scannerwork
|
||||||
38
.env
38
.env
@@ -1,33 +1,33 @@
|
|||||||
## Set number of threads.
|
## Set number of threads.
|
||||||
## Note: prefer the number of physical cores. Overbooking the CPU degrades performance notably.
|
## Note: prefer the number of physical cores. Overbooking the CPU degrades performance notably.
|
||||||
# THREADS=14
|
# LOCALAI_THREADS=14
|
||||||
|
|
||||||
## Specify a different bind address (defaults to ":8080")
|
## Specify a different bind address (defaults to ":8080")
|
||||||
# ADDRESS=127.0.0.1:8080
|
# LOCALAI_ADDRESS=127.0.0.1:8080
|
||||||
|
|
||||||
## Default models context size
|
## Default models context size
|
||||||
# CONTEXT_SIZE=512
|
# LOCALAI_CONTEXT_SIZE=512
|
||||||
#
|
#
|
||||||
## Define galleries.
|
## Define galleries.
|
||||||
## Models available to install will be visible in `/models/available`
|
## Models available to install will be visible in `/models/available`
|
||||||
# GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}]
|
# LOCALAI_GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}]
|
||||||
|
|
||||||
## CORS settings
|
## CORS settings
|
||||||
# CORS=true
|
# LOCALAI_CORS=true
|
||||||
# CORS_ALLOW_ORIGINS=*
|
# LOCALAI_CORS_ALLOW_ORIGINS=*
|
||||||
|
|
||||||
## Default path for models
|
## Default path for models
|
||||||
#
|
#
|
||||||
# MODELS_PATH=/models
|
# LOCALAI_MODELS_PATH=/models
|
||||||
|
|
||||||
## Enable debug mode
|
## Enable debug mode
|
||||||
# DEBUG=true
|
# LOCALAI_LOG_LEVEL=debug
|
||||||
|
|
||||||
## Disables COMPEL (Diffusers)
|
## Disables COMPEL (Diffusers)
|
||||||
# COMPEL=0
|
# COMPEL=0
|
||||||
|
|
||||||
## Enable/Disable single backend (useful if only one GPU is available)
|
## Enable/Disable single backend (useful if only one GPU is available)
|
||||||
# SINGLE_ACTIVE_BACKEND=true
|
# LOCALAI_SINGLE_ACTIVE_BACKEND=true
|
||||||
|
|
||||||
## Specify a build type. Available: cublas, openblas, clblas.
|
## Specify a build type. Available: cublas, openblas, clblas.
|
||||||
## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
|
## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
|
||||||
@@ -46,13 +46,13 @@
|
|||||||
# GO_TAGS=stablediffusion
|
# GO_TAGS=stablediffusion
|
||||||
|
|
||||||
## Path where to store generated images
|
## Path where to store generated images
|
||||||
# IMAGE_PATH=/tmp
|
# LOCALAI_IMAGE_PATH=/tmp/generated/images
|
||||||
|
|
||||||
## Specify a default upload limit in MB (whisper)
|
## Specify a default upload limit in MB (whisper)
|
||||||
# UPLOAD_LIMIT
|
# LOCALAI_UPLOAD_LIMIT=15
|
||||||
|
|
||||||
## List of external GRPC backends (note on the container image this variable is already set to use extra backends available in extra/)
|
## List of external GRPC backends (note on the container image this variable is already set to use extra backends available in extra/)
|
||||||
# EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py
|
# LOCALAI_EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py
|
||||||
|
|
||||||
### Advanced settings ###
|
### Advanced settings ###
|
||||||
### These are not used directly by LocalAI, but by other components in the stack ###
|
### These are not used directly by LocalAI, but by other components in the stack ###
|
||||||
@@ -72,18 +72,18 @@
|
|||||||
# LLAMACPP_PARALLEL=1
|
# LLAMACPP_PARALLEL=1
|
||||||
|
|
||||||
### Enable to run parallel requests
|
### Enable to run parallel requests
|
||||||
# PARALLEL_REQUESTS=true
|
# LOCALAI_PARALLEL_REQUESTS=true
|
||||||
|
|
||||||
### Watchdog settings
|
### Watchdog settings
|
||||||
###
|
###
|
||||||
# Enables watchdog to kill backends that are inactive for too long
|
# Enables watchdog to kill backends that are inactive for too long
|
||||||
# WATCHDOG_IDLE=true
|
# LOCALAI_WATCHDOG_IDLE=true
|
||||||
#
|
|
||||||
# Enables watchdog to kill backends that are busy for too long
|
|
||||||
# WATCHDOG_BUSY=true
|
|
||||||
#
|
#
|
||||||
# Time in duration format (e.g. 1h30m) after which a backend is considered idle
|
# Time in duration format (e.g. 1h30m) after which a backend is considered idle
|
||||||
# WATCHDOG_IDLE_TIMEOUT=5m
|
# LOCALAI_WATCHDOG_IDLE_TIMEOUT=5m
|
||||||
|
#
|
||||||
|
# Enables watchdog to kill backends that are busy for too long
|
||||||
|
# LOCALAI_WATCHDOG_BUSY=true
|
||||||
#
|
#
|
||||||
# Time in duration format (e.g. 1h30m) after which a backend is considered busy
|
# Time in duration format (e.g. 1h30m) after which a backend is considered busy
|
||||||
# WATCHDOG_BUSY_TIMEOUT=5m
|
# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m
|
||||||
25
.github/dependabot.yml
vendored
Normal file
25
.github/dependabot.yml
vendored
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
|
||||||
|
version: 2
|
||||||
|
updates:
|
||||||
|
- package-ecosystem: "gomod"
|
||||||
|
directory: "/"
|
||||||
|
schedule:
|
||||||
|
interval: "weekly"
|
||||||
|
- package-ecosystem: "github-actions"
|
||||||
|
# Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
|
||||||
|
directory: "/"
|
||||||
|
schedule:
|
||||||
|
# Check for updates to GitHub Actions every week
|
||||||
|
interval: "weekly"
|
||||||
|
- package-ecosystem: "pip"
|
||||||
|
# Look for pip dependency manifests starting from the repository root (`directory: "/"`).
|
||||||
|
directory: "/"
|
||||||
|
schedule:
|
||||||
|
# Check for updates to pip dependencies every week
|
||||||
|
interval: "weekly"
|
||||||
|
- package-ecosystem: "docker"
|
||||||
|
# Look for Docker manifests (e.g. the Dockerfile) starting from the repository root (`directory: "/"`).
|
||||||
|
directory: "/"
|
||||||
|
schedule:
|
||||||
|
# Check for updates to Docker dependencies every week
|
||||||
|
interval: "weekly"
|
||||||
2
.github/workflows/bump_deps.yaml
vendored
2
.github/workflows/bump_deps.yaml
vendored
@@ -49,7 +49,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
|
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
|
||||||
- name: Create Pull Request
|
- name: Create Pull Request
|
||||||
uses: peter-evans/create-pull-request@v5
|
uses: peter-evans/create-pull-request@v6
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||||
push-to-fork: ci-forks/LocalAI
|
push-to-fork: ci-forks/LocalAI
|
||||||
|
|||||||
2
.github/workflows/bump_docs.yaml
vendored
2
.github/workflows/bump_docs.yaml
vendored
@@ -17,7 +17,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
bash .github/bump_docs.sh ${{ matrix.repository }}
|
bash .github/bump_docs.sh ${{ matrix.repository }}
|
||||||
- name: Create Pull Request
|
- name: Create Pull Request
|
||||||
uses: peter-evans/create-pull-request@v5
|
uses: peter-evans/create-pull-request@v6
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||||
push-to-fork: ci-forks/LocalAI
|
push-to-fork: ci-forks/LocalAI
|
||||||
|
|||||||
43
.github/workflows/dependabot_auto.yml
vendored
Normal file
43
.github/workflows/dependabot_auto.yml
vendored
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
name: Dependabot auto-merge
|
||||||
|
on:
|
||||||
|
- pull_request_target
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: write
|
||||||
|
pull-requests: write
|
||||||
|
packages: read
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
dependabot:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
if: ${{ github.actor == 'dependabot[bot]' }}
|
||||||
|
steps:
|
||||||
|
- name: Dependabot metadata
|
||||||
|
id: metadata
|
||||||
|
uses: dependabot/fetch-metadata@v2.0.0
|
||||||
|
with:
|
||||||
|
github-token: "${{ secrets.GITHUB_TOKEN }}"
|
||||||
|
skip-commit-verification: true
|
||||||
|
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Approve a PR if not already approved
|
||||||
|
run: |
|
||||||
|
gh pr checkout "$PR_URL"
|
||||||
|
if [ "$(gh pr status --json reviewDecision -q .currentBranch.reviewDecision)" != "APPROVED" ];
|
||||||
|
then
|
||||||
|
gh pr review --approve "$PR_URL"
|
||||||
|
else
|
||||||
|
echo "PR already approved.";
|
||||||
|
fi
|
||||||
|
env:
|
||||||
|
PR_URL: ${{github.event.pull_request.html_url}}
|
||||||
|
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
|
||||||
|
|
||||||
|
- name: Enable auto-merge for Dependabot PRs
|
||||||
|
if: ${{ contains(github.event.pull_request.title, 'bump')}}
|
||||||
|
run: gh pr merge --auto --squash "$PR_URL"
|
||||||
|
env:
|
||||||
|
PR_URL: ${{github.event.pull_request.html_url}}
|
||||||
|
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
|
||||||
90
.github/workflows/generate_grpc_cache.yaml
vendored
Normal file
90
.github/workflows/generate_grpc_cache.yaml
vendored
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
name: 'generate and publish GRPC docker caches'
|
||||||
|
|
||||||
|
on:
|
||||||
|
- workflow_dispatch
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: grpc-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
generate_caches:
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- grpc-base-image: ubuntu:22.04
|
||||||
|
runs-on: 'ubuntu-latest'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
runs-on: ${{matrix.runs-on}}
|
||||||
|
steps:
|
||||||
|
- name: Release space from worker
|
||||||
|
if: matrix.runs-on == 'ubuntu-latest'
|
||||||
|
run: |
|
||||||
|
echo "Listing top largest packages"
|
||||||
|
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
||||||
|
head -n 30 <<< "${pkgs}"
|
||||||
|
echo
|
||||||
|
df -h
|
||||||
|
echo
|
||||||
|
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
|
||||||
|
sudo apt-get remove --auto-remove android-sdk-platform-tools || true
|
||||||
|
sudo apt-get purge --auto-remove android-sdk-platform-tools || true
|
||||||
|
sudo rm -rf /usr/local/lib/android
|
||||||
|
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
|
||||||
|
sudo rm -rf /usr/share/dotnet
|
||||||
|
sudo apt-get remove -y '^mono-.*' || true
|
||||||
|
sudo apt-get remove -y '^ghc-.*' || true
|
||||||
|
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
|
||||||
|
sudo apt-get remove -y 'php.*' || true
|
||||||
|
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
|
||||||
|
sudo apt-get remove -y '^google-.*' || true
|
||||||
|
sudo apt-get remove -y azure-cli || true
|
||||||
|
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
|
||||||
|
sudo apt-get remove -y '^gfortran-.*' || true
|
||||||
|
sudo apt-get remove -y microsoft-edge-stable || true
|
||||||
|
sudo apt-get remove -y firefox || true
|
||||||
|
sudo apt-get remove -y powershell || true
|
||||||
|
sudo apt-get remove -y r-base-core || true
|
||||||
|
sudo apt-get autoremove -y
|
||||||
|
sudo apt-get clean
|
||||||
|
echo
|
||||||
|
echo "Listing top largest packages"
|
||||||
|
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
||||||
|
head -n 30 <<< "${pkgs}"
|
||||||
|
echo
|
||||||
|
sudo rm -rfv build || true
|
||||||
|
sudo rm -rf /usr/share/dotnet || true
|
||||||
|
sudo rm -rf /opt/ghc || true
|
||||||
|
sudo rm -rf "/usr/local/share/boost" || true
|
||||||
|
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
|
||||||
|
df -h
|
||||||
|
|
||||||
|
- name: Set up QEMU
|
||||||
|
uses: docker/setup-qemu-action@master
|
||||||
|
with:
|
||||||
|
platforms: all
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
id: buildx
|
||||||
|
uses: docker/setup-buildx-action@master
|
||||||
|
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Cache GRPC
|
||||||
|
uses: docker/build-push-action@v5
|
||||||
|
with:
|
||||||
|
builder: ${{ steps.buildx.outputs.name }}
|
||||||
|
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
|
||||||
|
# This means that even the MAKEFLAGS have to be an EXACT match.
|
||||||
|
# If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
|
||||||
|
build-args: |
|
||||||
|
GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
|
||||||
|
MAKEFLAGS=--jobs=4 --output-sync=target
|
||||||
|
GRPC_VERSION=v1.58.0
|
||||||
|
context: .
|
||||||
|
file: ./Dockerfile
|
||||||
|
cache-to: type=gha,ignore-error=true
|
||||||
|
target: grpc
|
||||||
|
platforms: ${{ matrix.platforms }}
|
||||||
|
push: false
|
||||||
9
.github/workflows/image-pr.yml
vendored
9
.github/workflows/image-pr.yml
vendored
@@ -22,6 +22,7 @@ jobs:
|
|||||||
platforms: ${{ matrix.platforms }}
|
platforms: ${{ matrix.platforms }}
|
||||||
runs-on: ${{ matrix.runs-on }}
|
runs-on: ${{ matrix.runs-on }}
|
||||||
base-image: ${{ matrix.base-image }}
|
base-image: ${{ matrix.base-image }}
|
||||||
|
grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||||
makeflags: ${{ matrix.makeflags }}
|
makeflags: ${{ matrix.makeflags }}
|
||||||
secrets:
|
secrets:
|
||||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
@@ -61,12 +62,14 @@ jobs:
|
|||||||
ffmpeg: 'false'
|
ffmpeg: 'false'
|
||||||
image-type: 'extras'
|
image-type: 'extras'
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
|
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'sycl_f16'
|
- build-type: 'sycl_f16'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
tag-suffix: 'sycl-f16-ffmpeg'
|
tag-suffix: 'sycl-f16-ffmpeg'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'extras'
|
image-type: 'extras'
|
||||||
@@ -85,6 +88,7 @@ jobs:
|
|||||||
platforms: ${{ matrix.platforms }}
|
platforms: ${{ matrix.platforms }}
|
||||||
runs-on: ${{ matrix.runs-on }}
|
runs-on: ${{ matrix.runs-on }}
|
||||||
base-image: ${{ matrix.base-image }}
|
base-image: ${{ matrix.base-image }}
|
||||||
|
grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||||
makeflags: ${{ matrix.makeflags }}
|
makeflags: ${{ matrix.makeflags }}
|
||||||
secrets:
|
secrets:
|
||||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
@@ -102,11 +106,12 @@ jobs:
|
|||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=5 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
- build-type: 'sycl_f16'
|
- build-type: 'sycl_f16'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
tag-suffix: 'sycl-f16-ffmpeg-core'
|
tag-suffix: 'sycl-f16-ffmpeg-core'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
@@ -122,4 +127,4 @@ jobs:
|
|||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=5 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
22
.github/workflows/image.yml
vendored
22
.github/workflows/image.yml
vendored
@@ -26,6 +26,7 @@ jobs:
|
|||||||
platforms: ${{ matrix.platforms }}
|
platforms: ${{ matrix.platforms }}
|
||||||
runs-on: ${{ matrix.runs-on }}
|
runs-on: ${{ matrix.runs-on }}
|
||||||
base-image: ${{ matrix.base-image }}
|
base-image: ${{ matrix.base-image }}
|
||||||
|
grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||||
aio: ${{ matrix.aio }}
|
aio: ${{ matrix.aio }}
|
||||||
makeflags: ${{ matrix.makeflags }}
|
makeflags: ${{ matrix.makeflags }}
|
||||||
latest-image: ${{ matrix.latest-image }}
|
latest-image: ${{ matrix.latest-image }}
|
||||||
@@ -129,6 +130,7 @@ jobs:
|
|||||||
image-type: 'extras'
|
image-type: 'extras'
|
||||||
aio: "-aio-gpu-hipblas"
|
aio: "-aio-gpu-hipblas"
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
|
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
latest-image: 'latest-gpu-hipblas'
|
latest-image: 'latest-gpu-hipblas'
|
||||||
latest-image-aio: 'latest-aio-gpu-hipblas'
|
latest-image-aio: 'latest-aio-gpu-hipblas'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
@@ -140,12 +142,14 @@ jobs:
|
|||||||
ffmpeg: 'false'
|
ffmpeg: 'false'
|
||||||
image-type: 'extras'
|
image-type: 'extras'
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
|
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'sycl_f16'
|
- build-type: 'sycl_f16'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
tag-suffix: '-sycl-f16-ffmpeg'
|
tag-suffix: '-sycl-f16-ffmpeg'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'extras'
|
image-type: 'extras'
|
||||||
@@ -158,6 +162,7 @@ jobs:
|
|||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
tag-suffix: '-sycl-f32-ffmpeg'
|
tag-suffix: '-sycl-f32-ffmpeg'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'extras'
|
image-type: 'extras'
|
||||||
@@ -171,6 +176,7 @@ jobs:
|
|||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
tag-suffix: '-sycl-f16-core'
|
tag-suffix: '-sycl-f16-core'
|
||||||
ffmpeg: 'false'
|
ffmpeg: 'false'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
@@ -180,6 +186,7 @@ jobs:
|
|||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
tag-suffix: '-sycl-f32-core'
|
tag-suffix: '-sycl-f32-core'
|
||||||
ffmpeg: 'false'
|
ffmpeg: 'false'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
@@ -189,6 +196,7 @@ jobs:
|
|||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
tag-suffix: '-sycl-f16-ffmpeg-core'
|
tag-suffix: '-sycl-f16-ffmpeg-core'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
@@ -198,6 +206,7 @@ jobs:
|
|||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'false'
|
tag-latest: 'false'
|
||||||
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
tag-suffix: '-sycl-f32-ffmpeg-core'
|
tag-suffix: '-sycl-f32-ffmpeg-core'
|
||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
@@ -210,6 +219,7 @@ jobs:
|
|||||||
ffmpeg: 'true'
|
ffmpeg: 'true'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
|
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'hipblas'
|
- build-type: 'hipblas'
|
||||||
@@ -219,6 +229,7 @@ jobs:
|
|||||||
ffmpeg: 'false'
|
ffmpeg: 'false'
|
||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
|
base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
|
||||||
@@ -236,6 +247,7 @@ jobs:
|
|||||||
runs-on: ${{ matrix.runs-on }}
|
runs-on: ${{ matrix.runs-on }}
|
||||||
aio: ${{ matrix.aio }}
|
aio: ${{ matrix.aio }}
|
||||||
base-image: ${{ matrix.base-image }}
|
base-image: ${{ matrix.base-image }}
|
||||||
|
grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||||
makeflags: ${{ matrix.makeflags }}
|
makeflags: ${{ matrix.makeflags }}
|
||||||
latest-image: ${{ matrix.latest-image }}
|
latest-image: ${{ matrix.latest-image }}
|
||||||
latest-image-aio: ${{ matrix.latest-image-aio }}
|
latest-image-aio: ${{ matrix.latest-image-aio }}
|
||||||
@@ -258,7 +270,7 @@ jobs:
|
|||||||
aio: "-aio-cpu"
|
aio: "-aio-cpu"
|
||||||
latest-image: 'latest-cpu'
|
latest-image: 'latest-cpu'
|
||||||
latest-image-aio: 'latest-aio-cpu'
|
latest-image-aio: 'latest-aio-cpu'
|
||||||
makeflags: "--jobs=5 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "11"
|
cuda-major-version: "11"
|
||||||
cuda-minor-version: "7"
|
cuda-minor-version: "7"
|
||||||
@@ -269,7 +281,7 @@ jobs:
|
|||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
makeflags: "--jobs=5 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "12"
|
cuda-major-version: "12"
|
||||||
cuda-minor-version: "1"
|
cuda-minor-version: "1"
|
||||||
@@ -280,7 +292,7 @@ jobs:
|
|||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
makeflags: "--jobs=5 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "11"
|
cuda-major-version: "11"
|
||||||
cuda-minor-version: "7"
|
cuda-minor-version: "7"
|
||||||
@@ -291,7 +303,7 @@ jobs:
|
|||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=5 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "12"
|
cuda-major-version: "12"
|
||||||
cuda-minor-version: "1"
|
cuda-minor-version: "1"
|
||||||
@@ -302,4 +314,4 @@ jobs:
|
|||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=5 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
|
|||||||
17
.github/workflows/image_build.yml
vendored
17
.github/workflows/image_build.yml
vendored
@@ -6,6 +6,10 @@ on:
|
|||||||
inputs:
|
inputs:
|
||||||
base-image:
|
base-image:
|
||||||
description: 'Base image'
|
description: 'Base image'
|
||||||
|
required: true
|
||||||
|
type: string
|
||||||
|
grpc-base-image:
|
||||||
|
description: 'GRPC Base image, must be a compatible image with base-image'
|
||||||
required: false
|
required: false
|
||||||
default: ''
|
default: ''
|
||||||
type: string
|
type: string
|
||||||
@@ -57,7 +61,7 @@ on:
|
|||||||
makeflags:
|
makeflags:
|
||||||
description: 'Make Flags'
|
description: 'Make Flags'
|
||||||
required: false
|
required: false
|
||||||
default: '--jobs=3 --output-sync=target'
|
default: '--jobs=4 --output-sync=target'
|
||||||
type: string
|
type: string
|
||||||
aio:
|
aio:
|
||||||
description: 'AIO Image Name'
|
description: 'AIO Image Name'
|
||||||
@@ -201,15 +205,16 @@ jobs:
|
|||||||
uses: docker/build-push-action@v5
|
uses: docker/build-push-action@v5
|
||||||
with:
|
with:
|
||||||
builder: ${{ steps.buildx.outputs.name }}
|
builder: ${{ steps.buildx.outputs.name }}
|
||||||
|
# The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
|
||||||
|
# This means that even the MAKEFLAGS have to be an EXACT match.
|
||||||
|
# If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
|
||||||
build-args: |
|
build-args: |
|
||||||
IMAGE_TYPE=${{ inputs.image-type }}
|
GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
|
||||||
BASE_IMAGE=${{ inputs.base-image }}
|
MAKEFLAGS=--jobs=4 --output-sync=target
|
||||||
MAKEFLAGS=${{ inputs.makeflags }}
|
|
||||||
GRPC_VERSION=v1.58.0
|
GRPC_VERSION=v1.58.0
|
||||||
context: .
|
context: .
|
||||||
file: ./Dockerfile
|
file: ./Dockerfile
|
||||||
cache-from: type=gha
|
cache-from: type=gha
|
||||||
cache-to: type=gha,ignore-error=true
|
|
||||||
target: grpc
|
target: grpc
|
||||||
platforms: ${{ inputs.platforms }}
|
platforms: ${{ inputs.platforms }}
|
||||||
push: false
|
push: false
|
||||||
@@ -280,6 +285,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
docker pull localai/localai:${{ steps.meta.outputs.version }}
|
docker pull localai/localai:${{ steps.meta.outputs.version }}
|
||||||
docker tag localai/localai:${{ steps.meta.outputs.version }} localai/localai:${{ inputs.latest-image }}
|
docker tag localai/localai:${{ steps.meta.outputs.version }} localai/localai:${{ inputs.latest-image }}
|
||||||
|
docker push localai/localai:${{ inputs.latest-image }}
|
||||||
docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
|
docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
|
||||||
docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
|
docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
|
||||||
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
|
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
|
||||||
@@ -289,6 +295,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
docker pull localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }}
|
docker pull localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }}
|
||||||
docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image-aio }}
|
docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image-aio }}
|
||||||
|
docker push localai/localai:${{ inputs.latest-image-aio }}
|
||||||
docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
|
docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }}
|
||||||
docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
|
docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
|
||||||
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
|
docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
|
||||||
|
|||||||
35
.github/workflows/localaibot_automerge.yml
vendored
Normal file
35
.github/workflows/localaibot_automerge.yml
vendored
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
name: LocalAI-bot auto-merge
|
||||||
|
on:
|
||||||
|
- pull_request_target
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: write
|
||||||
|
pull-requests: write
|
||||||
|
packages: read
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
dependabot:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
if: ${{ github.actor == 'localai-bot' }}
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Approve a PR if not already approved
|
||||||
|
run: |
|
||||||
|
gh pr checkout "$PR_URL"
|
||||||
|
if [ "$(gh pr status --json reviewDecision -q .currentBranch.reviewDecision)" != "APPROVED" ];
|
||||||
|
then
|
||||||
|
gh pr review --approve "$PR_URL"
|
||||||
|
else
|
||||||
|
echo "PR already approved.";
|
||||||
|
fi
|
||||||
|
env:
|
||||||
|
PR_URL: ${{github.event.pull_request.html_url}}
|
||||||
|
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
|
||||||
|
|
||||||
|
- name: Enable auto-merge for LocalAIBot PRs
|
||||||
|
run: gh pr merge --auto --squash "$PR_URL"
|
||||||
|
env:
|
||||||
|
PR_URL: ${{github.event.pull_request.html_url}}
|
||||||
|
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
|
||||||
92
.github/workflows/release.yaml
vendored
92
.github/workflows/release.yaml
vendored
@@ -1,6 +1,8 @@
|
|||||||
name: Build and Release
|
name: Build and Release
|
||||||
|
|
||||||
on: push
|
on:
|
||||||
|
- push
|
||||||
|
- pull_request
|
||||||
|
|
||||||
env:
|
env:
|
||||||
GRPC_VERSION: v1.58.0
|
GRPC_VERSION: v1.58.0
|
||||||
@@ -33,14 +35,14 @@ jobs:
|
|||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- uses: actions/setup-go@v4
|
- uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version: '1.21.x'
|
go-version: '1.21.x'
|
||||||
cache: false
|
cache: false
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install build-essential ffmpeg
|
sudo apt-get install build-essential ffmpeg protobuf-compiler
|
||||||
- name: Install CUDA Dependencies
|
- name: Install CUDA Dependencies
|
||||||
if: ${{ matrix.build == 'cuda12' || matrix.build == 'cuda11' }}
|
if: ${{ matrix.build == 'cuda12' || matrix.build == 'cuda11' }}
|
||||||
run: |
|
run: |
|
||||||
@@ -55,7 +57,7 @@ jobs:
|
|||||||
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
||||||
- name: Cache grpc
|
- name: Cache grpc
|
||||||
id: cache-grpc
|
id: cache-grpc
|
||||||
uses: actions/cache@v3
|
uses: actions/cache@v4
|
||||||
with:
|
with:
|
||||||
path: grpc
|
path: grpc
|
||||||
key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
|
key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
|
||||||
@@ -75,6 +77,9 @@ jobs:
|
|||||||
CMAKE_ARGS: "${{ matrix.defines }}"
|
CMAKE_ARGS: "${{ matrix.defines }}"
|
||||||
BUILD_ID: "${{ matrix.build }}"
|
BUILD_ID: "${{ matrix.build }}"
|
||||||
run: |
|
run: |
|
||||||
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
|
||||||
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
|
||||||
|
export PATH=$PATH:$GOPATH/bin
|
||||||
if [ "${{ matrix.build }}" == "cuda12" ] || [ "${{ matrix.build }}" == "cuda11" ]; then
|
if [ "${{ matrix.build }}" == "cuda12" ] || [ "${{ matrix.build }}" == "cuda11" ]; then
|
||||||
export BUILD_TYPE=cublas
|
export BUILD_TYPE=cublas
|
||||||
export PATH=/usr/local/cuda/bin:$PATH
|
export PATH=/usr/local/cuda/bin:$PATH
|
||||||
@@ -82,12 +87,12 @@ jobs:
|
|||||||
else
|
else
|
||||||
STATIC=true make dist
|
STATIC=true make dist
|
||||||
fi
|
fi
|
||||||
- uses: actions/upload-artifact@v3
|
- uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: ${{ matrix.build }}
|
name: LocalAI-linux-${{ matrix.build }}
|
||||||
path: release/
|
path: release/
|
||||||
- name: Release
|
- name: Release
|
||||||
uses: softprops/action-gh-release@v1
|
uses: softprops/action-gh-release@v2
|
||||||
if: startsWith(github.ref, 'refs/tags/')
|
if: startsWith(github.ref, 'refs/tags/')
|
||||||
with:
|
with:
|
||||||
files: |
|
files: |
|
||||||
@@ -100,27 +105,24 @@ jobs:
|
|||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- uses: actions/setup-go@v4
|
- uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version: '1.21.x'
|
go-version: '1.21.x'
|
||||||
cache: false
|
cache: false
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get install -y --no-install-recommends libopencv-dev
|
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler
|
||||||
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
|
||||||
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
|
||||||
- name: Build stablediffusion
|
- name: Build stablediffusion
|
||||||
run: |
|
run: |
|
||||||
|
export PATH=$PATH:$GOPATH/bin
|
||||||
make backend-assets/grpc/stablediffusion
|
make backend-assets/grpc/stablediffusion
|
||||||
mkdir -p release && cp backend-assets/grpc/stablediffusion release
|
mkdir -p release && cp backend-assets/grpc/stablediffusion release
|
||||||
- uses: actions/upload-artifact@v3
|
- uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: stablediffusion
|
name: stablediffusion
|
||||||
path: release/
|
path: release/
|
||||||
- name: Release
|
|
||||||
uses: softprops/action-gh-release@v1
|
|
||||||
if: startsWith(github.ref, 'refs/tags/')
|
|
||||||
with:
|
|
||||||
files: |
|
|
||||||
release/*
|
|
||||||
|
|
||||||
build-macOS:
|
build-macOS:
|
||||||
strategy:
|
strategy:
|
||||||
@@ -138,13 +140,15 @@ jobs:
|
|||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- uses: actions/setup-go@v4
|
- uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version: '1.21.x'
|
go-version: '1.21.x'
|
||||||
cache: false
|
cache: false
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
brew install protobuf grpc
|
brew install protobuf grpc
|
||||||
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
|
||||||
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
|
||||||
- name: Build
|
- name: Build
|
||||||
id: build
|
id: build
|
||||||
env:
|
env:
|
||||||
@@ -153,13 +157,61 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
export C_INCLUDE_PATH=/usr/local/include
|
export C_INCLUDE_PATH=/usr/local/include
|
||||||
export CPLUS_INCLUDE_PATH=/usr/local/include
|
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||||
|
export PATH=$PATH:$GOPATH/bin
|
||||||
make dist
|
make dist
|
||||||
- uses: actions/upload-artifact@v3
|
- uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: ${{ matrix.build }}
|
name: LocalAI-MacOS-${{ matrix.build }}
|
||||||
path: release/
|
path: release/
|
||||||
- name: Release
|
- name: Release
|
||||||
uses: softprops/action-gh-release@v1
|
uses: softprops/action-gh-release@v2
|
||||||
|
if: startsWith(github.ref, 'refs/tags/')
|
||||||
|
with:
|
||||||
|
files: |
|
||||||
|
release/*
|
||||||
|
|
||||||
|
|
||||||
|
build-macOS-arm64:
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- build: 'avx2'
|
||||||
|
defines: ''
|
||||||
|
- build: 'avx'
|
||||||
|
defines: '-DLLAMA_AVX2=OFF'
|
||||||
|
- build: 'avx512'
|
||||||
|
defines: '-DLLAMA_AVX512=ON'
|
||||||
|
runs-on: macos-14
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
submodules: true
|
||||||
|
- uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version: '1.21.x'
|
||||||
|
cache: false
|
||||||
|
- name: Dependencies
|
||||||
|
run: |
|
||||||
|
brew install protobuf grpc
|
||||||
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
|
||||||
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
|
||||||
|
- name: Build
|
||||||
|
id: build
|
||||||
|
env:
|
||||||
|
CMAKE_ARGS: "${{ matrix.defines }}"
|
||||||
|
BUILD_ID: "${{ matrix.build }}"
|
||||||
|
run: |
|
||||||
|
export C_INCLUDE_PATH=/usr/local/include
|
||||||
|
export CPLUS_INCLUDE_PATH=/usr/local/include
|
||||||
|
export PATH=$PATH:$GOPATH/bin
|
||||||
|
make dist
|
||||||
|
- uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: LocalAI-MacOS-arm64-${{ matrix.build }}
|
||||||
|
path: release/
|
||||||
|
- name: Release
|
||||||
|
uses: softprops/action-gh-release@v2
|
||||||
if: startsWith(github.ref, 'refs/tags/')
|
if: startsWith(github.ref, 'refs/tags/')
|
||||||
with:
|
with:
|
||||||
files: |
|
files: |
|
||||||
|
|||||||
9
.github/workflows/secscan.yaml
vendored
9
.github/workflows/secscan.yaml
vendored
@@ -14,14 +14,17 @@ jobs:
|
|||||||
GO111MODULE: on
|
GO111MODULE: on
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout Source
|
- name: Checkout Source
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v4
|
||||||
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
- name: Run Gosec Security Scanner
|
- name: Run Gosec Security Scanner
|
||||||
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
uses: securego/gosec@master
|
uses: securego/gosec@master
|
||||||
with:
|
with:
|
||||||
# we let the report trigger content trigger a failure using the GitHub Security features.
|
# we let the report trigger content trigger a failure using the GitHub Security features.
|
||||||
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
||||||
- name: Upload SARIF file
|
- name: Upload SARIF file
|
||||||
uses: github/codeql-action/upload-sarif@v2
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
|
uses: github/codeql-action/upload-sarif@v3
|
||||||
with:
|
with:
|
||||||
# Path to SARIF file relative to the root of the repository
|
# Path to SARIF file relative to the root of the repository
|
||||||
sarif_file: results.sarif
|
sarif_file: results.sarif
|
||||||
|
|||||||
89
.github/workflows/test-extra.yml
vendored
89
.github/workflows/test-extra.yml
vendored
@@ -32,8 +32,9 @@ jobs:
|
|||||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
sudo apt-get update && \
|
sudo apt-get update && \
|
||||||
sudo apt-get install -y conda
|
sudo apt-get install -y conda
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch
|
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
sudo apt-get install -y libopencv-dev
|
sudo apt-get install -y libopencv-dev
|
||||||
|
pip install --user grpcio-tools
|
||||||
|
|
||||||
sudo rm -rfv /usr/bin/conda || true
|
sudo rm -rfv /usr/bin/conda || true
|
||||||
|
|
||||||
@@ -61,8 +62,9 @@ jobs:
|
|||||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
sudo apt-get update && \
|
sudo apt-get update && \
|
||||||
sudo apt-get install -y conda
|
sudo apt-get install -y conda
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch
|
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
sudo apt-get install -y libopencv-dev
|
sudo apt-get install -y libopencv-dev
|
||||||
|
pip install --user grpcio-tools
|
||||||
|
|
||||||
sudo rm -rfv /usr/bin/conda || true
|
sudo rm -rfv /usr/bin/conda || true
|
||||||
|
|
||||||
@@ -72,6 +74,37 @@ jobs:
|
|||||||
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
|
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
|
||||||
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
|
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
|
||||||
|
|
||||||
|
|
||||||
|
tests-rerankers:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
submodules: true
|
||||||
|
- name: Dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install build-essential ffmpeg
|
||||||
|
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||||
|
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||||
|
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||||
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||||
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
|
sudo apt-get update && \
|
||||||
|
sudo apt-get install -y conda
|
||||||
|
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
|
sudo apt-get install -y libopencv-dev
|
||||||
|
pip install --user grpcio-tools
|
||||||
|
|
||||||
|
sudo rm -rfv /usr/bin/conda || true
|
||||||
|
|
||||||
|
- name: Test rerankers
|
||||||
|
run: |
|
||||||
|
export PATH=$PATH:/opt/conda/bin
|
||||||
|
make --jobs=5 --output-sync=target -C backend/python/rerankers
|
||||||
|
make --jobs=5 --output-sync=target -C backend/python/rerankers test
|
||||||
|
|
||||||
tests-diffusers:
|
tests-diffusers:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
@@ -90,8 +123,9 @@ jobs:
|
|||||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
sudo apt-get update && \
|
sudo apt-get update && \
|
||||||
sudo apt-get install -y conda
|
sudo apt-get install -y conda
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch
|
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
sudo apt-get install -y libopencv-dev
|
sudo apt-get install -y libopencv-dev
|
||||||
|
pip install --user grpcio-tools
|
||||||
|
|
||||||
sudo rm -rfv /usr/bin/conda || true
|
sudo rm -rfv /usr/bin/conda || true
|
||||||
|
|
||||||
@@ -101,6 +135,35 @@ jobs:
|
|||||||
make --jobs=5 --output-sync=target -C backend/python/diffusers
|
make --jobs=5 --output-sync=target -C backend/python/diffusers
|
||||||
make --jobs=5 --output-sync=target -C backend/python/diffusers test
|
make --jobs=5 --output-sync=target -C backend/python/diffusers test
|
||||||
|
|
||||||
|
tests-parler-tts:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
submodules: true
|
||||||
|
- name: Dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install build-essential ffmpeg
|
||||||
|
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||||
|
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||||
|
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||||
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||||
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
|
sudo apt-get update && \
|
||||||
|
sudo apt-get install -y conda
|
||||||
|
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
|
sudo apt-get install -y libopencv-dev
|
||||||
|
pip install --user grpcio-tools
|
||||||
|
|
||||||
|
sudo rm -rfv /usr/bin/conda || true
|
||||||
|
|
||||||
|
- name: Test parler-tts
|
||||||
|
run: |
|
||||||
|
export PATH=$PATH:/opt/conda/bin
|
||||||
|
make --jobs=5 --output-sync=target -C backend/python/parler-tts
|
||||||
|
make --jobs=5 --output-sync=target -C backend/python/parler-tts test
|
||||||
|
|
||||||
tests-transformers-musicgen:
|
tests-transformers-musicgen:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@@ -120,8 +183,9 @@ jobs:
|
|||||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
sudo apt-get update && \
|
sudo apt-get update && \
|
||||||
sudo apt-get install -y conda
|
sudo apt-get install -y conda
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch
|
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
sudo apt-get install -y libopencv-dev
|
sudo apt-get install -y libopencv-dev
|
||||||
|
pip install --user grpcio-tools
|
||||||
|
|
||||||
sudo rm -rfv /usr/bin/conda || true
|
sudo rm -rfv /usr/bin/conda || true
|
||||||
|
|
||||||
@@ -151,8 +215,9 @@ jobs:
|
|||||||
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
# sudo apt-get update && \
|
# sudo apt-get update && \
|
||||||
# sudo apt-get install -y conda
|
# sudo apt-get install -y conda
|
||||||
# sudo apt-get install -y ca-certificates cmake curl patch
|
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
# sudo apt-get install -y libopencv-dev
|
# sudo apt-get install -y libopencv-dev
|
||||||
|
# pip install --user grpcio-tools
|
||||||
|
|
||||||
# sudo rm -rfv /usr/bin/conda || true
|
# sudo rm -rfv /usr/bin/conda || true
|
||||||
|
|
||||||
@@ -222,8 +287,9 @@ jobs:
|
|||||||
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
# sudo apt-get update && \
|
# sudo apt-get update && \
|
||||||
# sudo apt-get install -y conda
|
# sudo apt-get install -y conda
|
||||||
# sudo apt-get install -y ca-certificates cmake curl patch
|
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
# sudo apt-get install -y libopencv-dev
|
# sudo apt-get install -y libopencv-dev
|
||||||
|
# pip install --user grpcio-tools
|
||||||
|
|
||||||
# sudo rm -rfv /usr/bin/conda || true
|
# sudo rm -rfv /usr/bin/conda || true
|
||||||
|
|
||||||
@@ -254,8 +320,9 @@ jobs:
|
|||||||
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
# sudo apt-get update && \
|
# sudo apt-get update && \
|
||||||
# sudo apt-get install -y conda
|
# sudo apt-get install -y conda
|
||||||
# sudo apt-get install -y ca-certificates cmake curl patch
|
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
# sudo apt-get install -y libopencv-dev
|
# sudo apt-get install -y libopencv-dev
|
||||||
|
# pip install --user grpcio-tools
|
||||||
# sudo rm -rfv /usr/bin/conda || true
|
# sudo rm -rfv /usr/bin/conda || true
|
||||||
# - name: Test vllm
|
# - name: Test vllm
|
||||||
# run: |
|
# run: |
|
||||||
@@ -280,8 +347,9 @@ jobs:
|
|||||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
sudo apt-get update && \
|
sudo apt-get update && \
|
||||||
sudo apt-get install -y conda
|
sudo apt-get install -y conda
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch
|
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
sudo apt-get install -y libopencv-dev
|
sudo apt-get install -y libopencv-dev
|
||||||
|
pip install --user grpcio-tools
|
||||||
sudo rm -rfv /usr/bin/conda || true
|
sudo rm -rfv /usr/bin/conda || true
|
||||||
- name: Test vall-e-x
|
- name: Test vall-e-x
|
||||||
run: |
|
run: |
|
||||||
@@ -307,7 +375,8 @@ jobs:
|
|||||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
sudo apt-get update && \
|
sudo apt-get update && \
|
||||||
sudo apt-get install -y conda
|
sudo apt-get install -y conda
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng
|
sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
|
||||||
|
pip install --user grpcio-tools
|
||||||
sudo rm -rfv /usr/bin/conda || true
|
sudo rm -rfv /usr/bin/conda || true
|
||||||
|
|
||||||
- name: Test coqui
|
- name: Test coqui
|
||||||
|
|||||||
52
.github/workflows/test.yml
vendored
52
.github/workflows/test.yml
vendored
@@ -60,7 +60,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go ${{ matrix.go-version }}
|
- name: Setup Go ${{ matrix.go-version }}
|
||||||
uses: actions/setup-go@v4
|
uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version: ${{ matrix.go-version }}
|
go-version: ${{ matrix.go-version }}
|
||||||
cache: false
|
cache: false
|
||||||
@@ -70,17 +70,27 @@ jobs:
|
|||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install build-essential ffmpeg
|
sudo apt-get install build-essential curl ffmpeg
|
||||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||||
sudo apt-get update && \
|
sudo apt-get update && \
|
||||||
sudo apt-get install -y conda
|
sudo apt-get install -y conda
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch
|
sudo apt-get install -y ca-certificates cmake patch python3-pip unzip
|
||||||
sudo apt-get install -y libopencv-dev
|
sudo apt-get install -y libopencv-dev
|
||||||
|
|
||||||
|
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
|
||||||
|
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||||
|
rm protoc.zip
|
||||||
|
|
||||||
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
|
||||||
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
|
||||||
|
|
||||||
|
# The python3-grpc-tools package in 22.04 is too old
|
||||||
|
pip install --user grpcio-tools
|
||||||
|
|
||||||
sudo rm -rfv /usr/bin/conda || true
|
sudo rm -rfv /usr/bin/conda || true
|
||||||
PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
|
PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
|
||||||
|
|
||||||
@@ -89,10 +99,10 @@ jobs:
|
|||||||
GO_TAGS="tts" make -C sources/go-piper piper.o && \
|
GO_TAGS="tts" make -C sources/go-piper piper.o && \
|
||||||
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
|
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
|
||||||
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
|
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
|
||||||
GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||||
- name: Cache grpc
|
- name: Cache grpc
|
||||||
id: cache-grpc
|
id: cache-grpc
|
||||||
uses: actions/cache@v3
|
uses: actions/cache@v4
|
||||||
with:
|
with:
|
||||||
path: grpc
|
path: grpc
|
||||||
key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
|
key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
|
||||||
@@ -108,11 +118,14 @@ jobs:
|
|||||||
cd grpc && cd cmake/build && sudo make --jobs 5 install
|
cd grpc && cd cmake/build && sudo make --jobs 5 install
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
|
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
|
||||||
- name: Setup tmate session if tests fail
|
- name: Setup tmate session if tests fail
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
uses: mxschmitt/action-tmate@v3
|
uses: mxschmitt/action-tmate@v3.18
|
||||||
timeout-minutes: 5
|
with:
|
||||||
|
detached: true
|
||||||
|
connect-timeout-seconds: 180
|
||||||
|
limit-access-to-actor: true
|
||||||
|
|
||||||
tests-aio-container:
|
tests-aio-container:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@@ -163,8 +176,11 @@ jobs:
|
|||||||
make run-e2e-aio
|
make run-e2e-aio
|
||||||
- name: Setup tmate session if tests fail
|
- name: Setup tmate session if tests fail
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
uses: mxschmitt/action-tmate@v3
|
uses: mxschmitt/action-tmate@v3.18
|
||||||
timeout-minutes: 5
|
with:
|
||||||
|
detached: true
|
||||||
|
connect-timeout-seconds: 180
|
||||||
|
limit-access-to-actor: true
|
||||||
|
|
||||||
tests-apple:
|
tests-apple:
|
||||||
runs-on: macOS-14
|
runs-on: macOS-14
|
||||||
@@ -177,7 +193,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Setup Go ${{ matrix.go-version }}
|
- name: Setup Go ${{ matrix.go-version }}
|
||||||
uses: actions/setup-go@v4
|
uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version: ${{ matrix.go-version }}
|
go-version: ${{ matrix.go-version }}
|
||||||
cache: false
|
cache: false
|
||||||
@@ -186,7 +202,8 @@ jobs:
|
|||||||
run: go version
|
run: go version
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
brew install protobuf grpc make
|
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
|
||||||
|
pip install --user grpcio-tools
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
export C_INCLUDE_PATH=/usr/local/include
|
export C_INCLUDE_PATH=/usr/local/include
|
||||||
@@ -196,5 +213,8 @@ jobs:
|
|||||||
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
|
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
|
||||||
- name: Setup tmate session if tests fail
|
- name: Setup tmate session if tests fail
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
uses: mxschmitt/action-tmate@v3
|
uses: mxschmitt/action-tmate@v3.18
|
||||||
timeout-minutes: 5
|
with:
|
||||||
|
detached: true
|
||||||
|
connect-timeout-seconds: 180
|
||||||
|
limit-access-to-actor: true
|
||||||
|
|||||||
31
.github/workflows/update_swagger.yaml
vendored
Normal file
31
.github/workflows/update_swagger.yaml
vendored
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
name: Update swagger
|
||||||
|
on:
|
||||||
|
schedule:
|
||||||
|
- cron: 0 20 * * *
|
||||||
|
workflow_dispatch:
|
||||||
|
jobs:
|
||||||
|
swagger:
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- uses: actions/setup-go@v5
|
||||||
|
with:
|
||||||
|
go-version: 'stable'
|
||||||
|
- run: |
|
||||||
|
go install github.com/swaggo/swag/cmd/swag@latest
|
||||||
|
- name: Bump swagger 🔧
|
||||||
|
run: |
|
||||||
|
make swagger
|
||||||
|
- name: Create Pull Request
|
||||||
|
uses: peter-evans/create-pull-request@v6
|
||||||
|
with:
|
||||||
|
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||||
|
push-to-fork: ci-forks/LocalAI
|
||||||
|
commit-message: 'feat(swagger): update swagger'
|
||||||
|
title: 'feat(swagger): update swagger'
|
||||||
|
branch: "update/swagger"
|
||||||
|
body: Update swagger
|
||||||
|
signoff: true
|
||||||
|
|
||||||
18
.github/workflows/yaml-check.yml
vendored
Normal file
18
.github/workflows/yaml-check.yml
vendored
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
name: 'Yamllint GitHub Actions'
|
||||||
|
on:
|
||||||
|
- pull_request
|
||||||
|
jobs:
|
||||||
|
yamllint:
|
||||||
|
name: 'Yamllint'
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: 'Checkout'
|
||||||
|
uses: actions/checkout@master
|
||||||
|
- name: 'Yamllint'
|
||||||
|
uses: karancode/yamllint-github-action@master
|
||||||
|
with:
|
||||||
|
yamllint_file_or_dir: 'gallery'
|
||||||
|
yamllint_strict: false
|
||||||
|
yamllint_comment: true
|
||||||
|
env:
|
||||||
|
GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
8
.gitignore
vendored
8
.gitignore
vendored
@@ -39,3 +39,11 @@ backend-assets/*
|
|||||||
!backend-assets/.keep
|
!backend-assets/.keep
|
||||||
prepare
|
prepare
|
||||||
/ggml-metal.metal
|
/ggml-metal.metal
|
||||||
|
|
||||||
|
# Protobuf generated files
|
||||||
|
*.pb.go
|
||||||
|
*pb2.py
|
||||||
|
*pb2_grpc.py
|
||||||
|
|
||||||
|
# SonarQube
|
||||||
|
.scannerwork
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
# Contributing to localAI
|
# Contributing to LocalAI
|
||||||
|
|
||||||
Thank you for your interest in contributing to LocalAI! We appreciate your time and effort in helping to improve our project. Before you get started, please take a moment to review these guidelines.
|
Thank you for your interest in contributing to LocalAI! We appreciate your time and effort in helping to improve our project. Before you get started, please take a moment to review these guidelines.
|
||||||
|
|
||||||
@@ -29,8 +29,9 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time
|
|||||||
|
|
||||||
1. Clone the repository: `git clone https://github.com/go-skynet/LocalAI.git`
|
1. Clone the repository: `git clone https://github.com/go-skynet/LocalAI.git`
|
||||||
2. Navigate to the project directory: `cd LocalAI`
|
2. Navigate to the project directory: `cd LocalAI`
|
||||||
3. Install the required dependencies: `make prepare`
|
3. Install the required dependencies ( see https://localai.io/basics/build/#build-localai-locally )
|
||||||
4. Run LocalAI: `make run`
|
4. Build LocalAI: `make build`
|
||||||
|
5. Run LocalAI: `./local-ai`
|
||||||
|
|
||||||
## Contributing
|
## Contributing
|
||||||
|
|
||||||
@@ -59,14 +60,29 @@ If you find a bug, have a feature request, or encounter any issues, please check
|
|||||||
|
|
||||||
`make test` cannot handle all the models yet. Please be sure to add a test case for any new feature or any part that was changed.
|
`make test` cannot handle all the models yet. Please be sure to add a test case for any new feature or any part that was changed.
|
||||||
|
|
||||||
|
### Running AIO tests
|
||||||
|
|
||||||
|
All-In-One images have a set of tests that automatically verify that most of the endpoints work correctly. A typical flow is:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Build the LocalAI docker image
|
||||||
|
make DOCKER_IMAGE=local-ai docker
|
||||||
|
|
||||||
|
# Build the corresponding AIO image
|
||||||
|
BASE_IMAGE=local-ai DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
|
||||||
|
|
||||||
|
# Run the AIO e2e tests
|
||||||
|
LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio make run-e2e-aio
|
||||||
|
```
|
||||||
|
|
||||||
## Documentation
|
## Documentation
|
||||||
|
|
||||||
- We welcome contributions to the documentation; please open a new PR in the official documentation repo [localai-website](https://github.com/go-skynet/localai-website)
|
We welcome contributions to the documentation; please open a new PR or create a new issue. The documentation is available under `docs/`: https://github.com/mudler/LocalAI/tree/master/docs
|
||||||
|
|
||||||
## Community and Communication
|
## Community and Communication
|
||||||
|
|
||||||
- You can reach out via the Github issue tracker.
|
- You can reach out via the Github issue tracker.
|
||||||
- Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
|
- Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
|
||||||
- Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
|
- Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|||||||
65
Dockerfile
65
Dockerfile
@@ -1,8 +1,9 @@
|
|||||||
ARG IMAGE_TYPE=extras
|
ARG IMAGE_TYPE=extras
|
||||||
ARG BASE_IMAGE=ubuntu:22.04
|
ARG BASE_IMAGE=ubuntu:22.04
|
||||||
|
ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
|
||||||
|
|
||||||
# extras or core
|
# extras or core
|
||||||
FROM ${BASE_IMAGE} as requirements-core
|
FROM ${BASE_IMAGE} AS requirements-core
|
||||||
|
|
||||||
USER root
|
USER root
|
||||||
|
|
||||||
@@ -15,17 +16,30 @@ ARG TARGETVARIANT
|
|||||||
|
|
||||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh"
|
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
|
||||||
|
|
||||||
ARG GO_TAGS="stablediffusion tinydream tts"
|
ARG GO_TAGS="stablediffusion tinydream tts"
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y ca-certificates curl patch pip cmake git && apt-get clean
|
apt-get install -y ca-certificates curl python3-pip unzip && apt-get clean
|
||||||
|
|
||||||
# Install Go
|
# Install Go
|
||||||
RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -C /usr/local -xz
|
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
|
||||||
ENV PATH $PATH:/usr/local/go/bin
|
ENV PATH $PATH:/usr/local/go/bin
|
||||||
|
|
||||||
|
# Install grpc compilers
|
||||||
|
ENV PATH $PATH:/root/go/bin
|
||||||
|
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@latest && \
|
||||||
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
|
||||||
|
|
||||||
|
# Install protobuf (the version in 22.04 is too old)
|
||||||
|
RUN curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
|
||||||
|
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||||
|
rm protoc.zip
|
||||||
|
|
||||||
|
# Install grpcio-tools (the version in 22.04 is too old)
|
||||||
|
RUN pip install --user grpcio-tools
|
||||||
|
|
||||||
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
|
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
|
||||||
RUN update-ca-certificates
|
RUN update-ca-certificates
|
||||||
|
|
||||||
@@ -66,9 +80,10 @@ RUN test -n "$TARGETARCH" \
|
|||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
FROM requirements-core as requirements-extras
|
FROM requirements-core AS requirements-extras
|
||||||
|
|
||||||
RUN curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
RUN apt install -y gpg && \
|
||||||
|
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||||
install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||||
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \
|
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \
|
||||||
@@ -90,7 +105,7 @@ RUN if [ ! -e /usr/bin/python ]; then \
|
|||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
FROM ${BASE_IMAGE} as grpc
|
FROM ${GRPC_BASE_IMAGE} AS grpc
|
||||||
|
|
||||||
ARG MAKEFLAGS
|
ARG MAKEFLAGS
|
||||||
ARG GRPC_VERSION=v1.58.0
|
ARG GRPC_VERSION=v1.58.0
|
||||||
@@ -100,22 +115,21 @@ ENV MAKEFLAGS=${MAKEFLAGS}
|
|||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y g++ cmake git && \
|
apt-get install -y build-essential cmake git && \
|
||||||
apt-get clean && \
|
apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc
|
RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc
|
||||||
|
|
||||||
RUN cd grpc && \
|
WORKDIR /build/grpc/cmake/build
|
||||||
mkdir -p cmake/build && \
|
|
||||||
cd cmake/build && \
|
RUN cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \
|
||||||
cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \
|
|
||||||
make
|
make
|
||||||
|
|
||||||
###################################
|
###################################
|
||||||
###################################
|
###################################
|
||||||
|
|
||||||
FROM requirements-${IMAGE_TYPE} as builder
|
FROM requirements-${IMAGE_TYPE} AS builder
|
||||||
|
|
||||||
ARG GO_TAGS="stablediffusion tts"
|
ARG GO_TAGS="stablediffusion tts"
|
||||||
ARG GRPC_BACKENDS
|
ARG GRPC_BACKENDS
|
||||||
@@ -133,6 +147,12 @@ WORKDIR /build
|
|||||||
COPY . .
|
COPY . .
|
||||||
COPY .git .
|
COPY .git .
|
||||||
RUN echo "GO_TAGS: $GO_TAGS"
|
RUN echo "GO_TAGS: $GO_TAGS"
|
||||||
|
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y build-essential cmake git && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
RUN make prepare
|
RUN make prepare
|
||||||
|
|
||||||
# If we are building with clblas support, we need the libraries for the builds
|
# If we are building with clblas support, we need the libraries for the builds
|
||||||
@@ -147,9 +167,11 @@ RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
|||||||
|
|
||||||
COPY --from=grpc /build/grpc ./grpc/
|
COPY --from=grpc /build/grpc ./grpc/
|
||||||
|
|
||||||
RUN cd /build/grpc/cmake/build && make install
|
WORKDIR /build/grpc/cmake/build
|
||||||
|
RUN make install
|
||||||
|
|
||||||
# Rebuild with the default backends
|
# Rebuild with the default backends
|
||||||
|
WORKDIR /build
|
||||||
RUN make build
|
RUN make build
|
||||||
|
|
||||||
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
||||||
@@ -191,6 +213,11 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
|
|||||||
apt-get clean \
|
apt-get clean \
|
||||||
; fi
|
; fi
|
||||||
|
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y cmake git && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
WORKDIR /build
|
WORKDIR /build
|
||||||
|
|
||||||
# we start fresh & re-copy all assets because `make build` does not clean up nicely after itself
|
# we start fresh & re-copy all assets because `make build` does not clean up nicely after itself
|
||||||
@@ -202,7 +229,7 @@ COPY . .
|
|||||||
COPY --from=builder /build/sources ./sources/
|
COPY --from=builder /build/sources ./sources/
|
||||||
COPY --from=grpc /build/grpc ./grpc/
|
COPY --from=grpc /build/grpc ./grpc/
|
||||||
|
|
||||||
RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf grpc
|
RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf /build/grpc
|
||||||
|
|
||||||
# Copy the binary
|
# Copy the binary
|
||||||
COPY --from=builder /build/local-ai ./
|
COPY --from=builder /build/local-ai ./
|
||||||
@@ -232,6 +259,9 @@ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
|||||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
make -C backend/python/sentencetransformers \
|
make -C backend/python/sentencetransformers \
|
||||||
; fi
|
; fi
|
||||||
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
|
make -C backend/python/rerankers \
|
||||||
|
; fi
|
||||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
make -C backend/python/transformers \
|
make -C backend/python/transformers \
|
||||||
; fi
|
; fi
|
||||||
@@ -250,6 +280,9 @@ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
|||||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
make -C backend/python/transformers-musicgen \
|
make -C backend/python/transformers-musicgen \
|
||||||
; fi
|
; fi
|
||||||
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
|
make -C backend/python/parler-tts \
|
||||||
|
; fi
|
||||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||||
make -C backend/python/coqui \
|
make -C backend/python/coqui \
|
||||||
; fi
|
; fi
|
||||||
@@ -259,7 +292,7 @@ RUN mkdir -p /build/models
|
|||||||
|
|
||||||
# Define the health check command
|
# Define the health check command
|
||||||
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
|
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
|
||||||
CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
|
CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1
|
||||||
|
|
||||||
VOLUME /build/models
|
VOLUME /build/models
|
||||||
EXPOSE 8080
|
EXPOSE 8080
|
||||||
|
|||||||
228
Makefile
228
Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
|
|||||||
|
|
||||||
# llama.cpp versions
|
# llama.cpp versions
|
||||||
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||||
CPPLLAMA_VERSION?=cc4a95426d17417d3c83f12bdb514fbe8abe2a88
|
CPPLLAMA_VERSION?=784e11dea1f5ce9638851b2b0dddb107e2a609c8
|
||||||
|
|
||||||
# gpt4all version
|
# gpt4all version
|
||||||
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
||||||
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
|||||||
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
|
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
|
||||||
|
|
||||||
# whisper.cpp version
|
# whisper.cpp version
|
||||||
WHISPER_CPP_VERSION?=13c22321d1ac758ce68a429c23104e234b440769
|
WHISPER_CPP_VERSION?=858452d58dba3acdc3431c9bced2bb8cfd9bf418
|
||||||
|
|
||||||
# bert.cpp version
|
# bert.cpp version
|
||||||
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
|
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
|
||||||
@@ -179,20 +179,20 @@ endif
|
|||||||
all: help
|
all: help
|
||||||
|
|
||||||
## BERT embeddings
|
## BERT embeddings
|
||||||
sources/go-bert:
|
sources/go-bert.cpp:
|
||||||
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert
|
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert.cpp
|
||||||
cd sources/go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
|
cd sources/go-bert.cpp && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
|
||||||
|
|
||||||
sources/go-bert/libgobert.a: sources/go-bert
|
sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
|
||||||
$(MAKE) -C sources/go-bert libgobert.a
|
$(MAKE) -C sources/go-bert.cpp libgobert.a
|
||||||
|
|
||||||
## go-llama-ggml
|
## go-llama.cpp
|
||||||
sources/go-llama-ggml:
|
sources/go-llama.cpp:
|
||||||
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama-ggml
|
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama.cpp
|
||||||
cd sources/go-llama-ggml && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
|
cd sources/go-llama.cpp && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
|
||||||
|
|
||||||
sources/go-llama-ggml/libbinding.a: sources/go-llama-ggml
|
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
|
||||||
$(MAKE) -C sources/go-llama-ggml BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
|
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
|
||||||
|
|
||||||
## go-piper
|
## go-piper
|
||||||
sources/go-piper:
|
sources/go-piper:
|
||||||
@@ -211,12 +211,12 @@ sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
|
|||||||
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
|
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
|
||||||
|
|
||||||
## RWKV
|
## RWKV
|
||||||
sources/go-rwkv:
|
sources/go-rwkv.cpp:
|
||||||
git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv
|
git clone --recurse-submodules $(RWKV_REPO) sources/go-rwkv.cpp
|
||||||
cd sources/go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
|
cd sources/go-rwkv.cpp && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
|
||||||
|
|
||||||
sources/go-rwkv/librwkv.a: sources/go-rwkv
|
sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
|
||||||
cd sources/go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
|
cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
|
||||||
|
|
||||||
## stable diffusion
|
## stable diffusion
|
||||||
sources/go-stable-diffusion:
|
sources/go-stable-diffusion:
|
||||||
@@ -236,23 +236,24 @@ sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
|
|||||||
|
|
||||||
## whisper
|
## whisper
|
||||||
sources/whisper.cpp:
|
sources/whisper.cpp:
|
||||||
git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp
|
git clone https://github.com/ggerganov/whisper.cpp sources/whisper.cpp
|
||||||
cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
|
cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
|
||||||
|
|
||||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||||
cd sources/whisper.cpp && make libwhisper.a
|
cd sources/whisper.cpp && make libwhisper.a
|
||||||
|
|
||||||
get-sources: sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream
|
get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream
|
||||||
|
|
||||||
replace:
|
replace:
|
||||||
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv
|
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
|
||||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
||||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
||||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert
|
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
|
||||||
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
|
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
||||||
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
|
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
|
||||||
|
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
||||||
|
|
||||||
dropreplace:
|
dropreplace:
|
||||||
$(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp
|
$(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp
|
||||||
@@ -271,12 +272,12 @@ prepare-sources: get-sources replace
|
|||||||
## GENERIC
|
## GENERIC
|
||||||
rebuild: ## Rebuilds the project
|
rebuild: ## Rebuilds the project
|
||||||
$(GOCMD) clean -cache
|
$(GOCMD) clean -cache
|
||||||
$(MAKE) -C sources/go-llama-ggml clean
|
$(MAKE) -C sources/go-llama.cpp clean
|
||||||
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
|
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
|
||||||
$(MAKE) -C sources/go-rwkv clean
|
$(MAKE) -C sources/go-rwkv.cpp clean
|
||||||
$(MAKE) -C sources/whisper.cpp clean
|
$(MAKE) -C sources/whisper.cpp clean
|
||||||
$(MAKE) -C sources/go-stable-diffusion clean
|
$(MAKE) -C sources/go-stable-diffusion clean
|
||||||
$(MAKE) -C sources/go-bert clean
|
$(MAKE) -C sources/go-bert.cpp clean
|
||||||
$(MAKE) -C sources/go-piper clean
|
$(MAKE) -C sources/go-piper clean
|
||||||
$(MAKE) -C sources/go-tiny-dream clean
|
$(MAKE) -C sources/go-tiny-dream clean
|
||||||
$(MAKE) build
|
$(MAKE) build
|
||||||
@@ -289,10 +290,12 @@ clean: ## Remove build related file
|
|||||||
rm -rf ./sources
|
rm -rf ./sources
|
||||||
rm -rf $(BINARY_NAME)
|
rm -rf $(BINARY_NAME)
|
||||||
rm -rf release/
|
rm -rf release/
|
||||||
rm -rf backend-assets
|
rm -rf backend-assets/*
|
||||||
$(MAKE) -C backend/cpp/grpc clean
|
$(MAKE) -C backend/cpp/grpc clean
|
||||||
$(MAKE) -C backend/cpp/llama clean
|
$(MAKE) -C backend/cpp/llama clean
|
||||||
$(MAKE) dropreplace
|
$(MAKE) dropreplace
|
||||||
|
$(MAKE) protogen-clean
|
||||||
|
rmdir pkg/grpc/proto || true
|
||||||
|
|
||||||
clean-tests:
|
clean-tests:
|
||||||
rm -rf test-models
|
rm -rf test-models
|
||||||
@@ -416,30 +419,152 @@ help: ## Show this help.
|
|||||||
else if (/^## .*$$/) {printf " ${CYAN}%s${RESET}\n", substr($$1,4)} \
|
else if (/^## .*$$/) {printf " ${CYAN}%s${RESET}\n", substr($$1,4)} \
|
||||||
}' $(MAKEFILE_LIST)
|
}' $(MAKEFILE_LIST)
|
||||||
|
|
||||||
|
.PHONY: protogen
|
||||||
protogen: protogen-go protogen-python
|
protogen: protogen-go protogen-python
|
||||||
|
|
||||||
|
.PHONY: protogen-clean
|
||||||
|
protogen-clean: protogen-go-clean protogen-python-clean
|
||||||
|
|
||||||
|
.PHONY: protogen-go
|
||||||
protogen-go:
|
protogen-go:
|
||||||
|
mkdir -p pkg/grpc/proto
|
||||||
protoc -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
|
protoc -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
|
||||||
backend/backend.proto
|
backend/backend.proto
|
||||||
|
|
||||||
protogen-python:
|
.PHONY: protogen-go-clean
|
||||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/sentencetransformers/ --grpc_python_out=backend/python/sentencetransformers/ backend/backend.proto
|
protogen-go-clean:
|
||||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/transformers/ --grpc_python_out=backend/python/transformers/ backend/backend.proto
|
$(RM) pkg/grpc/proto/backend.pb.go pkg/grpc/proto/backend_grpc.pb.go
|
||||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/transformers-musicgen/ --grpc_python_out=backend/python/transformers-musicgen/ backend/backend.proto
|
$(RM) bin/*
|
||||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/autogptq/ --grpc_python_out=backend/python/autogptq/ backend/backend.proto
|
|
||||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/exllama/ --grpc_python_out=backend/python/exllama/ backend/backend.proto
|
.PHONY: protogen-python
|
||||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/bark/ --grpc_python_out=backend/python/bark/ backend/backend.proto
|
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen
|
||||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/diffusers/ --grpc_python_out=backend/python/diffusers/ backend/backend.proto
|
|
||||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/coqui/ --grpc_python_out=backend/python/coqui/ backend/backend.proto
|
.PHONY: protogen-python-clean
|
||||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vall-e-x/ --grpc_python_out=backend/python/vall-e-x/ backend/backend.proto
|
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean
|
||||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/vllm/ --grpc_python_out=backend/python/vllm/ backend/backend.proto
|
|
||||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/petals/ --grpc_python_out=backend/python/petals/ backend/backend.proto
|
.PHONY: autogptq-protogen
|
||||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/mamba/ --grpc_python_out=backend/python/mamba/ backend/backend.proto
|
autogptq-protogen:
|
||||||
python3 -m grpc_tools.protoc -Ibackend/ --python_out=backend/python/exllama2/ --grpc_python_out=backend/python/exllama2/ backend/backend.proto
|
$(MAKE) -C backend/python/autogptq protogen
|
||||||
|
|
||||||
|
.PHONY: autogptq-protogen-clean
|
||||||
|
autogptq-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/autogptq protogen-clean
|
||||||
|
|
||||||
|
.PHONY: bark-protogen
|
||||||
|
bark-protogen:
|
||||||
|
$(MAKE) -C backend/python/bark protogen
|
||||||
|
|
||||||
|
.PHONY: bark-protogen-clean
|
||||||
|
bark-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/bark protogen-clean
|
||||||
|
|
||||||
|
.PHONY: coqui-protogen
|
||||||
|
coqui-protogen:
|
||||||
|
$(MAKE) -C backend/python/coqui protogen
|
||||||
|
|
||||||
|
.PHONY: coqui-protogen-clean
|
||||||
|
coqui-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/coqui protogen-clean
|
||||||
|
|
||||||
|
.PHONY: diffusers-protogen
|
||||||
|
diffusers-protogen:
|
||||||
|
$(MAKE) -C backend/python/diffusers protogen
|
||||||
|
|
||||||
|
.PHONY: diffusers-protogen-clean
|
||||||
|
diffusers-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/diffusers protogen-clean
|
||||||
|
|
||||||
|
.PHONY: exllama-protogen
|
||||||
|
exllama-protogen:
|
||||||
|
$(MAKE) -C backend/python/exllama protogen
|
||||||
|
|
||||||
|
.PHONY: exllama-protogen-clean
|
||||||
|
exllama-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/exllama protogen-clean
|
||||||
|
|
||||||
|
.PHONY: exllama2-protogen
|
||||||
|
exllama2-protogen:
|
||||||
|
$(MAKE) -C backend/python/exllama2 protogen
|
||||||
|
|
||||||
|
.PHONY: exllama2-protogen-clean
|
||||||
|
exllama2-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/exllama2 protogen-clean
|
||||||
|
|
||||||
|
.PHONY: mamba-protogen
|
||||||
|
mamba-protogen:
|
||||||
|
$(MAKE) -C backend/python/mamba protogen
|
||||||
|
|
||||||
|
.PHONY: mamba-protogen-clean
|
||||||
|
mamba-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/mamba protogen-clean
|
||||||
|
|
||||||
|
.PHONY: petals-protogen
|
||||||
|
petals-protogen:
|
||||||
|
$(MAKE) -C backend/python/petals protogen
|
||||||
|
|
||||||
|
.PHONY: petals-protogen-clean
|
||||||
|
petals-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/petals protogen-clean
|
||||||
|
|
||||||
|
.PHONY: rerankers-protogen
|
||||||
|
rerankers-protogen:
|
||||||
|
$(MAKE) -C backend/python/rerankers protogen
|
||||||
|
|
||||||
|
.PHONY: rerankers-protogen-clean
|
||||||
|
rerankers-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/rerankers protogen-clean
|
||||||
|
|
||||||
|
.PHONY: sentencetransformers-protogen
|
||||||
|
sentencetransformers-protogen:
|
||||||
|
$(MAKE) -C backend/python/sentencetransformers protogen
|
||||||
|
|
||||||
|
.PHONY: sentencetransformers-protogen-clean
|
||||||
|
sentencetransformers-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/sentencetransformers protogen-clean
|
||||||
|
|
||||||
|
.PHONY: transformers-protogen
|
||||||
|
transformers-protogen:
|
||||||
|
$(MAKE) -C backend/python/transformers protogen
|
||||||
|
|
||||||
|
.PHONY: transformers-protogen-clean
|
||||||
|
transformers-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/transformers protogen-clean
|
||||||
|
|
||||||
|
.PHONY: parler-tts-protogen
|
||||||
|
parler-tts-protogen:
|
||||||
|
$(MAKE) -C backend/python/parler-tts protogen
|
||||||
|
|
||||||
|
.PHONY: parler-tts-protogen-clean
|
||||||
|
parler-tts-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/parler-tts protogen-clean
|
||||||
|
|
||||||
|
.PHONY: transformers-musicgen-protogen
|
||||||
|
transformers-musicgen-protogen:
|
||||||
|
$(MAKE) -C backend/python/transformers-musicgen protogen
|
||||||
|
|
||||||
|
.PHONY: transformers-musicgen-protogen-clean
|
||||||
|
transformers-musicgen-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/transformers-musicgen protogen-clean
|
||||||
|
|
||||||
|
.PHONY: vall-e-x-protogen
|
||||||
|
vall-e-x-protogen:
|
||||||
|
$(MAKE) -C backend/python/vall-e-x protogen
|
||||||
|
|
||||||
|
.PHONY: vall-e-x-protogen-clean
|
||||||
|
vall-e-x-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/vall-e-x protogen-clean
|
||||||
|
|
||||||
|
.PHONY: vllm-protogen
|
||||||
|
vllm-protogen:
|
||||||
|
$(MAKE) -C backend/python/vllm protogen
|
||||||
|
|
||||||
|
.PHONY: vllm-protogen-clean
|
||||||
|
vllm-protogen-clean:
|
||||||
|
$(MAKE) -C backend/python/vllm protogen-clean
|
||||||
|
|
||||||
## GRPC
|
## GRPC
|
||||||
# Note: it is duplicated in the Dockerfile
|
# Note: it is duplicated in the Dockerfile
|
||||||
prepare-extra-conda-environments:
|
prepare-extra-conda-environments: protogen-python
|
||||||
$(MAKE) -C backend/python/autogptq
|
$(MAKE) -C backend/python/autogptq
|
||||||
$(MAKE) -C backend/python/bark
|
$(MAKE) -C backend/python/bark
|
||||||
$(MAKE) -C backend/python/coqui
|
$(MAKE) -C backend/python/coqui
|
||||||
@@ -447,14 +572,16 @@ prepare-extra-conda-environments:
|
|||||||
$(MAKE) -C backend/python/vllm
|
$(MAKE) -C backend/python/vllm
|
||||||
$(MAKE) -C backend/python/mamba
|
$(MAKE) -C backend/python/mamba
|
||||||
$(MAKE) -C backend/python/sentencetransformers
|
$(MAKE) -C backend/python/sentencetransformers
|
||||||
|
$(MAKE) -C backend/python/rerankers
|
||||||
$(MAKE) -C backend/python/transformers
|
$(MAKE) -C backend/python/transformers
|
||||||
$(MAKE) -C backend/python/transformers-musicgen
|
$(MAKE) -C backend/python/transformers-musicgen
|
||||||
|
$(MAKE) -C backend/python/parler-tts
|
||||||
$(MAKE) -C backend/python/vall-e-x
|
$(MAKE) -C backend/python/vall-e-x
|
||||||
$(MAKE) -C backend/python/exllama
|
$(MAKE) -C backend/python/exllama
|
||||||
$(MAKE) -C backend/python/petals
|
$(MAKE) -C backend/python/petals
|
||||||
$(MAKE) -C backend/python/exllama2
|
$(MAKE) -C backend/python/exllama2
|
||||||
|
|
||||||
prepare-test-extra:
|
prepare-test-extra: protogen-python
|
||||||
$(MAKE) -C backend/python/transformers
|
$(MAKE) -C backend/python/transformers
|
||||||
$(MAKE) -C backend/python/diffusers
|
$(MAKE) -C backend/python/diffusers
|
||||||
|
|
||||||
@@ -478,11 +605,11 @@ backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/
|
|||||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
|
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
|
||||||
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
|
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
|
||||||
|
|
||||||
backend-assets/grpc: replace
|
backend-assets/grpc: protogen-go replace
|
||||||
mkdir -p backend-assets/grpc
|
mkdir -p backend-assets/grpc
|
||||||
|
|
||||||
backend-assets/grpc/bert-embeddings: sources/go-bert sources/go-bert/libgobert.a backend-assets/grpc
|
backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert LIBRARY_PATH=$(CURDIR)/sources/go-bert \
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
|
||||||
|
|
||||||
backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
|
backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
|
||||||
@@ -524,17 +651,16 @@ ifeq ($(BUILD_TYPE),metal)
|
|||||||
cp backend/cpp/llama/llama.cpp/build/bin/default.metallib backend-assets/grpc/
|
cp backend/cpp/llama/llama.cpp/build/bin/default.metallib backend-assets/grpc/
|
||||||
endif
|
endif
|
||||||
|
|
||||||
backend-assets/grpc/llama-ggml: sources/go-llama-ggml sources/go-llama-ggml/libbinding.a backend-assets/grpc
|
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
|
||||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \
|
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
|
||||||
|
|
||||||
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
|
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
|
||||||
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
|
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
|
||||||
|
|
||||||
backend-assets/grpc/rwkv: sources/go-rwkv sources/go-rwkv/librwkv.a backend-assets/grpc
|
backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
|
||||||
|
|
||||||
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
|
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
|
||||||
@@ -597,4 +723,4 @@ docker-image-intel-xpu:
|
|||||||
|
|
||||||
.PHONY: swagger
|
.PHONY: swagger
|
||||||
swagger:
|
swagger:
|
||||||
swag init -g core/http/api.go --output swagger
|
swag init -g core/http/app.go --output swagger
|
||||||
|
|||||||
14
README.md
14
README.md
@@ -44,23 +44,19 @@
|
|||||||
|
|
||||||
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
||||||
|
|
||||||
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU.
|
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
|
||||||
|
|
||||||
## 🔥🔥 Hot topics / Roadmap
|
## 🔥🔥 Hot topics / Roadmap
|
||||||
|
|
||||||
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||||
|
|
||||||
- Landing page: https://github.com/mudler/LocalAI/pull/1922
|
- Reranker API: https://github.com/mudler/LocalAI/pull/2121
|
||||||
|
- Gallery WebUI: https://github.com/mudler/LocalAI/pull/2104
|
||||||
|
- llama3: https://github.com/mudler/LocalAI/discussions/2076
|
||||||
|
- Parler-TTS: https://github.com/mudler/LocalAI/pull/2027
|
||||||
- Openvino support: https://github.com/mudler/LocalAI/pull/1892
|
- Openvino support: https://github.com/mudler/LocalAI/pull/1892
|
||||||
- Vector store: https://github.com/mudler/LocalAI/pull/1795
|
- Vector store: https://github.com/mudler/LocalAI/pull/1795
|
||||||
- All-in-one container image: https://github.com/mudler/LocalAI/issues/1855
|
- All-in-one container image: https://github.com/mudler/LocalAI/issues/1855
|
||||||
- Parallel function calling: https://github.com/mudler/LocalAI/pull/1726 / Tools API support: https://github.com/mudler/LocalAI/pull/1715
|
|
||||||
- Upload file API: https://github.com/mudler/LocalAI/pull/1703
|
|
||||||
- ROCm container images: https://github.com/mudler/LocalAI/pull/1595 / Intel GPU support (sycl, transformers, diffusers): https://github.com/mudler/LocalAI/issues/1653
|
|
||||||
- Mamba support: https://github.com/mudler/LocalAI/pull/1589
|
|
||||||
- Start and share models with config file: https://github.com/mudler/LocalAI/pull/1522
|
|
||||||
- 🐸 Coqui: https://github.com/mudler/LocalAI/pull/1489
|
|
||||||
- Img2vid https://github.com/mudler/LocalAI/pull/1442
|
|
||||||
|
|
||||||
Hot topics (looking for contributors):
|
Hot topics (looking for contributors):
|
||||||
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
||||||
|
|||||||
27
aio/cpu/rerank.yaml
Normal file
27
aio/cpu/rerank.yaml
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
name: jina-reranker-v1-base-en
|
||||||
|
backend: rerankers
|
||||||
|
parameters:
|
||||||
|
model: cross-encoder
|
||||||
|
|
||||||
|
usage: |
|
||||||
|
You can test this model with curl like this:
|
||||||
|
|
||||||
|
curl http://localhost:8080/v1/rerank \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "jina-reranker-v1-base-en",
|
||||||
|
"query": "Organic skincare products for sensitive skin",
|
||||||
|
"documents": [
|
||||||
|
"Eco-friendly kitchenware for modern homes",
|
||||||
|
"Biodegradable cleaning supplies for eco-conscious consumers",
|
||||||
|
"Organic cotton baby clothes for sensitive skin",
|
||||||
|
"Natural organic skincare range for sensitive skin",
|
||||||
|
"Tech gadgets for smart homes: 2024 edition",
|
||||||
|
"Sustainable gardening tools and compost solutions",
|
||||||
|
"Sensitive skin-friendly facial cleansers and toners",
|
||||||
|
"Organic food wraps and storage solutions",
|
||||||
|
"All-natural pet food for dogs with allergies",
|
||||||
|
"Yoga mats made from recycled materials"
|
||||||
|
],
|
||||||
|
"top_n": 3
|
||||||
|
}'
|
||||||
@@ -6,15 +6,22 @@ parameters:
|
|||||||
template:
|
template:
|
||||||
chat_message: |
|
chat_message: |
|
||||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
||||||
{{- if .FunctionCall }}<tool_call>{{end}}
|
{{- if .FunctionCall }}
|
||||||
{{- if eq .RoleName "tool" }}<tool_result>{{end }}
|
<tool_call>
|
||||||
{{- if .Content}}
|
{{- else if eq .RoleName "tool" }}
|
||||||
{{.Content}}
|
<tool_response>
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
|
{{- if .Content}}
|
||||||
{{- if .FunctionCall }}</tool_call>{{end }}
|
{{.Content }}
|
||||||
{{- if eq .RoleName "tool" }}</tool_result>{{end }}
|
{{- end }}
|
||||||
<|im_end|>
|
{{- if .FunctionCall}}
|
||||||
|
{{toJson .FunctionCall}}
|
||||||
|
{{- end }}
|
||||||
|
{{- if .FunctionCall }}
|
||||||
|
</tool_call>
|
||||||
|
{{- else if eq .RoleName "tool" }}
|
||||||
|
</tool_response>
|
||||||
|
{{- end }}<|im_end|>
|
||||||
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
|
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
|
||||||
function: |
|
function: |
|
||||||
<|im_start|>system
|
<|im_start|>system
|
||||||
@@ -29,8 +36,7 @@ template:
|
|||||||
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
|
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
|
||||||
<tool_call>
|
<tool_call>
|
||||||
{'arguments': <args-dict>, 'name': <function-name>}
|
{'arguments': <args-dict>, 'name': <function-name>}
|
||||||
</tool_call>
|
</tool_call><|im_end|>
|
||||||
<|im_end|>
|
|
||||||
{{.Input -}}
|
{{.Input -}}
|
||||||
<|im_start|>assistant
|
<|im_start|>assistant
|
||||||
<tool_call>
|
<tool_call>
|
||||||
|
|||||||
@@ -129,7 +129,7 @@ detect_gpu
|
|||||||
detect_gpu_size
|
detect_gpu_size
|
||||||
|
|
||||||
PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
|
PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
|
||||||
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
|
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
|
||||||
|
|
||||||
check_vars
|
check_vars
|
||||||
|
|
||||||
|
|||||||
27
aio/gpu-8g/rerank.yaml
Normal file
27
aio/gpu-8g/rerank.yaml
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
name: jina-reranker-v1-base-en
|
||||||
|
backend: rerankers
|
||||||
|
parameters:
|
||||||
|
model: cross-encoder
|
||||||
|
|
||||||
|
usage: |
|
||||||
|
You can test this model with curl like this:
|
||||||
|
|
||||||
|
curl http://localhost:8080/v1/rerank \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "jina-reranker-v1-base-en",
|
||||||
|
"query": "Organic skincare products for sensitive skin",
|
||||||
|
"documents": [
|
||||||
|
"Eco-friendly kitchenware for modern homes",
|
||||||
|
"Biodegradable cleaning supplies for eco-conscious consumers",
|
||||||
|
"Organic cotton baby clothes for sensitive skin",
|
||||||
|
"Natural organic skincare range for sensitive skin",
|
||||||
|
"Tech gadgets for smart homes: 2024 edition",
|
||||||
|
"Sustainable gardening tools and compost solutions",
|
||||||
|
"Sensitive skin-friendly facial cleansers and toners",
|
||||||
|
"Organic food wraps and storage solutions",
|
||||||
|
"All-natural pet food for dogs with allergies",
|
||||||
|
"Yoga mats made from recycled materials"
|
||||||
|
],
|
||||||
|
"top_n": 3
|
||||||
|
}'
|
||||||
@@ -6,15 +6,22 @@ parameters:
|
|||||||
template:
|
template:
|
||||||
chat_message: |
|
chat_message: |
|
||||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
||||||
{{- if .FunctionCall }}<tool_call>{{end}}
|
{{- if .FunctionCall }}
|
||||||
{{- if eq .RoleName "tool" }}<tool_result>{{end }}
|
<tool_call>
|
||||||
{{- if .Content}}
|
{{- else if eq .RoleName "tool" }}
|
||||||
{{.Content}}
|
<tool_response>
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
|
{{- if .Content}}
|
||||||
{{- if .FunctionCall }}</tool_call>{{end }}
|
{{.Content }}
|
||||||
{{- if eq .RoleName "tool" }}</tool_result>{{end }}
|
{{- end }}
|
||||||
<|im_end|>
|
{{- if .FunctionCall}}
|
||||||
|
{{toJson .FunctionCall}}
|
||||||
|
{{- end }}
|
||||||
|
{{- if .FunctionCall }}
|
||||||
|
</tool_call>
|
||||||
|
{{- else if eq .RoleName "tool" }}
|
||||||
|
</tool_response>
|
||||||
|
{{- end }}<|im_end|>
|
||||||
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
|
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
|
||||||
function: |
|
function: |
|
||||||
<|im_start|>system
|
<|im_start|>system
|
||||||
@@ -29,8 +36,7 @@ template:
|
|||||||
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
|
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
|
||||||
<tool_call>
|
<tool_call>
|
||||||
{'arguments': <args-dict>, 'name': <function-name>}
|
{'arguments': <args-dict>, 'name': <function-name>}
|
||||||
</tool_call>
|
</tool_call><|im_end|>
|
||||||
<|im_end|>
|
|
||||||
{{.Input -}}
|
{{.Input -}}
|
||||||
<|im_start|>assistant
|
<|im_start|>assistant
|
||||||
<tool_call>
|
<tool_call>
|
||||||
|
|||||||
27
aio/intel/rerank.yaml
Normal file
27
aio/intel/rerank.yaml
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
name: jina-reranker-v1-base-en
|
||||||
|
backend: rerankers
|
||||||
|
parameters:
|
||||||
|
model: cross-encoder
|
||||||
|
|
||||||
|
usage: |
|
||||||
|
You can test this model with curl like this:
|
||||||
|
|
||||||
|
curl http://localhost:8080/v1/rerank \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "jina-reranker-v1-base-en",
|
||||||
|
"query": "Organic skincare products for sensitive skin",
|
||||||
|
"documents": [
|
||||||
|
"Eco-friendly kitchenware for modern homes",
|
||||||
|
"Biodegradable cleaning supplies for eco-conscious consumers",
|
||||||
|
"Organic cotton baby clothes for sensitive skin",
|
||||||
|
"Natural organic skincare range for sensitive skin",
|
||||||
|
"Tech gadgets for smart homes: 2024 edition",
|
||||||
|
"Sustainable gardening tools and compost solutions",
|
||||||
|
"Sensitive skin-friendly facial cleansers and toners",
|
||||||
|
"Organic food wraps and storage solutions",
|
||||||
|
"All-natural pet food for dogs with allergies",
|
||||||
|
"Yoga mats made from recycled materials"
|
||||||
|
],
|
||||||
|
"top_n": 3
|
||||||
|
}'
|
||||||
@@ -7,15 +7,22 @@ parameters:
|
|||||||
template:
|
template:
|
||||||
chat_message: |
|
chat_message: |
|
||||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
||||||
{{- if .FunctionCall }}<tool_call>{{end}}
|
{{- if .FunctionCall }}
|
||||||
{{- if eq .RoleName "tool" }}<tool_result>{{end }}
|
<tool_call>
|
||||||
{{- if .Content}}
|
{{- else if eq .RoleName "tool" }}
|
||||||
{{.Content}}
|
<tool_response>
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
|
{{- if .Content}}
|
||||||
{{- if .FunctionCall }}</tool_call>{{end }}
|
{{.Content }}
|
||||||
{{- if eq .RoleName "tool" }}</tool_result>{{end }}
|
{{- end }}
|
||||||
<|im_end|>
|
{{- if .FunctionCall}}
|
||||||
|
{{toJson .FunctionCall}}
|
||||||
|
{{- end }}
|
||||||
|
{{- if .FunctionCall }}
|
||||||
|
</tool_call>
|
||||||
|
{{- else if eq .RoleName "tool" }}
|
||||||
|
</tool_response>
|
||||||
|
{{- end }}<|im_end|>
|
||||||
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
|
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
|
||||||
function: |
|
function: |
|
||||||
<|im_start|>system
|
<|im_start|>system
|
||||||
@@ -30,8 +37,7 @@ template:
|
|||||||
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
|
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
|
||||||
<tool_call>
|
<tool_call>
|
||||||
{'arguments': <args-dict>, 'name': <function-name>}
|
{'arguments': <args-dict>, 'name': <function-name>}
|
||||||
</tool_call>
|
</tool_call><|im_end|>
|
||||||
<|im_end|>
|
|
||||||
{{.Input -}}
|
{{.Input -}}
|
||||||
<|im_start|>assistant
|
<|im_start|>assistant
|
||||||
<tool_call>
|
<tool_call>
|
||||||
|
|||||||
@@ -23,6 +23,30 @@ service Backend {
|
|||||||
rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
|
rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
|
||||||
rpc StoresGet(StoresGetOptions) returns (StoresGetResult) {}
|
rpc StoresGet(StoresGetOptions) returns (StoresGetResult) {}
|
||||||
rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
|
rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
|
||||||
|
|
||||||
|
rpc Rerank(RerankRequest) returns (RerankResult) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
message RerankRequest {
|
||||||
|
string query = 1;
|
||||||
|
repeated string documents = 2;
|
||||||
|
int32 top_n = 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
message RerankResult {
|
||||||
|
Usage usage = 1;
|
||||||
|
repeated DocumentResult results = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
message Usage {
|
||||||
|
int32 total_tokens = 1;
|
||||||
|
int32 prompt_tokens = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
message DocumentResult {
|
||||||
|
int32 index = 1;
|
||||||
|
string text = 2;
|
||||||
|
float relevance_score = 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
message StoresKey {
|
message StoresKey {
|
||||||
@@ -107,11 +131,15 @@ message PredictOptions {
|
|||||||
string NegativePrompt = 40;
|
string NegativePrompt = 40;
|
||||||
int32 NDraft = 41;
|
int32 NDraft = 41;
|
||||||
repeated string Images = 42;
|
repeated string Images = 42;
|
||||||
|
bool UseTokenizerTemplate = 43;
|
||||||
|
repeated Message Messages = 44;
|
||||||
}
|
}
|
||||||
|
|
||||||
// The response message containing the result
|
// The response message containing the result
|
||||||
message Reply {
|
message Reply {
|
||||||
bytes message = 1;
|
bytes message = 1;
|
||||||
|
int32 tokens = 2;
|
||||||
|
int32 prompt_tokens = 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
message ModelOptions {
|
message ModelOptions {
|
||||||
@@ -173,6 +201,7 @@ message ModelOptions {
|
|||||||
bool EnforceEager = 52;
|
bool EnforceEager = 52;
|
||||||
int32 SwapSpace = 53;
|
int32 SwapSpace = 53;
|
||||||
int32 MaxModelLen = 54;
|
int32 MaxModelLen = 54;
|
||||||
|
int32 TensorParallelSize = 55;
|
||||||
|
|
||||||
string MMProj = 41;
|
string MMProj = 41;
|
||||||
|
|
||||||
@@ -256,3 +285,8 @@ message StatusResponse {
|
|||||||
State state = 1;
|
State state = 1;
|
||||||
MemoryUsageData memory = 2;
|
MemoryUsageData memory = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
message Message {
|
||||||
|
string role = 1;
|
||||||
|
string content = 2;
|
||||||
|
}
|
||||||
@@ -1,457 +0,0 @@
|
|||||||
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
|
|
||||||
// versions:
|
|
||||||
// - protoc-gen-go-grpc v1.2.0
|
|
||||||
// - protoc v4.23.4
|
|
||||||
// source: backend/backend.proto
|
|
||||||
|
|
||||||
package proto
|
|
||||||
|
|
||||||
import (
|
|
||||||
context "context"
|
|
||||||
grpc "google.golang.org/grpc"
|
|
||||||
codes "google.golang.org/grpc/codes"
|
|
||||||
status "google.golang.org/grpc/status"
|
|
||||||
)
|
|
||||||
|
|
||||||
// This is a compile-time assertion to ensure that this generated file
|
|
||||||
// is compatible with the grpc package it is being compiled against.
|
|
||||||
// Requires gRPC-Go v1.32.0 or later.
|
|
||||||
const _ = grpc.SupportPackageIsVersion7
|
|
||||||
|
|
||||||
// BackendClient is the client API for Backend service.
|
|
||||||
//
|
|
||||||
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
|
|
||||||
type BackendClient interface {
|
|
||||||
Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error)
|
|
||||||
Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error)
|
|
||||||
LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error)
|
|
||||||
PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error)
|
|
||||||
Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error)
|
|
||||||
GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error)
|
|
||||||
AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error)
|
|
||||||
TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error)
|
|
||||||
TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error)
|
|
||||||
Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error)
|
|
||||||
}
|
|
||||||
|
|
||||||
type backendClient struct {
|
|
||||||
cc grpc.ClientConnInterface
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewBackendClient(cc grpc.ClientConnInterface) BackendClient {
|
|
||||||
return &backendClient{cc}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *backendClient) Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) {
|
|
||||||
out := new(Reply)
|
|
||||||
err := c.cc.Invoke(ctx, "/backend.Backend/Health", in, out, opts...)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return out, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *backendClient) Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) {
|
|
||||||
out := new(Reply)
|
|
||||||
err := c.cc.Invoke(ctx, "/backend.Backend/Predict", in, out, opts...)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return out, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *backendClient) LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) {
|
|
||||||
out := new(Result)
|
|
||||||
err := c.cc.Invoke(ctx, "/backend.Backend/LoadModel", in, out, opts...)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return out, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *backendClient) PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) {
|
|
||||||
stream, err := c.cc.NewStream(ctx, &Backend_ServiceDesc.Streams[0], "/backend.Backend/PredictStream", opts...)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
x := &backendPredictStreamClient{stream}
|
|
||||||
if err := x.ClientStream.SendMsg(in); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if err := x.ClientStream.CloseSend(); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return x, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
type Backend_PredictStreamClient interface {
|
|
||||||
Recv() (*Reply, error)
|
|
||||||
grpc.ClientStream
|
|
||||||
}
|
|
||||||
|
|
||||||
type backendPredictStreamClient struct {
|
|
||||||
grpc.ClientStream
|
|
||||||
}
|
|
||||||
|
|
||||||
func (x *backendPredictStreamClient) Recv() (*Reply, error) {
|
|
||||||
m := new(Reply)
|
|
||||||
if err := x.ClientStream.RecvMsg(m); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return m, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *backendClient) Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) {
|
|
||||||
out := new(EmbeddingResult)
|
|
||||||
err := c.cc.Invoke(ctx, "/backend.Backend/Embedding", in, out, opts...)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return out, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *backendClient) GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) {
|
|
||||||
out := new(Result)
|
|
||||||
err := c.cc.Invoke(ctx, "/backend.Backend/GenerateImage", in, out, opts...)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return out, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *backendClient) AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) {
|
|
||||||
out := new(TranscriptResult)
|
|
||||||
err := c.cc.Invoke(ctx, "/backend.Backend/AudioTranscription", in, out, opts...)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return out, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *backendClient) TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) {
|
|
||||||
out := new(Result)
|
|
||||||
err := c.cc.Invoke(ctx, "/backend.Backend/TTS", in, out, opts...)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return out, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *backendClient) TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) {
|
|
||||||
out := new(TokenizationResponse)
|
|
||||||
err := c.cc.Invoke(ctx, "/backend.Backend/TokenizeString", in, out, opts...)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return out, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *backendClient) Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) {
|
|
||||||
out := new(StatusResponse)
|
|
||||||
err := c.cc.Invoke(ctx, "/backend.Backend/Status", in, out, opts...)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return out, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// BackendServer is the server API for Backend service.
|
|
||||||
// All implementations must embed UnimplementedBackendServer
|
|
||||||
// for forward compatibility
|
|
||||||
type BackendServer interface {
|
|
||||||
Health(context.Context, *HealthMessage) (*Reply, error)
|
|
||||||
Predict(context.Context, *PredictOptions) (*Reply, error)
|
|
||||||
LoadModel(context.Context, *ModelOptions) (*Result, error)
|
|
||||||
PredictStream(*PredictOptions, Backend_PredictStreamServer) error
|
|
||||||
Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error)
|
|
||||||
GenerateImage(context.Context, *GenerateImageRequest) (*Result, error)
|
|
||||||
AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error)
|
|
||||||
TTS(context.Context, *TTSRequest) (*Result, error)
|
|
||||||
TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error)
|
|
||||||
Status(context.Context, *HealthMessage) (*StatusResponse, error)
|
|
||||||
mustEmbedUnimplementedBackendServer()
|
|
||||||
}
|
|
||||||
|
|
||||||
// UnimplementedBackendServer must be embedded to have forward compatible implementations.
|
|
||||||
type UnimplementedBackendServer struct {
|
|
||||||
}
|
|
||||||
|
|
||||||
func (UnimplementedBackendServer) Health(context.Context, *HealthMessage) (*Reply, error) {
|
|
||||||
return nil, status.Errorf(codes.Unimplemented, "method Health not implemented")
|
|
||||||
}
|
|
||||||
func (UnimplementedBackendServer) Predict(context.Context, *PredictOptions) (*Reply, error) {
|
|
||||||
return nil, status.Errorf(codes.Unimplemented, "method Predict not implemented")
|
|
||||||
}
|
|
||||||
func (UnimplementedBackendServer) LoadModel(context.Context, *ModelOptions) (*Result, error) {
|
|
||||||
return nil, status.Errorf(codes.Unimplemented, "method LoadModel not implemented")
|
|
||||||
}
|
|
||||||
func (UnimplementedBackendServer) PredictStream(*PredictOptions, Backend_PredictStreamServer) error {
|
|
||||||
return status.Errorf(codes.Unimplemented, "method PredictStream not implemented")
|
|
||||||
}
|
|
||||||
func (UnimplementedBackendServer) Embedding(context.Context, *PredictOptions) (*EmbeddingResult, error) {
|
|
||||||
return nil, status.Errorf(codes.Unimplemented, "method Embedding not implemented")
|
|
||||||
}
|
|
||||||
func (UnimplementedBackendServer) GenerateImage(context.Context, *GenerateImageRequest) (*Result, error) {
|
|
||||||
return nil, status.Errorf(codes.Unimplemented, "method GenerateImage not implemented")
|
|
||||||
}
|
|
||||||
func (UnimplementedBackendServer) AudioTranscription(context.Context, *TranscriptRequest) (*TranscriptResult, error) {
|
|
||||||
return nil, status.Errorf(codes.Unimplemented, "method AudioTranscription not implemented")
|
|
||||||
}
|
|
||||||
func (UnimplementedBackendServer) TTS(context.Context, *TTSRequest) (*Result, error) {
|
|
||||||
return nil, status.Errorf(codes.Unimplemented, "method TTS not implemented")
|
|
||||||
}
|
|
||||||
func (UnimplementedBackendServer) TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error) {
|
|
||||||
return nil, status.Errorf(codes.Unimplemented, "method TokenizeString not implemented")
|
|
||||||
}
|
|
||||||
func (UnimplementedBackendServer) Status(context.Context, *HealthMessage) (*StatusResponse, error) {
|
|
||||||
return nil, status.Errorf(codes.Unimplemented, "method Status not implemented")
|
|
||||||
}
|
|
||||||
func (UnimplementedBackendServer) mustEmbedUnimplementedBackendServer() {}
|
|
||||||
|
|
||||||
// UnsafeBackendServer may be embedded to opt out of forward compatibility for this service.
|
|
||||||
// Use of this interface is not recommended, as added methods to BackendServer will
|
|
||||||
// result in compilation errors.
|
|
||||||
type UnsafeBackendServer interface {
|
|
||||||
mustEmbedUnimplementedBackendServer()
|
|
||||||
}
|
|
||||||
|
|
||||||
func RegisterBackendServer(s grpc.ServiceRegistrar, srv BackendServer) {
|
|
||||||
s.RegisterService(&Backend_ServiceDesc, srv)
|
|
||||||
}
|
|
||||||
|
|
||||||
func _Backend_Health_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
|
||||||
in := new(HealthMessage)
|
|
||||||
if err := dec(in); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if interceptor == nil {
|
|
||||||
return srv.(BackendServer).Health(ctx, in)
|
|
||||||
}
|
|
||||||
info := &grpc.UnaryServerInfo{
|
|
||||||
Server: srv,
|
|
||||||
FullMethod: "/backend.Backend/Health",
|
|
||||||
}
|
|
||||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
|
||||||
return srv.(BackendServer).Health(ctx, req.(*HealthMessage))
|
|
||||||
}
|
|
||||||
return interceptor(ctx, in, info, handler)
|
|
||||||
}
|
|
||||||
|
|
||||||
func _Backend_Predict_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
|
||||||
in := new(PredictOptions)
|
|
||||||
if err := dec(in); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if interceptor == nil {
|
|
||||||
return srv.(BackendServer).Predict(ctx, in)
|
|
||||||
}
|
|
||||||
info := &grpc.UnaryServerInfo{
|
|
||||||
Server: srv,
|
|
||||||
FullMethod: "/backend.Backend/Predict",
|
|
||||||
}
|
|
||||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
|
||||||
return srv.(BackendServer).Predict(ctx, req.(*PredictOptions))
|
|
||||||
}
|
|
||||||
return interceptor(ctx, in, info, handler)
|
|
||||||
}
|
|
||||||
|
|
||||||
func _Backend_LoadModel_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
|
||||||
in := new(ModelOptions)
|
|
||||||
if err := dec(in); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if interceptor == nil {
|
|
||||||
return srv.(BackendServer).LoadModel(ctx, in)
|
|
||||||
}
|
|
||||||
info := &grpc.UnaryServerInfo{
|
|
||||||
Server: srv,
|
|
||||||
FullMethod: "/backend.Backend/LoadModel",
|
|
||||||
}
|
|
||||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
|
||||||
return srv.(BackendServer).LoadModel(ctx, req.(*ModelOptions))
|
|
||||||
}
|
|
||||||
return interceptor(ctx, in, info, handler)
|
|
||||||
}
|
|
||||||
|
|
||||||
func _Backend_PredictStream_Handler(srv interface{}, stream grpc.ServerStream) error {
|
|
||||||
m := new(PredictOptions)
|
|
||||||
if err := stream.RecvMsg(m); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return srv.(BackendServer).PredictStream(m, &backendPredictStreamServer{stream})
|
|
||||||
}
|
|
||||||
|
|
||||||
type Backend_PredictStreamServer interface {
|
|
||||||
Send(*Reply) error
|
|
||||||
grpc.ServerStream
|
|
||||||
}
|
|
||||||
|
|
||||||
type backendPredictStreamServer struct {
|
|
||||||
grpc.ServerStream
|
|
||||||
}
|
|
||||||
|
|
||||||
func (x *backendPredictStreamServer) Send(m *Reply) error {
|
|
||||||
return x.ServerStream.SendMsg(m)
|
|
||||||
}
|
|
||||||
|
|
||||||
func _Backend_Embedding_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
|
||||||
in := new(PredictOptions)
|
|
||||||
if err := dec(in); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if interceptor == nil {
|
|
||||||
return srv.(BackendServer).Embedding(ctx, in)
|
|
||||||
}
|
|
||||||
info := &grpc.UnaryServerInfo{
|
|
||||||
Server: srv,
|
|
||||||
FullMethod: "/backend.Backend/Embedding",
|
|
||||||
}
|
|
||||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
|
||||||
return srv.(BackendServer).Embedding(ctx, req.(*PredictOptions))
|
|
||||||
}
|
|
||||||
return interceptor(ctx, in, info, handler)
|
|
||||||
}
|
|
||||||
|
|
||||||
func _Backend_GenerateImage_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
|
||||||
in := new(GenerateImageRequest)
|
|
||||||
if err := dec(in); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if interceptor == nil {
|
|
||||||
return srv.(BackendServer).GenerateImage(ctx, in)
|
|
||||||
}
|
|
||||||
info := &grpc.UnaryServerInfo{
|
|
||||||
Server: srv,
|
|
||||||
FullMethod: "/backend.Backend/GenerateImage",
|
|
||||||
}
|
|
||||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
|
||||||
return srv.(BackendServer).GenerateImage(ctx, req.(*GenerateImageRequest))
|
|
||||||
}
|
|
||||||
return interceptor(ctx, in, info, handler)
|
|
||||||
}
|
|
||||||
|
|
||||||
func _Backend_AudioTranscription_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
|
||||||
in := new(TranscriptRequest)
|
|
||||||
if err := dec(in); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if interceptor == nil {
|
|
||||||
return srv.(BackendServer).AudioTranscription(ctx, in)
|
|
||||||
}
|
|
||||||
info := &grpc.UnaryServerInfo{
|
|
||||||
Server: srv,
|
|
||||||
FullMethod: "/backend.Backend/AudioTranscription",
|
|
||||||
}
|
|
||||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
|
||||||
return srv.(BackendServer).AudioTranscription(ctx, req.(*TranscriptRequest))
|
|
||||||
}
|
|
||||||
return interceptor(ctx, in, info, handler)
|
|
||||||
}
|
|
||||||
|
|
||||||
func _Backend_TTS_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
|
||||||
in := new(TTSRequest)
|
|
||||||
if err := dec(in); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if interceptor == nil {
|
|
||||||
return srv.(BackendServer).TTS(ctx, in)
|
|
||||||
}
|
|
||||||
info := &grpc.UnaryServerInfo{
|
|
||||||
Server: srv,
|
|
||||||
FullMethod: "/backend.Backend/TTS",
|
|
||||||
}
|
|
||||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
|
||||||
return srv.(BackendServer).TTS(ctx, req.(*TTSRequest))
|
|
||||||
}
|
|
||||||
return interceptor(ctx, in, info, handler)
|
|
||||||
}
|
|
||||||
|
|
||||||
func _Backend_TokenizeString_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
|
||||||
in := new(PredictOptions)
|
|
||||||
if err := dec(in); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if interceptor == nil {
|
|
||||||
return srv.(BackendServer).TokenizeString(ctx, in)
|
|
||||||
}
|
|
||||||
info := &grpc.UnaryServerInfo{
|
|
||||||
Server: srv,
|
|
||||||
FullMethod: "/backend.Backend/TokenizeString",
|
|
||||||
}
|
|
||||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
|
||||||
return srv.(BackendServer).TokenizeString(ctx, req.(*PredictOptions))
|
|
||||||
}
|
|
||||||
return interceptor(ctx, in, info, handler)
|
|
||||||
}
|
|
||||||
|
|
||||||
func _Backend_Status_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
|
||||||
in := new(HealthMessage)
|
|
||||||
if err := dec(in); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if interceptor == nil {
|
|
||||||
return srv.(BackendServer).Status(ctx, in)
|
|
||||||
}
|
|
||||||
info := &grpc.UnaryServerInfo{
|
|
||||||
Server: srv,
|
|
||||||
FullMethod: "/backend.Backend/Status",
|
|
||||||
}
|
|
||||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
|
||||||
return srv.(BackendServer).Status(ctx, req.(*HealthMessage))
|
|
||||||
}
|
|
||||||
return interceptor(ctx, in, info, handler)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Backend_ServiceDesc is the grpc.ServiceDesc for Backend service.
|
|
||||||
// It's only intended for direct use with grpc.RegisterService,
|
|
||||||
// and not to be introspected or modified (even as a copy)
|
|
||||||
var Backend_ServiceDesc = grpc.ServiceDesc{
|
|
||||||
ServiceName: "backend.Backend",
|
|
||||||
HandlerType: (*BackendServer)(nil),
|
|
||||||
Methods: []grpc.MethodDesc{
|
|
||||||
{
|
|
||||||
MethodName: "Health",
|
|
||||||
Handler: _Backend_Health_Handler,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
MethodName: "Predict",
|
|
||||||
Handler: _Backend_Predict_Handler,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
MethodName: "LoadModel",
|
|
||||||
Handler: _Backend_LoadModel_Handler,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
MethodName: "Embedding",
|
|
||||||
Handler: _Backend_Embedding_Handler,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
MethodName: "GenerateImage",
|
|
||||||
Handler: _Backend_GenerateImage_Handler,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
MethodName: "AudioTranscription",
|
|
||||||
Handler: _Backend_AudioTranscription_Handler,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
MethodName: "TTS",
|
|
||||||
Handler: _Backend_TTS_Handler,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
MethodName: "TokenizeString",
|
|
||||||
Handler: _Backend_TokenizeString_Handler,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
MethodName: "Status",
|
|
||||||
Handler: _Backend_Status_Handler,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Streams: []grpc.StreamDesc{
|
|
||||||
{
|
|
||||||
StreamName: "PredictStream",
|
|
||||||
Handler: _Backend_PredictStream_Handler,
|
|
||||||
ServerStreams: true,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Metadata: "backend/backend.proto",
|
|
||||||
}
|
|
||||||
@@ -5,7 +5,6 @@ SYSTEM ?= $(HOST_SYSTEM)
|
|||||||
TAG_LIB_GRPC?=v1.59.0
|
TAG_LIB_GRPC?=v1.59.0
|
||||||
GIT_REPO_LIB_GRPC?=https://github.com/grpc/grpc.git
|
GIT_REPO_LIB_GRPC?=https://github.com/grpc/grpc.git
|
||||||
GIT_CLONE_DEPTH?=1
|
GIT_CLONE_DEPTH?=1
|
||||||
NUM_BUILD_THREADS?=$(shell nproc --ignore=1)
|
|
||||||
|
|
||||||
INSTALLED_PACKAGES=installed_packages
|
INSTALLED_PACKAGES=installed_packages
|
||||||
GRPC_REPO=grpc_repo
|
GRPC_REPO=grpc_repo
|
||||||
@@ -52,7 +51,7 @@ $(GRPC_REPO):
|
|||||||
|
|
||||||
$(GRPC_BUILD): $(GRPC_REPO)
|
$(GRPC_BUILD): $(GRPC_REPO)
|
||||||
mkdir -p $(GRPC_BUILD)
|
mkdir -p $(GRPC_BUILD)
|
||||||
cd $(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . -- -j ${NUM_BUILD_THREADS} && cmake --build . --target install -- -j ${NUM_BUILD_THREADS}
|
cd $(GRPC_BUILD) && cmake $(CMAKE_ARGS) ../$(GRPC_REPO)/grpc && cmake --build . && cmake --build . --target install
|
||||||
|
|
||||||
build: $(INSTALLED_PACKAGES)
|
build: $(INSTALLED_PACKAGES)
|
||||||
|
|
||||||
|
|||||||
@@ -2332,6 +2332,10 @@ public:
|
|||||||
std::string completion_text = result.result_json.value("content", "");
|
std::string completion_text = result.result_json.value("content", "");
|
||||||
|
|
||||||
reply.set_message(completion_text);
|
reply.set_message(completion_text);
|
||||||
|
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
|
||||||
|
reply.set_tokens(tokens_predicted);
|
||||||
|
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
||||||
|
reply.set_prompt_tokens(tokens_evaluated);
|
||||||
|
|
||||||
// Send the reply
|
// Send the reply
|
||||||
writer->Write(reply);
|
writer->Write(reply);
|
||||||
@@ -2357,6 +2361,10 @@ public:
|
|||||||
task_result result = llama.queue_results.recv(task_id);
|
task_result result = llama.queue_results.recv(task_id);
|
||||||
if (!result.error && result.stop) {
|
if (!result.error && result.stop) {
|
||||||
completion_text = result.result_json.value("content", "");
|
completion_text = result.result_json.value("content", "");
|
||||||
|
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
|
||||||
|
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
||||||
|
reply->set_prompt_tokens(tokens_evaluated);
|
||||||
|
reply->set_tokens(tokens_predicted);
|
||||||
reply->set_message(completion_text);
|
reply->set_message(completion_text);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|||||||
@@ -1,4 +1,13 @@
|
|||||||
.PHONY: autogptq
|
.PHONY: autogptq
|
||||||
autogptq:
|
autogptq: protogen
|
||||||
$(MAKE) -C ../common-env/transformers
|
$(MAKE) -C ../common-env/transformers
|
||||||
|
|
||||||
|
.PHONY: protogen
|
||||||
|
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
.PHONY: protogen-clean
|
||||||
|
protogen-clean:
|
||||||
|
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
backend_pb2_grpc.py backend_pb2.py:
|
||||||
|
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||||
@@ -39,7 +39,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
self.model_name = "Qwen-VL-Chat"
|
self.model_name = "Qwen-VL-Chat"
|
||||||
model = AutoModelForCausalLM.from_pretrained(model_path,
|
model = AutoModelForCausalLM.from_pretrained(model_path,
|
||||||
trust_remote_code=request.TrustRemoteCode,
|
trust_remote_code=request.TrustRemoteCode,
|
||||||
use_triton=request.UseTriton,
|
|
||||||
device_map="auto").eval()
|
device_map="auto").eval()
|
||||||
else:
|
else:
|
||||||
model = AutoGPTQForCausalLM.from_quantized(model_path,
|
model = AutoGPTQForCausalLM.from_quantized(model_path,
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -1,363 +0,0 @@
|
|||||||
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
|
|
||||||
"""Client and server classes corresponding to protobuf-defined services."""
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
import backend_pb2 as backend__pb2
|
|
||||||
|
|
||||||
|
|
||||||
class BackendStub(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def __init__(self, channel):
|
|
||||||
"""Constructor.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
channel: A grpc.Channel.
|
|
||||||
"""
|
|
||||||
self.Health = channel.unary_unary(
|
|
||||||
'/backend.Backend/Health',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Predict = channel.unary_unary(
|
|
||||||
'/backend.Backend/Predict',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.LoadModel = channel.unary_unary(
|
|
||||||
'/backend.Backend/LoadModel',
|
|
||||||
request_serializer=backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.PredictStream = channel.unary_stream(
|
|
||||||
'/backend.Backend/PredictStream',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Embedding = channel.unary_unary(
|
|
||||||
'/backend.Backend/Embedding',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.EmbeddingResult.FromString,
|
|
||||||
)
|
|
||||||
self.GenerateImage = channel.unary_unary(
|
|
||||||
'/backend.Backend/GenerateImage',
|
|
||||||
request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.AudioTranscription = channel.unary_unary(
|
|
||||||
'/backend.Backend/AudioTranscription',
|
|
||||||
request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TranscriptResult.FromString,
|
|
||||||
)
|
|
||||||
self.TTS = channel.unary_unary(
|
|
||||||
'/backend.Backend/TTS',
|
|
||||||
request_serializer=backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.TokenizeString = channel.unary_unary(
|
|
||||||
'/backend.Backend/TokenizeString',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TokenizationResponse.FromString,
|
|
||||||
)
|
|
||||||
self.Status = channel.unary_unary(
|
|
||||||
'/backend.Backend/Status',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.StatusResponse.FromString,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class BackendServicer(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def Health(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Predict(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def LoadModel(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def PredictStream(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Embedding(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def GenerateImage(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def AudioTranscription(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TTS(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TokenizeString(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Status(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
|
|
||||||
def add_BackendServicer_to_server(servicer, server):
|
|
||||||
rpc_method_handlers = {
|
|
||||||
'Health': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Health,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Predict': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Predict,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'LoadModel': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.LoadModel,
|
|
||||||
request_deserializer=backend__pb2.ModelOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'PredictStream': grpc.unary_stream_rpc_method_handler(
|
|
||||||
servicer.PredictStream,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Embedding': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Embedding,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'GenerateImage': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.GenerateImage,
|
|
||||||
request_deserializer=backend__pb2.GenerateImageRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'AudioTranscription': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.AudioTranscription,
|
|
||||||
request_deserializer=backend__pb2.TranscriptRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.TranscriptResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'TTS': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TTS,
|
|
||||||
request_deserializer=backend__pb2.TTSRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'TokenizeString': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TokenizeString,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
'Status': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Status,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.StatusResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
}
|
|
||||||
generic_handler = grpc.method_handlers_generic_handler(
|
|
||||||
'backend.Backend', rpc_method_handlers)
|
|
||||||
server.add_generic_rpc_handlers((generic_handler,))
|
|
||||||
|
|
||||||
|
|
||||||
# This class is part of an EXPERIMENTAL API.
|
|
||||||
class Backend(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Health(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Predict(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def LoadModel(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
|
|
||||||
backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def PredictStream(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Embedding(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.EmbeddingResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def GenerateImage(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
|
|
||||||
backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def AudioTranscription(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
|
|
||||||
backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
backend__pb2.TranscriptResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TTS(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
|
|
||||||
backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TokenizeString(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.TokenizationResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Status(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.StatusResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
@@ -1,15 +1,25 @@
|
|||||||
.PHONY: ttsbark
|
.PHONY: ttsbark
|
||||||
ttsbark:
|
ttsbark: protogen
|
||||||
$(MAKE) -C ../common-env/transformers
|
$(MAKE) -C ../common-env/transformers
|
||||||
|
|
||||||
.PHONY: run
|
.PHONY: run
|
||||||
run:
|
run: protogen
|
||||||
@echo "Running bark..."
|
@echo "Running bark..."
|
||||||
bash run.sh
|
bash run.sh
|
||||||
@echo "bark run."
|
@echo "bark run."
|
||||||
|
|
||||||
.PHONY: test
|
.PHONY: test
|
||||||
test:
|
test: protogen
|
||||||
@echo "Testing bark..."
|
@echo "Testing bark..."
|
||||||
bash test.sh
|
bash test.sh
|
||||||
@echo "bark tested."
|
@echo "bark tested."
|
||||||
|
|
||||||
|
.PHONY: protogen
|
||||||
|
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
.PHONY: protogen-clean
|
||||||
|
protogen-clean:
|
||||||
|
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
backend_pb2_grpc.py backend_pb2.py:
|
||||||
|
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||||
File diff suppressed because one or more lines are too long
@@ -1,363 +0,0 @@
|
|||||||
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
|
|
||||||
"""Client and server classes corresponding to protobuf-defined services."""
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
import backend_pb2 as backend__pb2
|
|
||||||
|
|
||||||
|
|
||||||
class BackendStub(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def __init__(self, channel):
|
|
||||||
"""Constructor.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
channel: A grpc.Channel.
|
|
||||||
"""
|
|
||||||
self.Health = channel.unary_unary(
|
|
||||||
'/backend.Backend/Health',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Predict = channel.unary_unary(
|
|
||||||
'/backend.Backend/Predict',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.LoadModel = channel.unary_unary(
|
|
||||||
'/backend.Backend/LoadModel',
|
|
||||||
request_serializer=backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.PredictStream = channel.unary_stream(
|
|
||||||
'/backend.Backend/PredictStream',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Embedding = channel.unary_unary(
|
|
||||||
'/backend.Backend/Embedding',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.EmbeddingResult.FromString,
|
|
||||||
)
|
|
||||||
self.GenerateImage = channel.unary_unary(
|
|
||||||
'/backend.Backend/GenerateImage',
|
|
||||||
request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.AudioTranscription = channel.unary_unary(
|
|
||||||
'/backend.Backend/AudioTranscription',
|
|
||||||
request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TranscriptResult.FromString,
|
|
||||||
)
|
|
||||||
self.TTS = channel.unary_unary(
|
|
||||||
'/backend.Backend/TTS',
|
|
||||||
request_serializer=backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.TokenizeString = channel.unary_unary(
|
|
||||||
'/backend.Backend/TokenizeString',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TokenizationResponse.FromString,
|
|
||||||
)
|
|
||||||
self.Status = channel.unary_unary(
|
|
||||||
'/backend.Backend/Status',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.StatusResponse.FromString,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class BackendServicer(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def Health(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Predict(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def LoadModel(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def PredictStream(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Embedding(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def GenerateImage(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def AudioTranscription(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TTS(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TokenizeString(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Status(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
|
|
||||||
def add_BackendServicer_to_server(servicer, server):
|
|
||||||
rpc_method_handlers = {
|
|
||||||
'Health': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Health,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Predict': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Predict,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'LoadModel': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.LoadModel,
|
|
||||||
request_deserializer=backend__pb2.ModelOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'PredictStream': grpc.unary_stream_rpc_method_handler(
|
|
||||||
servicer.PredictStream,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Embedding': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Embedding,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'GenerateImage': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.GenerateImage,
|
|
||||||
request_deserializer=backend__pb2.GenerateImageRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'AudioTranscription': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.AudioTranscription,
|
|
||||||
request_deserializer=backend__pb2.TranscriptRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.TranscriptResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'TTS': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TTS,
|
|
||||||
request_deserializer=backend__pb2.TTSRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'TokenizeString': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TokenizeString,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
'Status': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Status,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.StatusResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
}
|
|
||||||
generic_handler = grpc.method_handlers_generic_handler(
|
|
||||||
'backend.Backend', rpc_method_handlers)
|
|
||||||
server.add_generic_rpc_handlers((generic_handler,))
|
|
||||||
|
|
||||||
|
|
||||||
# This class is part of an EXPERIMENTAL API.
|
|
||||||
class Backend(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Health(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Predict(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def LoadModel(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
|
|
||||||
backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def PredictStream(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Embedding(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.EmbeddingResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def GenerateImage(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
|
|
||||||
backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def AudioTranscription(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
|
|
||||||
backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
backend__pb2.TranscriptResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TTS(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
|
|
||||||
backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TokenizeString(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.TokenizationResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Status(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.StatusResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
@@ -2,6 +2,7 @@
|
|||||||
set -ex
|
set -ex
|
||||||
|
|
||||||
SKIP_CONDA=${SKIP_CONDA:-0}
|
SKIP_CONDA=${SKIP_CONDA:-0}
|
||||||
|
REQUIREMENTS_FILE=$1
|
||||||
|
|
||||||
# Check if environment exist
|
# Check if environment exist
|
||||||
conda_env_exists(){
|
conda_env_exists(){
|
||||||
@@ -14,7 +15,7 @@ else
|
|||||||
export PATH=$PATH:/opt/conda/bin
|
export PATH=$PATH:/opt/conda/bin
|
||||||
if conda_env_exists "transformers" ; then
|
if conda_env_exists "transformers" ; then
|
||||||
echo "Creating virtual environment..."
|
echo "Creating virtual environment..."
|
||||||
conda env create --name transformers --file $1
|
conda env create --name transformers --file $REQUIREMENTS_FILE
|
||||||
echo "Virtual environment created."
|
echo "Virtual environment created."
|
||||||
else
|
else
|
||||||
echo "Virtual environment already exists."
|
echo "Virtual environment already exists."
|
||||||
@@ -28,11 +29,16 @@ if [ -d "/opt/intel" ]; then
|
|||||||
pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed optimum[openvino]
|
pip install intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed optimum[openvino]
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$PIP_CACHE_PURGE" = true ] ; then
|
# If we didn't skip conda, activate the environment
|
||||||
if [ $SKIP_CONDA -eq 0 ]; then
|
# to install FlashAttention
|
||||||
# Activate conda environment
|
if [ $SKIP_CONDA -eq 0 ]; then
|
||||||
source activate transformers
|
source activate transformers
|
||||||
fi
|
fi
|
||||||
|
if [[ $REQUIREMENTS_FILE =~ -nvidia.yml$ ]]; then
|
||||||
|
#TODO: FlashAttention is supported on nvidia and ROCm, but ROCm install can't be done this easily
|
||||||
|
pip install flash-attn --no-build-isolation
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$PIP_CACHE_PURGE" = true ] ; then
|
||||||
pip cache purge
|
pip cache purge
|
||||||
fi
|
fi
|
||||||
@@ -116,8 +116,10 @@ dependencies:
|
|||||||
- sudachipy
|
- sudachipy
|
||||||
- sudachidict_core
|
- sudachidict_core
|
||||||
- vocos
|
- vocos
|
||||||
- vllm==0.3.2
|
- vllm>=0.4.0
|
||||||
- transformers>=4.38.2 # Updated Version
|
- transformers>=4.38.2 # Updated Version
|
||||||
- transformers_stream_generator==0.0.5
|
- transformers_stream_generator==0.0.5
|
||||||
- xformers==0.0.23.post1
|
- xformers==0.0.23.post1
|
||||||
|
- rerankers[transformers]
|
||||||
|
- pydantic
|
||||||
prefix: /opt/conda/envs/transformers
|
prefix: /opt/conda/envs/transformers
|
||||||
|
|||||||
@@ -104,8 +104,10 @@ dependencies:
|
|||||||
- sudachipy
|
- sudachipy
|
||||||
- sudachidict_core
|
- sudachidict_core
|
||||||
- vocos
|
- vocos
|
||||||
- vllm==0.3.2
|
- vllm>=0.4.0
|
||||||
- transformers>=4.38.2 # Updated Version
|
- transformers>=4.38.2 # Updated Version
|
||||||
- transformers_stream_generator==0.0.5
|
- transformers_stream_generator==0.0.5
|
||||||
- xformers==0.0.23.post1
|
- xformers==0.0.23.post1
|
||||||
|
- rerankers[transformers]
|
||||||
|
- pydantic
|
||||||
prefix: /opt/conda/envs/transformers
|
prefix: /opt/conda/envs/transformers
|
||||||
|
|||||||
@@ -108,8 +108,10 @@ dependencies:
|
|||||||
- sudachipy
|
- sudachipy
|
||||||
- sudachidict_core
|
- sudachidict_core
|
||||||
- vocos
|
- vocos
|
||||||
- vllm==0.3.2
|
- vllm>=0.4.0
|
||||||
- transformers>=4.38.2 # Updated Version
|
- transformers>=4.38.2 # Updated Version
|
||||||
- transformers_stream_generator==0.0.5
|
- transformers_stream_generator==0.0.5
|
||||||
- xformers==0.0.23.post1
|
- xformers==0.0.23.post1
|
||||||
|
- rerankers[transformers]
|
||||||
|
- pydantic
|
||||||
prefix: /opt/conda/envs/transformers
|
prefix: /opt/conda/envs/transformers
|
||||||
|
|||||||
@@ -1,15 +1,25 @@
|
|||||||
.PHONY: coqui
|
.PHONY: coqui
|
||||||
coqui:
|
coqui: protogen
|
||||||
$(MAKE) -C ../common-env/transformers
|
$(MAKE) -C ../common-env/transformers
|
||||||
|
|
||||||
.PHONY: run
|
.PHONY: run
|
||||||
run:
|
run: protogen
|
||||||
@echo "Running coqui..."
|
@echo "Running coqui..."
|
||||||
bash run.sh
|
bash run.sh
|
||||||
@echo "coqui run."
|
@echo "coqui run."
|
||||||
|
|
||||||
.PHONY: test
|
.PHONY: test
|
||||||
test:
|
test: protogen
|
||||||
@echo "Testing coqui..."
|
@echo "Testing coqui..."
|
||||||
bash test.sh
|
bash test.sh
|
||||||
@echo "coqui tested."
|
@echo "coqui tested."
|
||||||
|
|
||||||
|
.PHONY: protogen
|
||||||
|
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
.PHONY: protogen-clean
|
||||||
|
protogen-clean:
|
||||||
|
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
backend_pb2_grpc.py backend_pb2.py:
|
||||||
|
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||||
File diff suppressed because one or more lines are too long
@@ -1,363 +0,0 @@
|
|||||||
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
|
|
||||||
"""Client and server classes corresponding to protobuf-defined services."""
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
import backend_pb2 as backend__pb2
|
|
||||||
|
|
||||||
|
|
||||||
class BackendStub(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def __init__(self, channel):
|
|
||||||
"""Constructor.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
channel: A grpc.Channel.
|
|
||||||
"""
|
|
||||||
self.Health = channel.unary_unary(
|
|
||||||
'/backend.Backend/Health',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Predict = channel.unary_unary(
|
|
||||||
'/backend.Backend/Predict',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.LoadModel = channel.unary_unary(
|
|
||||||
'/backend.Backend/LoadModel',
|
|
||||||
request_serializer=backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.PredictStream = channel.unary_stream(
|
|
||||||
'/backend.Backend/PredictStream',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Embedding = channel.unary_unary(
|
|
||||||
'/backend.Backend/Embedding',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.EmbeddingResult.FromString,
|
|
||||||
)
|
|
||||||
self.GenerateImage = channel.unary_unary(
|
|
||||||
'/backend.Backend/GenerateImage',
|
|
||||||
request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.AudioTranscription = channel.unary_unary(
|
|
||||||
'/backend.Backend/AudioTranscription',
|
|
||||||
request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TranscriptResult.FromString,
|
|
||||||
)
|
|
||||||
self.TTS = channel.unary_unary(
|
|
||||||
'/backend.Backend/TTS',
|
|
||||||
request_serializer=backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.TokenizeString = channel.unary_unary(
|
|
||||||
'/backend.Backend/TokenizeString',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TokenizationResponse.FromString,
|
|
||||||
)
|
|
||||||
self.Status = channel.unary_unary(
|
|
||||||
'/backend.Backend/Status',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.StatusResponse.FromString,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class BackendServicer(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def Health(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Predict(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def LoadModel(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def PredictStream(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Embedding(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def GenerateImage(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def AudioTranscription(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TTS(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TokenizeString(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Status(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
|
|
||||||
def add_BackendServicer_to_server(servicer, server):
|
|
||||||
rpc_method_handlers = {
|
|
||||||
'Health': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Health,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Predict': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Predict,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'LoadModel': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.LoadModel,
|
|
||||||
request_deserializer=backend__pb2.ModelOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'PredictStream': grpc.unary_stream_rpc_method_handler(
|
|
||||||
servicer.PredictStream,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Embedding': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Embedding,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'GenerateImage': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.GenerateImage,
|
|
||||||
request_deserializer=backend__pb2.GenerateImageRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'AudioTranscription': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.AudioTranscription,
|
|
||||||
request_deserializer=backend__pb2.TranscriptRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.TranscriptResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'TTS': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TTS,
|
|
||||||
request_deserializer=backend__pb2.TTSRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'TokenizeString': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TokenizeString,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
'Status': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Status,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.StatusResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
}
|
|
||||||
generic_handler = grpc.method_handlers_generic_handler(
|
|
||||||
'backend.Backend', rpc_method_handlers)
|
|
||||||
server.add_generic_rpc_handlers((generic_handler,))
|
|
||||||
|
|
||||||
|
|
||||||
# This class is part of an EXPERIMENTAL API.
|
|
||||||
class Backend(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Health(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Predict(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def LoadModel(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
|
|
||||||
backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def PredictStream(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Embedding(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.EmbeddingResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def GenerateImage(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
|
|
||||||
backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def AudioTranscription(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
|
|
||||||
backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
backend__pb2.TranscriptResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TTS(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
|
|
||||||
backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TokenizeString(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.TokenizationResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Status(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.StatusResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
@@ -12,15 +12,25 @@ export SKIP_CONDA=1
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
.PHONY: diffusers
|
.PHONY: diffusers
|
||||||
diffusers:
|
diffusers: protogen
|
||||||
@echo "Installing $(CONDA_ENV_PATH)..."
|
@echo "Installing $(CONDA_ENV_PATH)..."
|
||||||
bash install.sh $(CONDA_ENV_PATH)
|
bash install.sh $(CONDA_ENV_PATH)
|
||||||
|
|
||||||
.PHONY: run
|
.PHONY: run
|
||||||
run:
|
run: protogen
|
||||||
@echo "Running diffusers..."
|
@echo "Running diffusers..."
|
||||||
bash run.sh
|
bash run.sh
|
||||||
@echo "Diffusers run."
|
@echo "Diffusers run."
|
||||||
|
|
||||||
test:
|
test: protogen
|
||||||
bash test.sh
|
bash test.sh
|
||||||
|
|
||||||
|
.PHONY: protogen
|
||||||
|
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
.PHONY: protogen-clean
|
||||||
|
protogen-clean:
|
||||||
|
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
backend_pb2_grpc.py backend_pb2.py:
|
||||||
|
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||||
File diff suppressed because one or more lines are too long
@@ -1,363 +0,0 @@
|
|||||||
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
|
|
||||||
"""Client and server classes corresponding to protobuf-defined services."""
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
import backend_pb2 as backend__pb2
|
|
||||||
|
|
||||||
|
|
||||||
class BackendStub(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def __init__(self, channel):
|
|
||||||
"""Constructor.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
channel: A grpc.Channel.
|
|
||||||
"""
|
|
||||||
self.Health = channel.unary_unary(
|
|
||||||
'/backend.Backend/Health',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Predict = channel.unary_unary(
|
|
||||||
'/backend.Backend/Predict',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.LoadModel = channel.unary_unary(
|
|
||||||
'/backend.Backend/LoadModel',
|
|
||||||
request_serializer=backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.PredictStream = channel.unary_stream(
|
|
||||||
'/backend.Backend/PredictStream',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Embedding = channel.unary_unary(
|
|
||||||
'/backend.Backend/Embedding',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.EmbeddingResult.FromString,
|
|
||||||
)
|
|
||||||
self.GenerateImage = channel.unary_unary(
|
|
||||||
'/backend.Backend/GenerateImage',
|
|
||||||
request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.AudioTranscription = channel.unary_unary(
|
|
||||||
'/backend.Backend/AudioTranscription',
|
|
||||||
request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TranscriptResult.FromString,
|
|
||||||
)
|
|
||||||
self.TTS = channel.unary_unary(
|
|
||||||
'/backend.Backend/TTS',
|
|
||||||
request_serializer=backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.TokenizeString = channel.unary_unary(
|
|
||||||
'/backend.Backend/TokenizeString',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TokenizationResponse.FromString,
|
|
||||||
)
|
|
||||||
self.Status = channel.unary_unary(
|
|
||||||
'/backend.Backend/Status',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.StatusResponse.FromString,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class BackendServicer(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def Health(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Predict(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def LoadModel(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def PredictStream(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Embedding(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def GenerateImage(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def AudioTranscription(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TTS(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TokenizeString(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Status(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
|
|
||||||
def add_BackendServicer_to_server(servicer, server):
|
|
||||||
rpc_method_handlers = {
|
|
||||||
'Health': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Health,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Predict': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Predict,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'LoadModel': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.LoadModel,
|
|
||||||
request_deserializer=backend__pb2.ModelOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'PredictStream': grpc.unary_stream_rpc_method_handler(
|
|
||||||
servicer.PredictStream,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Embedding': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Embedding,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'GenerateImage': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.GenerateImage,
|
|
||||||
request_deserializer=backend__pb2.GenerateImageRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'AudioTranscription': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.AudioTranscription,
|
|
||||||
request_deserializer=backend__pb2.TranscriptRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.TranscriptResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'TTS': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TTS,
|
|
||||||
request_deserializer=backend__pb2.TTSRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'TokenizeString': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TokenizeString,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
'Status': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Status,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.StatusResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
}
|
|
||||||
generic_handler = grpc.method_handlers_generic_handler(
|
|
||||||
'backend.Backend', rpc_method_handlers)
|
|
||||||
server.add_generic_rpc_handlers((generic_handler,))
|
|
||||||
|
|
||||||
|
|
||||||
# This class is part of an EXPERIMENTAL API.
|
|
||||||
class Backend(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Health(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Predict(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def LoadModel(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
|
|
||||||
backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def PredictStream(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Embedding(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.EmbeddingResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def GenerateImage(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
|
|
||||||
backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def AudioTranscription(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
|
|
||||||
backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
backend__pb2.TranscriptResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TTS(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
|
|
||||||
backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TokenizeString(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.TokenizationResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Status(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.StatusResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
@@ -61,4 +61,5 @@ dependencies:
|
|||||||
- urllib3==2.0.6
|
- urllib3==2.0.6
|
||||||
- zipp==3.17.0
|
- zipp==3.17.0
|
||||||
- torch
|
- torch
|
||||||
|
- opencv-python
|
||||||
prefix: /opt/conda/envs/diffusers
|
prefix: /opt/conda/envs/diffusers
|
||||||
|
|||||||
@@ -71,4 +71,5 @@ dependencies:
|
|||||||
- typing-extensions==4.8.0
|
- typing-extensions==4.8.0
|
||||||
- urllib3==2.0.6
|
- urllib3==2.0.6
|
||||||
- zipp==3.17.0
|
- zipp==3.17.0
|
||||||
|
- opencv-python
|
||||||
prefix: /opt/conda/envs/diffusers
|
prefix: /opt/conda/envs/diffusers
|
||||||
|
|||||||
@@ -1,11 +1,21 @@
|
|||||||
export CONDA_ENV_PATH = "exllama.yml"
|
export CONDA_ENV_PATH = "exllama.yml"
|
||||||
|
|
||||||
.PHONY: exllama
|
.PHONY: exllama
|
||||||
exllama:
|
exllama: protogen
|
||||||
bash install.sh ${CONDA_ENV_PATH}
|
bash install.sh ${CONDA_ENV_PATH}
|
||||||
|
|
||||||
.PHONY: run
|
.PHONY: run
|
||||||
run:
|
run: protogen
|
||||||
@echo "Running exllama..."
|
@echo "Running exllama..."
|
||||||
bash run.sh
|
bash run.sh
|
||||||
@echo "exllama run."
|
@echo "exllama run."
|
||||||
|
|
||||||
|
.PHONY: protogen
|
||||||
|
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
.PHONY: protogen-clean
|
||||||
|
protogen-clean:
|
||||||
|
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
backend_pb2_grpc.py backend_pb2.py:
|
||||||
|
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||||
File diff suppressed because one or more lines are too long
@@ -1,363 +0,0 @@
|
|||||||
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
|
|
||||||
"""Client and server classes corresponding to protobuf-defined services."""
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
import backend_pb2 as backend__pb2
|
|
||||||
|
|
||||||
|
|
||||||
class BackendStub(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def __init__(self, channel):
|
|
||||||
"""Constructor.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
channel: A grpc.Channel.
|
|
||||||
"""
|
|
||||||
self.Health = channel.unary_unary(
|
|
||||||
'/backend.Backend/Health',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Predict = channel.unary_unary(
|
|
||||||
'/backend.Backend/Predict',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.LoadModel = channel.unary_unary(
|
|
||||||
'/backend.Backend/LoadModel',
|
|
||||||
request_serializer=backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.PredictStream = channel.unary_stream(
|
|
||||||
'/backend.Backend/PredictStream',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Embedding = channel.unary_unary(
|
|
||||||
'/backend.Backend/Embedding',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.EmbeddingResult.FromString,
|
|
||||||
)
|
|
||||||
self.GenerateImage = channel.unary_unary(
|
|
||||||
'/backend.Backend/GenerateImage',
|
|
||||||
request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.AudioTranscription = channel.unary_unary(
|
|
||||||
'/backend.Backend/AudioTranscription',
|
|
||||||
request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TranscriptResult.FromString,
|
|
||||||
)
|
|
||||||
self.TTS = channel.unary_unary(
|
|
||||||
'/backend.Backend/TTS',
|
|
||||||
request_serializer=backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.TokenizeString = channel.unary_unary(
|
|
||||||
'/backend.Backend/TokenizeString',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TokenizationResponse.FromString,
|
|
||||||
)
|
|
||||||
self.Status = channel.unary_unary(
|
|
||||||
'/backend.Backend/Status',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.StatusResponse.FromString,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class BackendServicer(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def Health(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Predict(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def LoadModel(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def PredictStream(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Embedding(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def GenerateImage(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def AudioTranscription(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TTS(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TokenizeString(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Status(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
|
|
||||||
def add_BackendServicer_to_server(servicer, server):
|
|
||||||
rpc_method_handlers = {
|
|
||||||
'Health': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Health,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Predict': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Predict,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'LoadModel': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.LoadModel,
|
|
||||||
request_deserializer=backend__pb2.ModelOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'PredictStream': grpc.unary_stream_rpc_method_handler(
|
|
||||||
servicer.PredictStream,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Embedding': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Embedding,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'GenerateImage': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.GenerateImage,
|
|
||||||
request_deserializer=backend__pb2.GenerateImageRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'AudioTranscription': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.AudioTranscription,
|
|
||||||
request_deserializer=backend__pb2.TranscriptRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.TranscriptResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'TTS': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TTS,
|
|
||||||
request_deserializer=backend__pb2.TTSRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'TokenizeString': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TokenizeString,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
'Status': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Status,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.StatusResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
}
|
|
||||||
generic_handler = grpc.method_handlers_generic_handler(
|
|
||||||
'backend.Backend', rpc_method_handlers)
|
|
||||||
server.add_generic_rpc_handlers((generic_handler,))
|
|
||||||
|
|
||||||
|
|
||||||
# This class is part of an EXPERIMENTAL API.
|
|
||||||
class Backend(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Health(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Predict(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def LoadModel(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
|
|
||||||
backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def PredictStream(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Embedding(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.EmbeddingResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def GenerateImage(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
|
|
||||||
backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def AudioTranscription(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
|
|
||||||
backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
backend__pb2.TranscriptResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TTS(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
|
|
||||||
backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TokenizeString(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.TokenizationResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Status(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.StatusResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
@@ -1,10 +1,20 @@
|
|||||||
.PHONY: exllama2
|
.PHONY: exllama2
|
||||||
exllama2:
|
exllama2: protogen
|
||||||
$(MAKE) -C ../common-env/transformers
|
$(MAKE) -C ../common-env/transformers
|
||||||
bash install.sh
|
bash install.sh
|
||||||
|
|
||||||
.PHONY: run
|
.PHONY: run
|
||||||
run:
|
run: protogen
|
||||||
@echo "Running exllama2..."
|
@echo "Running exllama2..."
|
||||||
bash run.sh
|
bash run.sh
|
||||||
@echo "exllama2 run."
|
@echo "exllama2 run."
|
||||||
|
|
||||||
|
.PHONY: protogen
|
||||||
|
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
.PHONY: protogen-clean
|
||||||
|
protogen-clean:
|
||||||
|
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
backend_pb2_grpc.py backend_pb2.py:
|
||||||
|
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||||
File diff suppressed because one or more lines are too long
@@ -1,363 +0,0 @@
|
|||||||
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
|
|
||||||
"""Client and server classes corresponding to protobuf-defined services."""
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
import backend_pb2 as backend__pb2
|
|
||||||
|
|
||||||
|
|
||||||
class BackendStub(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def __init__(self, channel):
|
|
||||||
"""Constructor.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
channel: A grpc.Channel.
|
|
||||||
"""
|
|
||||||
self.Health = channel.unary_unary(
|
|
||||||
'/backend.Backend/Health',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Predict = channel.unary_unary(
|
|
||||||
'/backend.Backend/Predict',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.LoadModel = channel.unary_unary(
|
|
||||||
'/backend.Backend/LoadModel',
|
|
||||||
request_serializer=backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.PredictStream = channel.unary_stream(
|
|
||||||
'/backend.Backend/PredictStream',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Embedding = channel.unary_unary(
|
|
||||||
'/backend.Backend/Embedding',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.EmbeddingResult.FromString,
|
|
||||||
)
|
|
||||||
self.GenerateImage = channel.unary_unary(
|
|
||||||
'/backend.Backend/GenerateImage',
|
|
||||||
request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.AudioTranscription = channel.unary_unary(
|
|
||||||
'/backend.Backend/AudioTranscription',
|
|
||||||
request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TranscriptResult.FromString,
|
|
||||||
)
|
|
||||||
self.TTS = channel.unary_unary(
|
|
||||||
'/backend.Backend/TTS',
|
|
||||||
request_serializer=backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.TokenizeString = channel.unary_unary(
|
|
||||||
'/backend.Backend/TokenizeString',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TokenizationResponse.FromString,
|
|
||||||
)
|
|
||||||
self.Status = channel.unary_unary(
|
|
||||||
'/backend.Backend/Status',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.StatusResponse.FromString,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class BackendServicer(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def Health(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Predict(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def LoadModel(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def PredictStream(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Embedding(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def GenerateImage(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def AudioTranscription(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TTS(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TokenizeString(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Status(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
|
|
||||||
def add_BackendServicer_to_server(servicer, server):
|
|
||||||
rpc_method_handlers = {
|
|
||||||
'Health': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Health,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Predict': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Predict,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'LoadModel': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.LoadModel,
|
|
||||||
request_deserializer=backend__pb2.ModelOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'PredictStream': grpc.unary_stream_rpc_method_handler(
|
|
||||||
servicer.PredictStream,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Embedding': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Embedding,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'GenerateImage': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.GenerateImage,
|
|
||||||
request_deserializer=backend__pb2.GenerateImageRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'AudioTranscription': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.AudioTranscription,
|
|
||||||
request_deserializer=backend__pb2.TranscriptRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.TranscriptResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'TTS': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TTS,
|
|
||||||
request_deserializer=backend__pb2.TTSRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'TokenizeString': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TokenizeString,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
'Status': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Status,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.StatusResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
}
|
|
||||||
generic_handler = grpc.method_handlers_generic_handler(
|
|
||||||
'backend.Backend', rpc_method_handlers)
|
|
||||||
server.add_generic_rpc_handlers((generic_handler,))
|
|
||||||
|
|
||||||
|
|
||||||
# This class is part of an EXPERIMENTAL API.
|
|
||||||
class Backend(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Health(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Predict(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def LoadModel(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
|
|
||||||
backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def PredictStream(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Embedding(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.EmbeddingResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def GenerateImage(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
|
|
||||||
backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def AudioTranscription(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
|
|
||||||
backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
backend__pb2.TranscriptResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TTS(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
|
|
||||||
backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TokenizeString(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.TokenizationResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Status(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.StatusResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
@@ -1,16 +1,26 @@
|
|||||||
.PHONY: mamba
|
.PHONY: mamba
|
||||||
mamba:
|
mamba: protogen
|
||||||
$(MAKE) -C ../common-env/transformers
|
$(MAKE) -C ../common-env/transformers
|
||||||
bash install.sh
|
bash install.sh
|
||||||
|
|
||||||
.PHONY: run
|
.PHONY: run
|
||||||
run:
|
run: protogen
|
||||||
@echo "Running mamba..."
|
@echo "Running mamba..."
|
||||||
bash run.sh
|
bash run.sh
|
||||||
@echo "mamba run."
|
@echo "mamba run."
|
||||||
|
|
||||||
.PHONY: test
|
.PHONY: test
|
||||||
test:
|
test: protogen
|
||||||
@echo "Testing mamba..."
|
@echo "Testing mamba..."
|
||||||
bash test.sh
|
bash test.sh
|
||||||
@echo "mamba tested."
|
@echo "mamba tested."
|
||||||
|
|
||||||
|
.PHONY: protogen
|
||||||
|
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
.PHONY: protogen-clean
|
||||||
|
protogen-clean:
|
||||||
|
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
backend_pb2_grpc.py backend_pb2.py:
|
||||||
|
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||||
File diff suppressed because one or more lines are too long
@@ -1,363 +0,0 @@
|
|||||||
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
|
|
||||||
"""Client and server classes corresponding to protobuf-defined services."""
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
import backend_pb2 as backend__pb2
|
|
||||||
|
|
||||||
|
|
||||||
class BackendStub(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def __init__(self, channel):
|
|
||||||
"""Constructor.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
channel: A grpc.Channel.
|
|
||||||
"""
|
|
||||||
self.Health = channel.unary_unary(
|
|
||||||
'/backend.Backend/Health',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Predict = channel.unary_unary(
|
|
||||||
'/backend.Backend/Predict',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.LoadModel = channel.unary_unary(
|
|
||||||
'/backend.Backend/LoadModel',
|
|
||||||
request_serializer=backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.PredictStream = channel.unary_stream(
|
|
||||||
'/backend.Backend/PredictStream',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Embedding = channel.unary_unary(
|
|
||||||
'/backend.Backend/Embedding',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.EmbeddingResult.FromString,
|
|
||||||
)
|
|
||||||
self.GenerateImage = channel.unary_unary(
|
|
||||||
'/backend.Backend/GenerateImage',
|
|
||||||
request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.AudioTranscription = channel.unary_unary(
|
|
||||||
'/backend.Backend/AudioTranscription',
|
|
||||||
request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TranscriptResult.FromString,
|
|
||||||
)
|
|
||||||
self.TTS = channel.unary_unary(
|
|
||||||
'/backend.Backend/TTS',
|
|
||||||
request_serializer=backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.TokenizeString = channel.unary_unary(
|
|
||||||
'/backend.Backend/TokenizeString',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TokenizationResponse.FromString,
|
|
||||||
)
|
|
||||||
self.Status = channel.unary_unary(
|
|
||||||
'/backend.Backend/Status',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.StatusResponse.FromString,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class BackendServicer(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def Health(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Predict(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def LoadModel(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def PredictStream(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Embedding(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def GenerateImage(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def AudioTranscription(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TTS(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TokenizeString(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Status(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
|
|
||||||
def add_BackendServicer_to_server(servicer, server):
|
|
||||||
rpc_method_handlers = {
|
|
||||||
'Health': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Health,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Predict': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Predict,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'LoadModel': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.LoadModel,
|
|
||||||
request_deserializer=backend__pb2.ModelOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'PredictStream': grpc.unary_stream_rpc_method_handler(
|
|
||||||
servicer.PredictStream,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Embedding': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Embedding,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'GenerateImage': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.GenerateImage,
|
|
||||||
request_deserializer=backend__pb2.GenerateImageRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'AudioTranscription': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.AudioTranscription,
|
|
||||||
request_deserializer=backend__pb2.TranscriptRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.TranscriptResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'TTS': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TTS,
|
|
||||||
request_deserializer=backend__pb2.TTSRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'TokenizeString': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TokenizeString,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
'Status': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Status,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.StatusResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
}
|
|
||||||
generic_handler = grpc.method_handlers_generic_handler(
|
|
||||||
'backend.Backend', rpc_method_handlers)
|
|
||||||
server.add_generic_rpc_handlers((generic_handler,))
|
|
||||||
|
|
||||||
|
|
||||||
# This class is part of an EXPERIMENTAL API.
|
|
||||||
class Backend(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Health(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Predict(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def LoadModel(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
|
|
||||||
backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def PredictStream(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Embedding(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.EmbeddingResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def GenerateImage(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
|
|
||||||
backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def AudioTranscription(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
|
|
||||||
backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
backend__pb2.TranscriptResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TTS(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
|
|
||||||
backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TokenizeString(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.TokenizationResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Status(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.StatusResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
39
backend/python/parler-tts/Makefile
Normal file
39
backend/python/parler-tts/Makefile
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
# Conda environment definition used by install.sh; the CPU variant is the default.
export CONDA_ENV_PATH = "parler.yml"
SKIP_CONDA?=0
# CUDA (cublas) builds use the environment file that pins the NVIDIA wheels.
ifeq ($(BUILD_TYPE), cublas)
export CONDA_ENV_PATH = "parler-nvidia.yml"
endif

# Intel GPUs are supposed to have their dependencies installed in the main
# python environment, so we skip conda installation for SYCL builds.
# https://github.com/intel/intel-extension-for-pytorch/issues/538
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
export SKIP_CONDA=1
endif

# Install the backend environment (after generating the gRPC stubs).
.PHONY: parler-tts
parler-tts: protogen
	@echo "Installing $(CONDA_ENV_PATH)..."
	bash install.sh $(CONDA_ENV_PATH)

# Start the parler-tts gRPC server.
.PHONY: run
run: protogen
	@echo "Running parler-tts..."
	bash run.sh
	@echo "parler-tts run."

# Run the parler-tts backend unit tests.
.PHONY: test
test: protogen
	@echo "Testing parler-tts..."
	bash test.sh
	@echo "parler-tts tested."

# Generate the Python gRPC stubs if they are missing.
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py

# Remove the generated stubs.
.PHONY: protogen-clean
protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py

backend_pb2_grpc.py backend_pb2.py:
	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||||
39
backend/python/parler-tts/install.sh
Executable file
39
backend/python/parler-tts/install.sh
Executable file
@@ -0,0 +1,39 @@
|
|||||||
|
#!/bin/bash
# Creates the "parler" conda environment from the environment file given as $1
# (unless SKIP_CONDA=1) and patches protobuf's builder.py inside it.
set -ex

SKIP_CONDA=${SKIP_CONDA:-0}

# NOTE: despite its name, this succeeds (returns 0) when the environment does
# NOT exist yet: the exit status of `conda list` is negated. The caller below
# relies on that to decide whether the environment has to be created.
conda_env_exists(){
    ! conda list --name "${@}" >/dev/null 2>/dev/null
}

if [ "$SKIP_CONDA" -eq 1 ]; then
    echo "Skipping conda environment installation"
else
    export PATH=$PATH:/opt/conda/bin
    if conda_env_exists "parler" ; then
        echo "Creating virtual environment..."
        conda env create --name parler --file "$1"
        echo "Virtual environment created."
    else
        echo "Virtual environment already exists."
    fi
fi

if [ "$SKIP_CONDA" -ne 1 ]; then
    # Activate conda environment
    source activate parler
    # https://github.com/descriptinc/audiotools/issues/101
    # incompatible protobuf versions.
    # --fail makes curl exit non-zero on an HTTP error, so that (with `set -e`)
    # a failed download aborts the install instead of overwriting builder.py
    # with an error page.
    curl --fail -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o "$CONDA_PREFIX/lib/python3.11/site-packages/google/protobuf/internal/builder.py"
fi

if [ "$PIP_CACHE_PURGE" = true ] ; then
    if [ "$SKIP_CONDA" -ne 1 ]; then
        # Activate conda environment
        source activate parler
    fi

    pip cache purge
fi
|
||||||
48
backend/python/parler-tts/parler-nvidia.yml
Normal file
48
backend/python/parler-tts/parler-nvidia.yml
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
name: parler
|
||||||
|
channels:
|
||||||
|
- defaults
|
||||||
|
dependencies:
|
||||||
|
- _libgcc_mutex=0.1=main
|
||||||
|
- _openmp_mutex=5.1=1_gnu
|
||||||
|
- bzip2=1.0.8=h7b6447c_0
|
||||||
|
- ca-certificates=2023.08.22=h06a4308_0
|
||||||
|
- ld_impl_linux-64=2.38=h1181459_1
|
||||||
|
- libffi=3.4.4=h6a678d5_0
|
||||||
|
- libgcc-ng=11.2.0=h1234567_1
|
||||||
|
- libgomp=11.2.0=h1234567_1
|
||||||
|
- libstdcxx-ng=11.2.0=h1234567_1
|
||||||
|
- libuuid=1.41.5=h5eee18b_0
|
||||||
|
- ncurses=6.4=h6a678d5_0
|
||||||
|
- openssl=3.0.11=h7f8727e_2
|
||||||
|
- pip=23.2.1=py311h06a4308_0
|
||||||
|
- python=3.11.5=h955ad1f_0
|
||||||
|
- readline=8.2=h5eee18b_0
|
||||||
|
- setuptools=68.0.0=py311h06a4308_0
|
||||||
|
- sqlite=3.41.2=h5eee18b_0
|
||||||
|
- tk=8.6.12=h1ccaba5_0
|
||||||
|
- tzdata=2023c=h04d1e81_0
|
||||||
|
- wheel=0.41.2=py311h06a4308_0
|
||||||
|
- xz=5.4.2=h5eee18b_0
|
||||||
|
- zlib=1.2.13=h5eee18b_0
|
||||||
|
- pip:
|
||||||
|
- accelerate>=0.11.0
|
||||||
|
- grpcio==1.59.0
|
||||||
|
- numpy==1.26.0
|
||||||
|
- nvidia-cublas-cu12==12.1.3.1
|
||||||
|
- nvidia-cuda-cupti-cu12==12.1.105
|
||||||
|
- nvidia-cuda-nvrtc-cu12==12.1.105
|
||||||
|
- nvidia-cuda-runtime-cu12==12.1.105
|
||||||
|
- nvidia-cudnn-cu12==8.9.2.26
|
||||||
|
- nvidia-cufft-cu12==11.0.2.54
|
||||||
|
- nvidia-curand-cu12==10.3.2.106
|
||||||
|
- nvidia-cusolver-cu12==11.4.5.107
|
||||||
|
- nvidia-cusparse-cu12==12.1.0.106
|
||||||
|
- nvidia-nccl-cu12==2.18.1
|
||||||
|
- nvidia-nvjitlink-cu12==12.2.140
|
||||||
|
- nvidia-nvtx-cu12==12.1.105
|
||||||
|
- torch==2.1.0
|
||||||
|
- transformers>=4.34.0
|
||||||
|
- descript-audio-codec
|
||||||
|
- sentencepiece
|
||||||
|
- git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
|
||||||
|
prefix: /opt/conda/envs/parler
|
||||||
36
backend/python/parler-tts/parler.yml
Normal file
36
backend/python/parler-tts/parler.yml
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
name: parler
|
||||||
|
channels:
|
||||||
|
- defaults
|
||||||
|
dependencies:
|
||||||
|
- _libgcc_mutex=0.1=main
|
||||||
|
- _openmp_mutex=5.1=1_gnu
|
||||||
|
- bzip2=1.0.8=h7b6447c_0
|
||||||
|
- ca-certificates=2023.08.22=h06a4308_0
|
||||||
|
- ld_impl_linux-64=2.38=h1181459_1
|
||||||
|
- libffi=3.4.4=h6a678d5_0
|
||||||
|
- libgcc-ng=11.2.0=h1234567_1
|
||||||
|
- libgomp=11.2.0=h1234567_1
|
||||||
|
- libstdcxx-ng=11.2.0=h1234567_1
|
||||||
|
- libuuid=1.41.5=h5eee18b_0
|
||||||
|
- ncurses=6.4=h6a678d5_0
|
||||||
|
- openssl=3.0.11=h7f8727e_2
|
||||||
|
- pip=23.2.1=py311h06a4308_0
|
||||||
|
- python=3.11.5=h955ad1f_0
|
||||||
|
- readline=8.2=h5eee18b_0
|
||||||
|
- setuptools=68.0.0=py311h06a4308_0
|
||||||
|
- sqlite=3.41.2=h5eee18b_0
|
||||||
|
- tk=8.6.12=h1ccaba5_0
|
||||||
|
- tzdata=2023c=h04d1e81_0
|
||||||
|
- wheel=0.41.2=py311h06a4308_0
|
||||||
|
- xz=5.4.2=h5eee18b_0
|
||||||
|
- zlib=1.2.13=h5eee18b_0
|
||||||
|
- pip:
|
||||||
|
- accelerate>=0.11.0
|
||||||
|
- numpy==1.26.0
|
||||||
|
- grpcio==1.59.0
|
||||||
|
- torch==2.1.0
|
||||||
|
- transformers>=4.34.0
|
||||||
|
- descript-audio-codec
|
||||||
|
- sentencepiece
|
||||||
|
- git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
|
||||||
|
prefix: /opt/conda/envs/parler
|
||||||
125
backend/python/parler-tts/parler_tts_server.py
Normal file
125
backend/python/parler-tts/parler_tts_server.py
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Extra gRPC server for ParlerTTSForConditionalGeneration models.
|
||||||
|
"""
|
||||||
|
from concurrent import futures
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import signal
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
import time
|
||||||
|
import backend_pb2
|
||||||
|
import backend_pb2_grpc
|
||||||
|
|
||||||
|
import grpc
|
||||||
|
|
||||||
|
from scipy.io.wavfile import write as write_wav
|
||||||
|
|
||||||
|
from parler_tts import ParlerTTSForConditionalGeneration
|
||||||
|
from transformers import AutoTokenizer
|
||||||
|
import soundfile as sf
|
||||||
|
import torch
|
||||||
|
|
||||||
|
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||||
|
|
||||||
|
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
||||||
|
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||||
|
|
||||||
|
# Implement the BackendServicer class with the service methods
|
||||||
|
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    A gRPC servicer for the parler-tts backend service.

    This class implements the Health, LoadModel and TTS gRPC methods. The model
    and tokenizer loaded by LoadModel are kept on the instance and reused by TTS.
    """
    def Health(self, request, context):
        """
        A gRPC method that returns the health status of the backend service.

        Args:
            request: The health request message (its content is not read).
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Reply object whose message is the bytes b"OK".
        """
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """
        A gRPC method that loads a model and its tokenizer into memory.

        Args:
            request: The load request; `request.Model` is the name passed to `from_pretrained`.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Result object; `success` is False and `message` describes the
            exception when loading fails.
        """
        model_name = request.Model
        # Use the first CUDA device when one is available, otherwise the CPU.
        device = "cuda:0" if torch.cuda.is_available() else "cpu"
        try:
            self.model = ParlerTTSForConditionalGeneration.from_pretrained(model_name).to(device)
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def TTS(self, request, context):
        """
        A gRPC method that synthesizes `request.text` and writes the audio to `request.dst`.

        Args:
            request: The TTS request. `request.model` must be non-empty,
                `request.voice` is the textual voice description (a default
                description is used when it is empty), `request.text` is the
                prompt and `request.dst` the output file path.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Result object; `success` is False with an explanatory `message`
            when the request is invalid, no model is loaded, or generation fails.
        """
        model_name = request.model
        voice = request.voice
        if voice == "":
            voice = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
        if model_name == "":
            return backend_pb2.Result(success=False, message="request.model is required")
        # Report a clear error instead of an opaque AttributeError when TTS is
        # called before LoadModel succeeded.
        if getattr(self, "model", None) is None or getattr(self, "tokenizer", None) is None:
            return backend_pb2.Result(success=False, message="model not loaded: call LoadModel first")
        try:
            device = "cuda:0" if torch.cuda.is_available() else "cpu"
            # The voice description and the text prompt are tokenized separately.
            input_ids = self.tokenizer(voice, return_tensors="pt").input_ids.to(device)
            prompt_input_ids = self.tokenizer(request.text, return_tensors="pt").input_ids.to(device)

            generation = self.model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
            audio_arr = generation.cpu().numpy().squeeze()
            print("[parler-tts] TTS generated!", file=sys.stderr)
            sf.write(request.dst, audio_arr, self.model.config.sampling_rate)
            print("[parler-tts] TTS saved to", request.dst, file=sys.stderr)
            print("[parler-tts] TTS for", file=sys.stderr)
            print(request, file=sys.stderr)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(success=True)
|
||||||
|
|
||||||
|
|
||||||
|
def serve(address):
    """
    Start the gRPC server on `address` and block until the process is terminated.

    Args:
        address: The "host:port" string the insecure port is bound to.
    """
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("[parler-tts] Server started. Listening on: " + address, file=sys.stderr)

    # Define the signal handler function
    def signal_handler(sig, frame):
        # Log to stderr like every other message emitted by this server.
        print("[parler-tts] Received termination signal. Shutting down...", file=sys.stderr)
        server.stop(0)
        sys.exit(0)

    # Set the signal handlers for SIGINT and SIGTERM
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    try:
        # server.start() does not block, so keep the main thread alive.
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        # Fallback only: with the SIGINT handler installed above this branch is
        # not expected to trigger.
        server.stop(0)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Read the bind address from the command line, log the parsed arguments
    # and hand control to the blocking server loop.
    cli = argparse.ArgumentParser(description="Run the gRPC server.")
    cli.add_argument("--addr", default="localhost:50051", help="The address to bind the server to.")
    parsed = cli.parse_args()
    print(f"[parler-tts] startup: {parsed}", file=sys.stderr)
    serve(parsed.addr)
|
||||||
16
backend/python/parler-tts/run.sh
Normal file
16
backend/python/parler-tts/run.sh
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#!/bin/bash

##
## A bash script wrapper that runs the parler-tts server with conda

echo "Launching gRPC server for parler-tts"

export PATH=$PATH:/opt/conda/bin

# Activate conda environment
source activate parler

# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# Quote the script path and forward the arguments as "$@" so that paths and
# arguments containing spaces are passed through unchanged.
python "$DIR/parler_tts_server.py" "$@"
|
||||||
11
backend/python/parler-tts/test.sh
Normal file
11
backend/python/parler-tts/test.sh
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
#!/bin/bash
##
## A bash script wrapper that runs the parler-tts backend tests with conda

# Activate conda environment
source activate parler

# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# Quote the path so a checkout directory containing spaces still works.
python -m unittest "$DIR/test_parler.py"
|
||||||
81
backend/python/parler-tts/test_parler.py
Normal file
81
backend/python/parler-tts/test_parler.py
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
"""
|
||||||
|
A test script to test the gRPC service
|
||||||
|
"""
|
||||||
|
import unittest
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
import backend_pb2
|
||||||
|
import backend_pb2_grpc
|
||||||
|
|
||||||
|
import grpc
|
||||||
|
|
||||||
|
|
||||||
|
class TestBackendServicer(unittest.TestCase):
    """
    TestBackendServicer is the class that tests the gRPC service.

    unittest runs setUp() before and tearDown() after every test method, so the
    tests must not call them again: doing so would spawn a second server on the
    same port and leave the first process without anyone terminating it.
    """
    def setUp(self):
        """
        This method sets up the gRPC service by starting the server
        """
        self.service = subprocess.Popen(["python3", "parler_tts_server.py", "--addr", "localhost:50051"])
        # Give the server process time to start before the test connects.
        time.sleep(10)

    def tearDown(self) -> None:
        """
        This method tears down the gRPC service by terminating the server
        """
        self.service.terminate()
        self.service.wait()

    def test_server_startup(self):
        """
        This method tests if the server starts up successfully
        """
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.Health(backend_pb2.HealthMessage())
                self.assertEqual(response.message, b'OK')
        except grpc.RpcError as err:
            # Only RPC failures are translated; assertion failures propagate
            # with their own message instead of being masked.
            print(err)
            self.fail("Server failed to start")

    def test_load_model(self):
        """
        This method tests if the model is loaded successfully
        """
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1"))
                self.assertTrue(response.success)
                self.assertEqual(response.message, "Model loaded successfully")
        except grpc.RpcError as err:
            print(err)
            self.fail("LoadModel service failed")

    def test_tts(self):
        """
        This method tests if a TTS request gets a response after loading the model
        """
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1"))
                self.assertTrue(response.success)
                # NOTE(review): the request sets neither `model` nor `dst`, and only
                # the presence of a response is asserted; confirm whether a
                # successful synthesis should be checked here.
                tts_request = backend_pb2.TTSRequest(text="Hey, how are you doing today?")
                tts_response = stub.TTS(tts_request)
                self.assertIsNotNone(tts_response)
        except grpc.RpcError as err:
            print(err)
            self.fail("TTS service failed")
|
||||||
@@ -1,17 +1,27 @@
|
|||||||
.PHONY: petals
|
.PHONY: petals
|
||||||
petals:
|
petals: protogen
|
||||||
@echo "Creating virtual environment..."
|
@echo "Creating virtual environment..."
|
||||||
bash install.sh "petals.yml"
|
bash install.sh "petals.yml"
|
||||||
@echo "Virtual environment created."
|
@echo "Virtual environment created."
|
||||||
|
|
||||||
.PHONY: run
|
.PHONY: run
|
||||||
run:
|
run: protogen
|
||||||
@echo "Running petals..."
|
@echo "Running petals..."
|
||||||
bash run.sh
|
bash run.sh
|
||||||
@echo "petals run."
|
@echo "petals run."
|
||||||
|
|
||||||
.PHONY: test
|
.PHONY: test
|
||||||
test:
|
test: protogen
|
||||||
@echo "Testing petals..."
|
@echo "Testing petals..."
|
||||||
bash test.sh
|
bash test.sh
|
||||||
@echo "petals tested."
|
@echo "petals tested."
|
||||||
|
|
||||||
|
.PHONY: protogen
|
||||||
|
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
.PHONY: protogen-clean
|
||||||
|
protogen-clean:
|
||||||
|
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
backend_pb2_grpc.py backend_pb2.py:
|
||||||
|
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||||
File diff suppressed because one or more lines are too long
@@ -1,363 +0,0 @@
|
|||||||
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
|
|
||||||
"""Client and server classes corresponding to protobuf-defined services."""
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
import backend_pb2 as backend__pb2
|
|
||||||
|
|
||||||
|
|
||||||
class BackendStub(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def __init__(self, channel):
|
|
||||||
"""Constructor.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
channel: A grpc.Channel.
|
|
||||||
"""
|
|
||||||
self.Health = channel.unary_unary(
|
|
||||||
'/backend.Backend/Health',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Predict = channel.unary_unary(
|
|
||||||
'/backend.Backend/Predict',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.LoadModel = channel.unary_unary(
|
|
||||||
'/backend.Backend/LoadModel',
|
|
||||||
request_serializer=backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.PredictStream = channel.unary_stream(
|
|
||||||
'/backend.Backend/PredictStream',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Embedding = channel.unary_unary(
|
|
||||||
'/backend.Backend/Embedding',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.EmbeddingResult.FromString,
|
|
||||||
)
|
|
||||||
self.GenerateImage = channel.unary_unary(
|
|
||||||
'/backend.Backend/GenerateImage',
|
|
||||||
request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.AudioTranscription = channel.unary_unary(
|
|
||||||
'/backend.Backend/AudioTranscription',
|
|
||||||
request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TranscriptResult.FromString,
|
|
||||||
)
|
|
||||||
self.TTS = channel.unary_unary(
|
|
||||||
'/backend.Backend/TTS',
|
|
||||||
request_serializer=backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.TokenizeString = channel.unary_unary(
|
|
||||||
'/backend.Backend/TokenizeString',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TokenizationResponse.FromString,
|
|
||||||
)
|
|
||||||
self.Status = channel.unary_unary(
|
|
||||||
'/backend.Backend/Status',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.StatusResponse.FromString,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class BackendServicer(object):
    """Base servicer for the backend.Backend gRPC service.

    Every RPC method defined here is a stub: it marks the call as
    UNIMPLEMENTED on the context and raises NotImplementedError. Concrete
    backends subclass this and override the RPCs they support.
    """

    @staticmethod
    def _unimplemented(context):
        """Flag the call as UNIMPLEMENTED on ``context`` and raise NotImplementedError."""
        reason = 'Method not implemented!'
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details(reason)
        raise NotImplementedError(reason)

    def Health(self, request, context):
        """Stub for the Health RPC; always reports UNIMPLEMENTED."""
        self._unimplemented(context)

    def Predict(self, request, context):
        """Stub for the Predict RPC; always reports UNIMPLEMENTED."""
        self._unimplemented(context)

    def LoadModel(self, request, context):
        """Stub for the LoadModel RPC; always reports UNIMPLEMENTED."""
        self._unimplemented(context)

    def PredictStream(self, request, context):
        """Stub for the PredictStream RPC; always reports UNIMPLEMENTED."""
        self._unimplemented(context)

    def Embedding(self, request, context):
        """Stub for the Embedding RPC; always reports UNIMPLEMENTED."""
        self._unimplemented(context)

    def GenerateImage(self, request, context):
        """Stub for the GenerateImage RPC; always reports UNIMPLEMENTED."""
        self._unimplemented(context)

    def AudioTranscription(self, request, context):
        """Stub for the AudioTranscription RPC; always reports UNIMPLEMENTED."""
        self._unimplemented(context)

    def TTS(self, request, context):
        """Stub for the TTS RPC; always reports UNIMPLEMENTED."""
        self._unimplemented(context)

    def TokenizeString(self, request, context):
        """Stub for the TokenizeString RPC; always reports UNIMPLEMENTED."""
        self._unimplemented(context)

    def Status(self, request, context):
        """Stub for the Status RPC; always reports UNIMPLEMENTED."""
        self._unimplemented(context)
|
|
||||||
|
|
||||||
|
|
||||||
def add_BackendServicer_to_server(servicer, server):
    """Register the RPC methods of ``servicer`` on ``server`` under 'backend.Backend'.

    Args:
        servicer: Object providing one method per RPC name listed below.
        server: Server object exposing ``add_generic_rpc_handlers``.
    """
    unary = grpc.unary_unary_rpc_method_handler
    streaming = grpc.unary_stream_rpc_method_handler
    # (RPC name, handler factory, request message, response message)
    rpc_table = (
        ('Health', unary, backend__pb2.HealthMessage, backend__pb2.Reply),
        ('Predict', unary, backend__pb2.PredictOptions, backend__pb2.Reply),
        ('LoadModel', unary, backend__pb2.ModelOptions, backend__pb2.Result),
        ('PredictStream', streaming, backend__pb2.PredictOptions, backend__pb2.Reply),
        ('Embedding', unary, backend__pb2.PredictOptions, backend__pb2.EmbeddingResult),
        ('GenerateImage', unary, backend__pb2.GenerateImageRequest, backend__pb2.Result),
        ('AudioTranscription', unary, backend__pb2.TranscriptRequest, backend__pb2.TranscriptResult),
        ('TTS', unary, backend__pb2.TTSRequest, backend__pb2.Result),
        ('TokenizeString', unary, backend__pb2.PredictOptions, backend__pb2.TokenizationResponse),
        ('Status', unary, backend__pb2.HealthMessage, backend__pb2.StatusResponse),
    )
    rpc_method_handlers = {}
    for rpc_name, make_handler, request_type, response_type in rpc_table:
        rpc_method_handlers[rpc_name] = make_handler(
            getattr(servicer, rpc_name),
            request_deserializer=request_type.FromString,
            response_serializer=response_type.SerializeToString,
        )
    generic_handler = grpc.method_handlers_generic_handler(
        'backend.Backend', rpc_method_handlers)
    server.add_generic_rpc_handlers((generic_handler,))
|
|
||||||
|
|
||||||
|
|
||||||
# This class is part of an EXPERIMENTAL API.
class Backend(object):
    """Static helpers that invoke each backend.Backend RPC on a target.

    Each method forwards its arguments to grpc.experimental.unary_unary
    (unary_stream for PredictStream) together with the RPC path and the
    matching request serializer / response deserializer.
    """

    @staticmethod
    def Health(request, target, options=(), channel_credentials=None,
               call_credentials=None, insecure=False, compression=None,
               wait_for_ready=None, timeout=None, metadata=None):
        """Invoke the Health RPC on ``target`` (HealthMessage -> Reply)."""
        serialize = backend__pb2.HealthMessage.SerializeToString
        deserialize = backend__pb2.Reply.FromString
        return grpc.experimental.unary_unary(
            request, target, '/backend.Backend/Health', serialize, deserialize,
            options, channel_credentials, insecure, call_credentials,
            compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def Predict(request, target, options=(), channel_credentials=None,
                call_credentials=None, insecure=False, compression=None,
                wait_for_ready=None, timeout=None, metadata=None):
        """Invoke the Predict RPC on ``target`` (PredictOptions -> Reply)."""
        serialize = backend__pb2.PredictOptions.SerializeToString
        deserialize = backend__pb2.Reply.FromString
        return grpc.experimental.unary_unary(
            request, target, '/backend.Backend/Predict', serialize, deserialize,
            options, channel_credentials, insecure, call_credentials,
            compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def LoadModel(request, target, options=(), channel_credentials=None,
                  call_credentials=None, insecure=False, compression=None,
                  wait_for_ready=None, timeout=None, metadata=None):
        """Invoke the LoadModel RPC on ``target`` (ModelOptions -> Result)."""
        serialize = backend__pb2.ModelOptions.SerializeToString
        deserialize = backend__pb2.Result.FromString
        return grpc.experimental.unary_unary(
            request, target, '/backend.Backend/LoadModel', serialize, deserialize,
            options, channel_credentials, insecure, call_credentials,
            compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def PredictStream(request, target, options=(), channel_credentials=None,
                      call_credentials=None, insecure=False, compression=None,
                      wait_for_ready=None, timeout=None, metadata=None):
        """Invoke the server-streaming PredictStream RPC on ``target`` (PredictOptions -> Reply)."""
        serialize = backend__pb2.PredictOptions.SerializeToString
        deserialize = backend__pb2.Reply.FromString
        return grpc.experimental.unary_stream(
            request, target, '/backend.Backend/PredictStream', serialize, deserialize,
            options, channel_credentials, insecure, call_credentials,
            compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def Embedding(request, target, options=(), channel_credentials=None,
                  call_credentials=None, insecure=False, compression=None,
                  wait_for_ready=None, timeout=None, metadata=None):
        """Invoke the Embedding RPC on ``target`` (PredictOptions -> EmbeddingResult)."""
        serialize = backend__pb2.PredictOptions.SerializeToString
        deserialize = backend__pb2.EmbeddingResult.FromString
        return grpc.experimental.unary_unary(
            request, target, '/backend.Backend/Embedding', serialize, deserialize,
            options, channel_credentials, insecure, call_credentials,
            compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def GenerateImage(request, target, options=(), channel_credentials=None,
                      call_credentials=None, insecure=False, compression=None,
                      wait_for_ready=None, timeout=None, metadata=None):
        """Invoke the GenerateImage RPC on ``target`` (GenerateImageRequest -> Result)."""
        serialize = backend__pb2.GenerateImageRequest.SerializeToString
        deserialize = backend__pb2.Result.FromString
        return grpc.experimental.unary_unary(
            request, target, '/backend.Backend/GenerateImage', serialize, deserialize,
            options, channel_credentials, insecure, call_credentials,
            compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def AudioTranscription(request, target, options=(), channel_credentials=None,
                           call_credentials=None, insecure=False, compression=None,
                           wait_for_ready=None, timeout=None, metadata=None):
        """Invoke the AudioTranscription RPC on ``target`` (TranscriptRequest -> TranscriptResult)."""
        serialize = backend__pb2.TranscriptRequest.SerializeToString
        deserialize = backend__pb2.TranscriptResult.FromString
        return grpc.experimental.unary_unary(
            request, target, '/backend.Backend/AudioTranscription', serialize, deserialize,
            options, channel_credentials, insecure, call_credentials,
            compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def TTS(request, target, options=(), channel_credentials=None,
            call_credentials=None, insecure=False, compression=None,
            wait_for_ready=None, timeout=None, metadata=None):
        """Invoke the TTS RPC on ``target`` (TTSRequest -> Result)."""
        serialize = backend__pb2.TTSRequest.SerializeToString
        deserialize = backend__pb2.Result.FromString
        return grpc.experimental.unary_unary(
            request, target, '/backend.Backend/TTS', serialize, deserialize,
            options, channel_credentials, insecure, call_credentials,
            compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def TokenizeString(request, target, options=(), channel_credentials=None,
                       call_credentials=None, insecure=False, compression=None,
                       wait_for_ready=None, timeout=None, metadata=None):
        """Invoke the TokenizeString RPC on ``target`` (PredictOptions -> TokenizationResponse)."""
        serialize = backend__pb2.PredictOptions.SerializeToString
        deserialize = backend__pb2.TokenizationResponse.FromString
        return grpc.experimental.unary_unary(
            request, target, '/backend.Backend/TokenizeString', serialize, deserialize,
            options, channel_credentials, insecure, call_credentials,
            compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def Status(request, target, options=(), channel_credentials=None,
               call_credentials=None, insecure=False, compression=None,
               wait_for_ready=None, timeout=None, metadata=None):
        """Invoke the Status RPC on ``target`` (HealthMessage -> StatusResponse)."""
        serialize = backend__pb2.HealthMessage.SerializeToString
        deserialize = backend__pb2.StatusResponse.FromString
        return grpc.experimental.unary_unary(
            request, target, '/backend.Backend/Status', serialize, deserialize,
            options, channel_credentials, insecure, call_credentials,
            compression, wait_for_ready, timeout, metadata)
|
|
||||||
27
backend/python/rerankers/Makefile
Normal file
27
backend/python/rerankers/Makefile
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
.PHONY: rerankers
|
||||||
|
rerankers: protogen
|
||||||
|
$(MAKE) -C ../common-env/transformers
|
||||||
|
|
||||||
|
|
||||||
|
.PHONY: run
|
||||||
|
run: protogen
|
||||||
|
@echo "Running rerankers..."
|
||||||
|
bash run.sh
|
||||||
|
@echo "rerankers run."
|
||||||
|
|
||||||
|
# It does not work well from the command line. It only works with an IDE like VSCode.
|
||||||
|
.PHONY: test
|
||||||
|
test: protogen
|
||||||
|
@echo "Testing rerankers..."
|
||||||
|
bash test.sh
|
||||||
|
@echo "rerankers tested."
|
||||||
|
|
||||||
|
.PHONY: protogen
|
||||||
|
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
.PHONY: protogen-clean
|
||||||
|
protogen-clean:
|
||||||
|
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
backend_pb2_grpc.py backend_pb2.py:
|
||||||
|
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||||
5
backend/python/rerankers/README.md
Normal file
5
backend/python/rerankers/README.md
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
# Creating a separate environment for the reranker project
|
||||||
|
|
||||||
|
```
|
||||||
|
make rerankers
|
||||||
|
```
|
||||||
123
backend/python/rerankers/reranker.py
Executable file
123
backend/python/rerankers/reranker.py
Executable file
@@ -0,0 +1,123 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Extra gRPC server for Rerankers models.
|
||||||
|
"""
|
||||||
|
from concurrent import futures
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import signal
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
import time
|
||||||
|
import backend_pb2
|
||||||
|
import backend_pb2_grpc
|
||||||
|
|
||||||
|
import grpc
|
||||||
|
|
||||||
|
from rerankers import Reranker
|
||||||
|
|
||||||
|
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||||
|
|
||||||
|
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
||||||
|
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||||
|
|
||||||
|
# Implement the BackendServicer class with the service methods
|
||||||
|
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    A gRPC servicer for the rerankers backend.

    This class implements the Health, LoadModel and Rerank RPCs; every other RPC
    is inherited from backend_pb2_grpc.BackendServicer.
    """
    def Health(self, request, context):
        """
        A gRPC method that returns the health status of the backend service.

        Args:
            request: The incoming health-check message (not inspected).
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Reply object whose message is the bytes b"OK".
        """
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """
        A gRPC method that loads a reranking model into memory.

        Args:
            request: The model options. ``Model`` is the model name; a non-empty ``Type``
                is passed to Reranker as ``model_type`` and a non-empty ``PipelineType``
                is passed as ``lang``.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Result object with success=True when the model was created, or
            success=False and a description of the exception otherwise.
        """
        model_name = request.Model
        try:
            kwargs = {}
            if request.Type != "":
                kwargs['model_type'] = request.Type
            if request.PipelineType != "": # Reuse the PipelineType field for language
                kwargs['lang'] = request.PipelineType
            model = Reranker(model_name, **kwargs)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

        # Publish name and model together, only after loading succeeded, so a failed
        # load never leaves model_name pointing at a model that is not in memory
        self.model_name = model_name
        self.model = model
        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def Rerank(self, request, context):
        """
        A gRPC method that ranks the request's documents against its query.

        Args:
            request: The rerank request. ``query`` is the search text, ``documents`` the
                candidate texts, and a positive ``top_n`` caps how many results are returned.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A RerankResult holding one DocumentResult per returned document (index into
            the request's documents, text and relevance score) plus usage counters.
            If no model has been loaded the call is marked FAILED_PRECONDITION and an
            empty RerankResult is returned.
        """
        model = getattr(self, "model", None)
        if model is None:
            # Rerank was called before a successful LoadModel: report it explicitly
            # instead of failing with an AttributeError
            context.set_code(grpc.StatusCode.FAILED_PRECONDITION)
            context.set_details("No model loaded: call LoadModel before Rerank")
            return backend_pb2.RerankResult()

        documents = list(request.documents)
        ranked_results = model.rank(query=request.query, docs=documents, doc_ids=list(range(len(documents))))

        ranked = ranked_results.results
        # An unset top_n reads as 0, which keeps every result.
        # NOTE(review): slicing assumes the ranker returns results best-first - confirm
        if request.top_n > 0:
            ranked = ranked[:request.top_n]

        # Prepare results to return
        results = [
            backend_pb2.DocumentResult(
                index=res.doc_id,
                text=res.text,
                relevance_score=res.score
            ) for res in ranked
        ]

        # Calculate the usage and total tokens
        # TODO: Implement the usage calculation with reranker
        # Until then, whitespace-separated words stand in for tokens
        prompt_tokens = len(request.query.split())
        total_tokens = sum(len(doc.split()) for doc in documents) + prompt_tokens
        usage = backend_pb2.Usage(total_tokens=total_tokens, prompt_tokens=prompt_tokens)
        return backend_pb2.RerankResult(usage=usage, results=results)
|
||||||
|
|
||||||
|
def serve(address):
    """
    Start the gRPC server on ``address`` and block until the process is told to stop.

    Args:
        address: The address passed to the server's add_insecure_port.
    """
    worker_pool = futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)
    grpc_server = grpc.server(worker_pool)
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), grpc_server)
    grpc_server.add_insecure_port(address)
    grpc_server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)

    def _shutdown(signum, stack_frame):
        """Stop the server immediately and exit the process."""
        print("Received termination signal. Shutting down...")
        grpc_server.stop(0)
        sys.exit(0)

    # Route both SIGINT and SIGTERM to the same shutdown handler
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, _shutdown)

    # Keep the calling thread in a sleep loop after the server has been started
    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        grpc_server.stop(0)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: read the bind address and start serving
    cli = argparse.ArgumentParser(description="Run the gRPC server.")
    cli.add_argument("--addr", default="localhost:50051", help="The address to bind the server to.")
    options = cli.parse_args()
    serve(options.addr)
|
||||||
14
backend/python/rerankers/run.sh
Executable file
14
backend/python/rerankers/run.sh
Executable file
@@ -0,0 +1,14 @@
|
|||||||
|
#!/bin/bash

##
## A bash script wrapper that runs the reranker server with conda

export PATH=$PATH:/opt/conda/bin

# Activate conda environment
source activate transformers

# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# Quote the script path and forward the arguments as "$@" so that paths and
# arguments containing whitespace reach reranker.py intact
python "$DIR/reranker.py" "$@"
|
||||||
11
backend/python/rerankers/test.sh
Executable file
11
backend/python/rerankers/test.sh
Executable file
@@ -0,0 +1,11 @@
|
|||||||
|
#!/bin/bash
##
## A bash script wrapper that runs the reranker unit tests with conda

# Activate conda environment
source activate transformers

# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# Quote the path so a checkout directory containing whitespace still works
python -m unittest "$DIR/test_reranker.py"
|
||||||
90
backend/python/rerankers/test_reranker.py
Executable file
90
backend/python/rerankers/test_reranker.py
Executable file
@@ -0,0 +1,90 @@
|
|||||||
|
"""
|
||||||
|
A test script to test the gRPC service
|
||||||
|
"""
|
||||||
|
import unittest
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
import backend_pb2
|
||||||
|
import backend_pb2_grpc
|
||||||
|
|
||||||
|
import grpc
|
||||||
|
|
||||||
|
|
||||||
|
class TestBackendServicer(unittest.TestCase):
    """
    TestBackendServicer is the class that tests the gRPC service

    unittest runs setUp before and tearDown after every test method, so the
    tests must not call them again: a second setUp would spawn another server
    on the same port and lose the handle of the first one.
    """
    def setUp(self):
        """
        This method sets up the gRPC service by starting the server
        """
        self.service = subprocess.Popen(["python3", "reranker.py", "--addr", "localhost:50051"])
        # Fixed grace period for the server process to start listening
        time.sleep(10)

    def tearDown(self) -> None:
        """
        This method tears down the gRPC service by terminating the server
        """
        self.service.kill()
        self.service.wait()

    def test_server_startup(self):
        """
        This method tests if the server starts up successfully
        """
        # Errors and assertion failures propagate unchanged so the report shows
        # the real cause
        with grpc.insecure_channel("localhost:50051") as channel:
            stub = backend_pb2_grpc.BackendStub(channel)
            response = stub.Health(backend_pb2.HealthMessage())
            self.assertEqual(response.message, b'OK')

    def test_load_model(self):
        """
        This method tests if the model is loaded successfully
        """
        with grpc.insecure_channel("localhost:50051") as channel:
            stub = backend_pb2_grpc.BackendStub(channel)
            response = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder"))
            self.assertTrue(response.success)
            self.assertEqual(response.message, "Model loaded successfully")

    def test_rerank(self):
        """
        This method tests if the documents are reranked successfully
        """
        with grpc.insecure_channel("localhost:50051") as channel:
            stub = backend_pb2_grpc.BackendStub(channel)
            request = backend_pb2.RerankRequest(
                query="I love you",
                documents=["I hate you", "I really like you"],
                top_n=2
            )
            response = stub.LoadModel(backend_pb2.ModelOptions(Model="cross-encoder"))
            self.assertTrue(response.success)

            rerank_response = stub.Rerank(request)
            self.assertIsNotNone(rerank_response.results)
            # Check the count before indexing so an empty reply fails with a clear message
            self.assertEqual(len(rerank_response.results), 2)
            self.assertEqual(rerank_response.results[0].text, "I really like you")
            self.assertEqual(rerank_response.results[1].text, "I hate you")
|
||||||
@@ -1,17 +1,27 @@
|
|||||||
.PHONY: sentencetransformers
|
.PHONY: sentencetransformers
|
||||||
sentencetransformers:
|
sentencetransformers: protogen
|
||||||
$(MAKE) -C ../common-env/transformers
|
$(MAKE) -C ../common-env/transformers
|
||||||
|
|
||||||
|
|
||||||
.PHONY: run
|
.PHONY: run
|
||||||
run:
|
run: protogen
|
||||||
@echo "Running sentencetransformers..."
|
@echo "Running sentencetransformers..."
|
||||||
bash run.sh
|
bash run.sh
|
||||||
@echo "sentencetransformers run."
|
@echo "sentencetransformers run."
|
||||||
|
|
||||||
# It does not work well from the command line. It only works with an IDE like VSCode.
|
# It does not work well from the command line. It only works with an IDE like VSCode.
|
||||||
.PHONY: test
|
.PHONY: test
|
||||||
test:
|
test: protogen
|
||||||
@echo "Testing sentencetransformers..."
|
@echo "Testing sentencetransformers..."
|
||||||
bash test.sh
|
bash test.sh
|
||||||
@echo "sentencetransformers tested."
|
@echo "sentencetransformers tested."
|
||||||
|
|
||||||
|
.PHONY: protogen
|
||||||
|
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
.PHONY: protogen-clean
|
||||||
|
protogen-clean:
|
||||||
|
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
backend_pb2_grpc.py backend_pb2.py:
|
||||||
|
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||||
File diff suppressed because one or more lines are too long
@@ -1,363 +0,0 @@
|
|||||||
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
|
|
||||||
"""Client and server classes corresponding to protobuf-defined services."""
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
import backend_pb2 as backend__pb2
|
|
||||||
|
|
||||||
|
|
||||||
class BackendStub(object):
    """Client stub exposing one callable attribute per backend.Backend RPC."""

    def __init__(self, channel):
        """Constructor.

        Args:
            channel: A grpc.Channel.
        """
        unary = channel.unary_unary
        streaming = channel.unary_stream
        # (attribute name, RPC path, channel factory, request message, response message)
        rpc_table = (
            ('Health', '/backend.Backend/Health', unary,
             backend__pb2.HealthMessage, backend__pb2.Reply),
            ('Predict', '/backend.Backend/Predict', unary,
             backend__pb2.PredictOptions, backend__pb2.Reply),
            ('LoadModel', '/backend.Backend/LoadModel', unary,
             backend__pb2.ModelOptions, backend__pb2.Result),
            ('PredictStream', '/backend.Backend/PredictStream', streaming,
             backend__pb2.PredictOptions, backend__pb2.Reply),
            ('Embedding', '/backend.Backend/Embedding', unary,
             backend__pb2.PredictOptions, backend__pb2.EmbeddingResult),
            ('GenerateImage', '/backend.Backend/GenerateImage', unary,
             backend__pb2.GenerateImageRequest, backend__pb2.Result),
            ('AudioTranscription', '/backend.Backend/AudioTranscription', unary,
             backend__pb2.TranscriptRequest, backend__pb2.TranscriptResult),
            ('TTS', '/backend.Backend/TTS', unary,
             backend__pb2.TTSRequest, backend__pb2.Result),
            ('TokenizeString', '/backend.Backend/TokenizeString', unary,
             backend__pb2.PredictOptions, backend__pb2.TokenizationResponse),
            ('Status', '/backend.Backend/Status', unary,
             backend__pb2.HealthMessage, backend__pb2.StatusResponse),
        )
        for attr_name, rpc_path, open_call, request_type, response_type in rpc_table:
            setattr(self, attr_name, open_call(
                rpc_path,
                request_serializer=request_type.SerializeToString,
                response_deserializer=response_type.FromString,
            ))
|
|
||||||
|
|
||||||
|
|
||||||
class BackendServicer(object):
    """Base servicer for the backend.Backend gRPC service.

    Every RPC method defined here is a stub: it marks the call as
    UNIMPLEMENTED on the context and raises NotImplementedError. Concrete
    backends subclass this and override the RPCs they support.
    """

    @staticmethod
    def _unimplemented(context):
        """Flag the call as UNIMPLEMENTED on ``context`` and raise NotImplementedError."""
        reason = 'Method not implemented!'
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details(reason)
        raise NotImplementedError(reason)

    def Health(self, request, context):
        """Stub for the Health RPC; always reports UNIMPLEMENTED."""
        self._unimplemented(context)

    def Predict(self, request, context):
        """Stub for the Predict RPC; always reports UNIMPLEMENTED."""
        self._unimplemented(context)

    def LoadModel(self, request, context):
        """Stub for the LoadModel RPC; always reports UNIMPLEMENTED."""
        self._unimplemented(context)

    def PredictStream(self, request, context):
        """Stub for the PredictStream RPC; always reports UNIMPLEMENTED."""
        self._unimplemented(context)

    def Embedding(self, request, context):
        """Stub for the Embedding RPC; always reports UNIMPLEMENTED."""
        self._unimplemented(context)

    def GenerateImage(self, request, context):
        """Stub for the GenerateImage RPC; always reports UNIMPLEMENTED."""
        self._unimplemented(context)

    def AudioTranscription(self, request, context):
        """Stub for the AudioTranscription RPC; always reports UNIMPLEMENTED."""
        self._unimplemented(context)

    def TTS(self, request, context):
        """Stub for the TTS RPC; always reports UNIMPLEMENTED."""
        self._unimplemented(context)

    def TokenizeString(self, request, context):
        """Stub for the TokenizeString RPC; always reports UNIMPLEMENTED."""
        self._unimplemented(context)

    def Status(self, request, context):
        """Stub for the Status RPC; always reports UNIMPLEMENTED."""
        self._unimplemented(context)
|
|
||||||
|
|
||||||
|
|
||||||
def add_BackendServicer_to_server(servicer, server):
    """Register the RPC methods of ``servicer`` on ``server`` under 'backend.Backend'.

    Args:
        servicer: Object providing one method per RPC name listed below.
        server: Server object exposing ``add_generic_rpc_handlers``.
    """
    unary = grpc.unary_unary_rpc_method_handler
    streaming = grpc.unary_stream_rpc_method_handler
    # (RPC name, handler factory, request message, response message)
    rpc_table = (
        ('Health', unary, backend__pb2.HealthMessage, backend__pb2.Reply),
        ('Predict', unary, backend__pb2.PredictOptions, backend__pb2.Reply),
        ('LoadModel', unary, backend__pb2.ModelOptions, backend__pb2.Result),
        ('PredictStream', streaming, backend__pb2.PredictOptions, backend__pb2.Reply),
        ('Embedding', unary, backend__pb2.PredictOptions, backend__pb2.EmbeddingResult),
        ('GenerateImage', unary, backend__pb2.GenerateImageRequest, backend__pb2.Result),
        ('AudioTranscription', unary, backend__pb2.TranscriptRequest, backend__pb2.TranscriptResult),
        ('TTS', unary, backend__pb2.TTSRequest, backend__pb2.Result),
        ('TokenizeString', unary, backend__pb2.PredictOptions, backend__pb2.TokenizationResponse),
        ('Status', unary, backend__pb2.HealthMessage, backend__pb2.StatusResponse),
    )
    rpc_method_handlers = {}
    for rpc_name, make_handler, request_type, response_type in rpc_table:
        rpc_method_handlers[rpc_name] = make_handler(
            getattr(servicer, rpc_name),
            request_deserializer=request_type.FromString,
            response_serializer=response_type.SerializeToString,
        )
    generic_handler = grpc.method_handlers_generic_handler(
        'backend.Backend', rpc_method_handlers)
    server.add_generic_rpc_handlers((generic_handler,))
|
|
||||||
|
|
||||||
|
|
||||||
# This class is part of an EXPERIMENTAL API.
|
|
||||||
class Backend(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Health(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Predict(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def LoadModel(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
|
|
||||||
backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def PredictStream(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Embedding(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.EmbeddingResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def GenerateImage(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
|
|
||||||
backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def AudioTranscription(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
|
|
||||||
backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
backend__pb2.TranscriptResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TTS(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
|
|
||||||
backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TokenizeString(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.TokenizationResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Status(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.StatusResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
@@ -1,16 +1,25 @@
|
|||||||
|
|
||||||
.PHONY: transformers-musicgen
|
.PHONY: transformers-musicgen
|
||||||
transformers-musicgen:
|
transformers-musicgen: protogen
|
||||||
$(MAKE) -C ../common-env/transformers
|
$(MAKE) -C ../common-env/transformers
|
||||||
|
|
||||||
.PHONY: run
|
.PHONY: run
|
||||||
run:
|
run: protogen
|
||||||
@echo "Running transformers..."
|
@echo "Running transformers..."
|
||||||
bash run.sh
|
bash run.sh
|
||||||
@echo "transformers run."
|
@echo "transformers run."
|
||||||
|
|
||||||
.PHONY: test
|
.PHONY: test
|
||||||
test:
|
test: protogen
|
||||||
@echo "Testing transformers..."
|
@echo "Testing transformers..."
|
||||||
bash test.sh
|
bash test.sh
|
||||||
@echo "transformers tested."
|
@echo "transformers tested."
|
||||||
|
|
||||||
|
.PHONY: protogen
|
||||||
|
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
.PHONY: protogen-clean
|
||||||
|
protogen-clean:
|
||||||
|
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
backend_pb2_grpc.py backend_pb2.py:
|
||||||
|
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||||
File diff suppressed because one or more lines are too long
@@ -1,363 +0,0 @@
|
|||||||
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
|
|
||||||
"""Client and server classes corresponding to protobuf-defined services."""
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
import backend_pb2 as backend__pb2
|
|
||||||
|
|
||||||
|
|
||||||
class BackendStub(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def __init__(self, channel):
|
|
||||||
"""Constructor.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
channel: A grpc.Channel.
|
|
||||||
"""
|
|
||||||
self.Health = channel.unary_unary(
|
|
||||||
'/backend.Backend/Health',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Predict = channel.unary_unary(
|
|
||||||
'/backend.Backend/Predict',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.LoadModel = channel.unary_unary(
|
|
||||||
'/backend.Backend/LoadModel',
|
|
||||||
request_serializer=backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.PredictStream = channel.unary_stream(
|
|
||||||
'/backend.Backend/PredictStream',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Embedding = channel.unary_unary(
|
|
||||||
'/backend.Backend/Embedding',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.EmbeddingResult.FromString,
|
|
||||||
)
|
|
||||||
self.GenerateImage = channel.unary_unary(
|
|
||||||
'/backend.Backend/GenerateImage',
|
|
||||||
request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.AudioTranscription = channel.unary_unary(
|
|
||||||
'/backend.Backend/AudioTranscription',
|
|
||||||
request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TranscriptResult.FromString,
|
|
||||||
)
|
|
||||||
self.TTS = channel.unary_unary(
|
|
||||||
'/backend.Backend/TTS',
|
|
||||||
request_serializer=backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.TokenizeString = channel.unary_unary(
|
|
||||||
'/backend.Backend/TokenizeString',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TokenizationResponse.FromString,
|
|
||||||
)
|
|
||||||
self.Status = channel.unary_unary(
|
|
||||||
'/backend.Backend/Status',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.StatusResponse.FromString,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class BackendServicer(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def Health(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Predict(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def LoadModel(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def PredictStream(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Embedding(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def GenerateImage(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def AudioTranscription(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TTS(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TokenizeString(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Status(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
|
|
||||||
def add_BackendServicer_to_server(servicer, server):
|
|
||||||
rpc_method_handlers = {
|
|
||||||
'Health': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Health,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Predict': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Predict,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'LoadModel': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.LoadModel,
|
|
||||||
request_deserializer=backend__pb2.ModelOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'PredictStream': grpc.unary_stream_rpc_method_handler(
|
|
||||||
servicer.PredictStream,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Embedding': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Embedding,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'GenerateImage': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.GenerateImage,
|
|
||||||
request_deserializer=backend__pb2.GenerateImageRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'AudioTranscription': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.AudioTranscription,
|
|
||||||
request_deserializer=backend__pb2.TranscriptRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.TranscriptResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'TTS': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TTS,
|
|
||||||
request_deserializer=backend__pb2.TTSRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'TokenizeString': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TokenizeString,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
'Status': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Status,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.StatusResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
}
|
|
||||||
generic_handler = grpc.method_handlers_generic_handler(
|
|
||||||
'backend.Backend', rpc_method_handlers)
|
|
||||||
server.add_generic_rpc_handlers((generic_handler,))
|
|
||||||
|
|
||||||
|
|
||||||
# This class is part of an EXPERIMENTAL API.
|
|
||||||
class Backend(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Health(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Predict(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def LoadModel(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
|
|
||||||
backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def PredictStream(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Embedding(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.EmbeddingResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def GenerateImage(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
|
|
||||||
backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def AudioTranscription(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
|
|
||||||
backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
backend__pb2.TranscriptResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TTS(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
|
|
||||||
backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TokenizeString(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.TokenizationResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Status(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.StatusResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
@@ -8,7 +8,7 @@ echo "Launching gRPC server for transformers-musicgen"
|
|||||||
export PATH=$PATH:/opt/conda/bin
|
export PATH=$PATH:/opt/conda/bin
|
||||||
|
|
||||||
# Activate conda environment
|
# Activate conda environment
|
||||||
source activate transformers-musicgen
|
source activate transformers
|
||||||
|
|
||||||
# get the directory where the bash script is located
|
# get the directory where the bash script is located
|
||||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||||
|
|||||||
@@ -1,16 +1,26 @@
|
|||||||
.PHONY: transformers
|
.PHONY: transformers
|
||||||
transformers:
|
transformers: protogen
|
||||||
$(MAKE) -C ../common-env/transformers
|
$(MAKE) -C ../common-env/transformers
|
||||||
|
|
||||||
.PHONY: run
|
.PHONY: run
|
||||||
run:
|
run: protogen
|
||||||
@echo "Running transformers..."
|
@echo "Running transformers..."
|
||||||
bash run.sh
|
bash run.sh
|
||||||
@echo "transformers run."
|
@echo "transformers run."
|
||||||
|
|
||||||
# It is not working well by using command line. It only6 works with IDE like VSCode.
|
# It is not working well by using command line. It only6 works with IDE like VSCode.
|
||||||
.PHONY: test
|
.PHONY: test
|
||||||
test:
|
test: protogen
|
||||||
@echo "Testing transformers..."
|
@echo "Testing transformers..."
|
||||||
bash test.sh
|
bash test.sh
|
||||||
@echo "transformers tested."
|
@echo "transformers tested."
|
||||||
|
|
||||||
|
.PHONY: protogen
|
||||||
|
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
.PHONY: protogen-clean
|
||||||
|
protogen-clean:
|
||||||
|
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
backend_pb2_grpc.py backend_pb2.py:
|
||||||
|
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||||
File diff suppressed because one or more lines are too long
@@ -1,363 +0,0 @@
|
|||||||
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
|
|
||||||
"""Client and server classes corresponding to protobuf-defined services."""
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
import backend_pb2 as backend__pb2
|
|
||||||
|
|
||||||
|
|
||||||
class BackendStub(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def __init__(self, channel):
|
|
||||||
"""Constructor.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
channel: A grpc.Channel.
|
|
||||||
"""
|
|
||||||
self.Health = channel.unary_unary(
|
|
||||||
'/backend.Backend/Health',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Predict = channel.unary_unary(
|
|
||||||
'/backend.Backend/Predict',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.LoadModel = channel.unary_unary(
|
|
||||||
'/backend.Backend/LoadModel',
|
|
||||||
request_serializer=backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.PredictStream = channel.unary_stream(
|
|
||||||
'/backend.Backend/PredictStream',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Embedding = channel.unary_unary(
|
|
||||||
'/backend.Backend/Embedding',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.EmbeddingResult.FromString,
|
|
||||||
)
|
|
||||||
self.GenerateImage = channel.unary_unary(
|
|
||||||
'/backend.Backend/GenerateImage',
|
|
||||||
request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.AudioTranscription = channel.unary_unary(
|
|
||||||
'/backend.Backend/AudioTranscription',
|
|
||||||
request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TranscriptResult.FromString,
|
|
||||||
)
|
|
||||||
self.TTS = channel.unary_unary(
|
|
||||||
'/backend.Backend/TTS',
|
|
||||||
request_serializer=backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.TokenizeString = channel.unary_unary(
|
|
||||||
'/backend.Backend/TokenizeString',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TokenizationResponse.FromString,
|
|
||||||
)
|
|
||||||
self.Status = channel.unary_unary(
|
|
||||||
'/backend.Backend/Status',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.StatusResponse.FromString,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class BackendServicer(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def Health(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Predict(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def LoadModel(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def PredictStream(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Embedding(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def GenerateImage(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def AudioTranscription(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TTS(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TokenizeString(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Status(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
|
|
||||||
def add_BackendServicer_to_server(servicer, server):
|
|
||||||
rpc_method_handlers = {
|
|
||||||
'Health': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Health,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Predict': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Predict,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'LoadModel': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.LoadModel,
|
|
||||||
request_deserializer=backend__pb2.ModelOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'PredictStream': grpc.unary_stream_rpc_method_handler(
|
|
||||||
servicer.PredictStream,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Embedding': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Embedding,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'GenerateImage': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.GenerateImage,
|
|
||||||
request_deserializer=backend__pb2.GenerateImageRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'AudioTranscription': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.AudioTranscription,
|
|
||||||
request_deserializer=backend__pb2.TranscriptRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.TranscriptResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'TTS': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TTS,
|
|
||||||
request_deserializer=backend__pb2.TTSRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'TokenizeString': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TokenizeString,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
'Status': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Status,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.StatusResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
}
|
|
||||||
generic_handler = grpc.method_handlers_generic_handler(
|
|
||||||
'backend.Backend', rpc_method_handlers)
|
|
||||||
server.add_generic_rpc_handlers((generic_handler,))
|
|
||||||
|
|
||||||
|
|
||||||
# This class is part of an EXPERIMENTAL API.
|
|
||||||
class Backend(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Health(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Predict(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def LoadModel(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
|
|
||||||
backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def PredictStream(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Embedding(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.EmbeddingResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def GenerateImage(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
|
|
||||||
backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def AudioTranscription(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
|
|
||||||
backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
backend__pb2.TranscriptResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TTS(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
|
|
||||||
backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TokenizeString(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.TokenizationResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Status(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.StatusResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
@@ -148,7 +148,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
else:
|
else:
|
||||||
device_map="CPU"
|
device_map="CPU"
|
||||||
self.model = OVModelForCausalLM.from_pretrained(model_name,
|
self.model = OVModelForCausalLM.from_pretrained(model_name,
|
||||||
compile=True,
|
compile=True,
|
||||||
|
trust_remote_code=request.TrustRemoteCode,
|
||||||
|
ov_config={"PERFORMANCE_HINT": "LATENCY"},
|
||||||
device=device_map)
|
device=device_map)
|
||||||
self.OV = True
|
self.OV = True
|
||||||
else:
|
else:
|
||||||
@@ -158,6 +160,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
quantization_config=quantization,
|
quantization_config=quantization,
|
||||||
device_map=device_map,
|
device_map=device_map,
|
||||||
torch_dtype=compute)
|
torch_dtype=compute)
|
||||||
|
if request.ContextSize > 0:
|
||||||
|
self.max_tokens = request.ContextSize
|
||||||
|
else:
|
||||||
|
self.max_tokens = self.model.config.max_position_embeddings
|
||||||
|
|
||||||
self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
|
self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
|
||||||
self.XPU = False
|
self.XPU = False
|
||||||
|
|
||||||
@@ -212,12 +219,27 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
set_seed(request.Seed)
|
set_seed(request.Seed)
|
||||||
if request.TopP == 0:
|
if request.TopP == 0:
|
||||||
request.TopP = 0.9
|
request.TopP = 0.9
|
||||||
|
|
||||||
|
if request.TopK == 0:
|
||||||
|
request.TopK = 40
|
||||||
|
|
||||||
|
prompt = request.Prompt
|
||||||
|
if not request.Prompt and request.UseTokenizerTemplate and request.Messages:
|
||||||
|
prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True)
|
||||||
|
|
||||||
|
eos_token_id = self.tokenizer.eos_token_id
|
||||||
|
if request.StopPrompts:
|
||||||
|
eos_token_id = []
|
||||||
|
for word in request.StopPrompts:
|
||||||
|
eos_token_id.append(self.tokenizer.convert_tokens_to_ids(word))
|
||||||
|
|
||||||
|
inputs = self.tokenizer(prompt, return_tensors="pt")
|
||||||
|
|
||||||
max_tokens = 200
|
|
||||||
if request.Tokens > 0:
|
if request.Tokens > 0:
|
||||||
max_tokens = request.Tokens
|
max_tokens = request.Tokens
|
||||||
|
else:
|
||||||
|
max_tokens = self.max_tokens - inputs["input_ids"].size()[inputs["input_ids"].dim()-1]
|
||||||
|
|
||||||
inputs = self.tokenizer(request.Prompt, return_tensors="pt")
|
|
||||||
if self.CUDA:
|
if self.CUDA:
|
||||||
inputs = inputs.to("cuda")
|
inputs = inputs.to("cuda")
|
||||||
if XPU and self.OV == False:
|
if XPU and self.OV == False:
|
||||||
@@ -235,7 +257,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
top_k=request.TopK,
|
top_k=request.TopK,
|
||||||
do_sample=True,
|
do_sample=True,
|
||||||
attention_mask=inputs["attention_mask"],
|
attention_mask=inputs["attention_mask"],
|
||||||
eos_token_id=self.tokenizer.eos_token_id,
|
eos_token_id=eos_token_id,
|
||||||
pad_token_id=self.tokenizer.eos_token_id,
|
pad_token_id=self.tokenizer.eos_token_id,
|
||||||
streamer=streamer)
|
streamer=streamer)
|
||||||
thread=Thread(target=self.model.generate, kwargs=config)
|
thread=Thread(target=self.model.generate, kwargs=config)
|
||||||
@@ -264,7 +286,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
top_k=request.TopK,
|
top_k=request.TopK,
|
||||||
do_sample=True,
|
do_sample=True,
|
||||||
attention_mask=inputs["attention_mask"],
|
attention_mask=inputs["attention_mask"],
|
||||||
eos_token_id=self.tokenizer.eos_token_id,
|
eos_token_id=eos_token_id,
|
||||||
pad_token_id=self.tokenizer.eos_token_id)
|
pad_token_id=self.tokenizer.eos_token_id)
|
||||||
generated_text = self.tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0]
|
generated_text = self.tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0]
|
||||||
|
|
||||||
|
|||||||
@@ -3,18 +3,28 @@ export SKIP_CONDA=1
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
.PHONY: ttsvalle
|
.PHONY: ttsvalle
|
||||||
ttsvalle:
|
ttsvalle: protogen
|
||||||
$(MAKE) -C ../common-env/transformers
|
$(MAKE) -C ../common-env/transformers
|
||||||
bash install.sh
|
bash install.sh
|
||||||
|
|
||||||
.PHONY: run
|
.PHONY: run
|
||||||
run:
|
run: protogen
|
||||||
@echo "Running ttsvalle..."
|
@echo "Running ttsvalle..."
|
||||||
bash run.sh
|
bash run.sh
|
||||||
@echo "ttsvalle run."
|
@echo "ttsvalle run."
|
||||||
|
|
||||||
.PHONY: test
|
.PHONY: test
|
||||||
test:
|
test: protogen
|
||||||
@echo "Testing valle..."
|
@echo "Testing valle..."
|
||||||
bash test.sh
|
bash test.sh
|
||||||
@echo "valle tested."
|
@echo "valle tested."
|
||||||
|
|
||||||
|
.PHONY: protogen
|
||||||
|
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
.PHONY: protogen-clean
|
||||||
|
protogen-clean:
|
||||||
|
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
backend_pb2_grpc.py backend_pb2.py:
|
||||||
|
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||||
File diff suppressed because one or more lines are too long
@@ -1,363 +0,0 @@
|
|||||||
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
|
|
||||||
"""Client and server classes corresponding to protobuf-defined services."""
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
import backend_pb2 as backend__pb2
|
|
||||||
|
|
||||||
|
|
||||||
class BackendStub(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def __init__(self, channel):
|
|
||||||
"""Constructor.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
channel: A grpc.Channel.
|
|
||||||
"""
|
|
||||||
self.Health = channel.unary_unary(
|
|
||||||
'/backend.Backend/Health',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Predict = channel.unary_unary(
|
|
||||||
'/backend.Backend/Predict',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.LoadModel = channel.unary_unary(
|
|
||||||
'/backend.Backend/LoadModel',
|
|
||||||
request_serializer=backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.PredictStream = channel.unary_stream(
|
|
||||||
'/backend.Backend/PredictStream',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Embedding = channel.unary_unary(
|
|
||||||
'/backend.Backend/Embedding',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.EmbeddingResult.FromString,
|
|
||||||
)
|
|
||||||
self.GenerateImage = channel.unary_unary(
|
|
||||||
'/backend.Backend/GenerateImage',
|
|
||||||
request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.AudioTranscription = channel.unary_unary(
|
|
||||||
'/backend.Backend/AudioTranscription',
|
|
||||||
request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TranscriptResult.FromString,
|
|
||||||
)
|
|
||||||
self.TTS = channel.unary_unary(
|
|
||||||
'/backend.Backend/TTS',
|
|
||||||
request_serializer=backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.TokenizeString = channel.unary_unary(
|
|
||||||
'/backend.Backend/TokenizeString',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TokenizationResponse.FromString,
|
|
||||||
)
|
|
||||||
self.Status = channel.unary_unary(
|
|
||||||
'/backend.Backend/Status',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.StatusResponse.FromString,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class BackendServicer(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def Health(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Predict(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def LoadModel(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def PredictStream(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Embedding(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def GenerateImage(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def AudioTranscription(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TTS(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TokenizeString(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Status(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
|
|
||||||
def add_BackendServicer_to_server(servicer, server):
|
|
||||||
rpc_method_handlers = {
|
|
||||||
'Health': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Health,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Predict': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Predict,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'LoadModel': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.LoadModel,
|
|
||||||
request_deserializer=backend__pb2.ModelOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'PredictStream': grpc.unary_stream_rpc_method_handler(
|
|
||||||
servicer.PredictStream,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Embedding': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Embedding,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'GenerateImage': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.GenerateImage,
|
|
||||||
request_deserializer=backend__pb2.GenerateImageRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'AudioTranscription': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.AudioTranscription,
|
|
||||||
request_deserializer=backend__pb2.TranscriptRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.TranscriptResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'TTS': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TTS,
|
|
||||||
request_deserializer=backend__pb2.TTSRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'TokenizeString': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TokenizeString,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
'Status': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Status,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.StatusResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
}
|
|
||||||
generic_handler = grpc.method_handlers_generic_handler(
|
|
||||||
'backend.Backend', rpc_method_handlers)
|
|
||||||
server.add_generic_rpc_handlers((generic_handler,))
|
|
||||||
|
|
||||||
|
|
||||||
# This class is part of an EXPERIMENTAL API.
|
|
||||||
class Backend(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Health(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Predict(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def LoadModel(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
|
|
||||||
backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def PredictStream(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Embedding(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.EmbeddingResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def GenerateImage(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
|
|
||||||
backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def AudioTranscription(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
|
|
||||||
backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
backend__pb2.TranscriptResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TTS(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
|
|
||||||
backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TokenizeString(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.TokenizationResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Status(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.StatusResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
@@ -1,15 +1,25 @@
|
|||||||
.PHONY: vllm
|
.PHONY: vllm
|
||||||
vllm:
|
vllm: protogen
|
||||||
$(MAKE) -C ../common-env/transformers
|
$(MAKE) -C ../common-env/transformers
|
||||||
|
|
||||||
.PHONY: run
|
.PHONY: run
|
||||||
run:
|
run: protogen
|
||||||
@echo "Running vllm..."
|
@echo "Running vllm..."
|
||||||
bash run.sh
|
bash run.sh
|
||||||
@echo "vllm run."
|
@echo "vllm run."
|
||||||
|
|
||||||
.PHONY: test
|
.PHONY: test
|
||||||
test:
|
test: protogen
|
||||||
@echo "Testing vllm..."
|
@echo "Testing vllm..."
|
||||||
bash test.sh
|
bash test.sh
|
||||||
@echo "vllm tested."
|
@echo "vllm tested."
|
||||||
|
|
||||||
|
.PHONY: protogen
|
||||||
|
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
.PHONY: protogen-clean
|
||||||
|
protogen-clean:
|
||||||
|
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
|
backend_pb2_grpc.py backend_pb2.py:
|
||||||
|
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||||
File diff suppressed because one or more lines are too long
@@ -1,363 +0,0 @@
|
|||||||
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
|
|
||||||
"""Client and server classes corresponding to protobuf-defined services."""
|
|
||||||
import grpc
|
|
||||||
|
|
||||||
import backend_pb2 as backend__pb2
|
|
||||||
|
|
||||||
|
|
||||||
class BackendStub(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def __init__(self, channel):
|
|
||||||
"""Constructor.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
channel: A grpc.Channel.
|
|
||||||
"""
|
|
||||||
self.Health = channel.unary_unary(
|
|
||||||
'/backend.Backend/Health',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Predict = channel.unary_unary(
|
|
||||||
'/backend.Backend/Predict',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.LoadModel = channel.unary_unary(
|
|
||||||
'/backend.Backend/LoadModel',
|
|
||||||
request_serializer=backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.PredictStream = channel.unary_stream(
|
|
||||||
'/backend.Backend/PredictStream',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Reply.FromString,
|
|
||||||
)
|
|
||||||
self.Embedding = channel.unary_unary(
|
|
||||||
'/backend.Backend/Embedding',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.EmbeddingResult.FromString,
|
|
||||||
)
|
|
||||||
self.GenerateImage = channel.unary_unary(
|
|
||||||
'/backend.Backend/GenerateImage',
|
|
||||||
request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.AudioTranscription = channel.unary_unary(
|
|
||||||
'/backend.Backend/AudioTranscription',
|
|
||||||
request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TranscriptResult.FromString,
|
|
||||||
)
|
|
||||||
self.TTS = channel.unary_unary(
|
|
||||||
'/backend.Backend/TTS',
|
|
||||||
request_serializer=backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.Result.FromString,
|
|
||||||
)
|
|
||||||
self.TokenizeString = channel.unary_unary(
|
|
||||||
'/backend.Backend/TokenizeString',
|
|
||||||
request_serializer=backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.TokenizationResponse.FromString,
|
|
||||||
)
|
|
||||||
self.Status = channel.unary_unary(
|
|
||||||
'/backend.Backend/Status',
|
|
||||||
request_serializer=backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
response_deserializer=backend__pb2.StatusResponse.FromString,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class BackendServicer(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
def Health(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Predict(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def LoadModel(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def PredictStream(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Embedding(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def GenerateImage(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def AudioTranscription(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TTS(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def TokenizeString(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
def Status(self, request, context):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
|
||||||
context.set_details('Method not implemented!')
|
|
||||||
raise NotImplementedError('Method not implemented!')
|
|
||||||
|
|
||||||
|
|
||||||
def add_BackendServicer_to_server(servicer, server):
|
|
||||||
rpc_method_handlers = {
|
|
||||||
'Health': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Health,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Predict': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Predict,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'LoadModel': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.LoadModel,
|
|
||||||
request_deserializer=backend__pb2.ModelOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'PredictStream': grpc.unary_stream_rpc_method_handler(
|
|
||||||
servicer.PredictStream,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.Reply.SerializeToString,
|
|
||||||
),
|
|
||||||
'Embedding': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Embedding,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'GenerateImage': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.GenerateImage,
|
|
||||||
request_deserializer=backend__pb2.GenerateImageRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'AudioTranscription': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.AudioTranscription,
|
|
||||||
request_deserializer=backend__pb2.TranscriptRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.TranscriptResult.SerializeToString,
|
|
||||||
),
|
|
||||||
'TTS': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TTS,
|
|
||||||
request_deserializer=backend__pb2.TTSRequest.FromString,
|
|
||||||
response_serializer=backend__pb2.Result.SerializeToString,
|
|
||||||
),
|
|
||||||
'TokenizeString': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.TokenizeString,
|
|
||||||
request_deserializer=backend__pb2.PredictOptions.FromString,
|
|
||||||
response_serializer=backend__pb2.TokenizationResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
'Status': grpc.unary_unary_rpc_method_handler(
|
|
||||||
servicer.Status,
|
|
||||||
request_deserializer=backend__pb2.HealthMessage.FromString,
|
|
||||||
response_serializer=backend__pb2.StatusResponse.SerializeToString,
|
|
||||||
),
|
|
||||||
}
|
|
||||||
generic_handler = grpc.method_handlers_generic_handler(
|
|
||||||
'backend.Backend', rpc_method_handlers)
|
|
||||||
server.add_generic_rpc_handlers((generic_handler,))
|
|
||||||
|
|
||||||
|
|
||||||
# This class is part of an EXPERIMENTAL API.
|
|
||||||
class Backend(object):
|
|
||||||
"""Missing associated documentation comment in .proto file."""
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Health(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Predict(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def LoadModel(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
|
|
||||||
backend__pb2.ModelOptions.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def PredictStream(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.Reply.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Embedding(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.EmbeddingResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def GenerateImage(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
|
|
||||||
backend__pb2.GenerateImageRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def AudioTranscription(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
|
|
||||||
backend__pb2.TranscriptRequest.SerializeToString,
|
|
||||||
backend__pb2.TranscriptResult.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TTS(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
|
|
||||||
backend__pb2.TTSRequest.SerializeToString,
|
|
||||||
backend__pb2.Result.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def TokenizeString(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TokenizeString',
|
|
||||||
backend__pb2.PredictOptions.SerializeToString,
|
|
||||||
backend__pb2.TokenizationResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def Status(request,
|
|
||||||
target,
|
|
||||||
options=(),
|
|
||||||
channel_credentials=None,
|
|
||||||
call_credentials=None,
|
|
||||||
insecure=False,
|
|
||||||
compression=None,
|
|
||||||
wait_for_ready=None,
|
|
||||||
timeout=None,
|
|
||||||
metadata=None):
|
|
||||||
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Status',
|
|
||||||
backend__pb2.HealthMessage.SerializeToString,
|
|
||||||
backend__pb2.StatusResponse.FromString,
|
|
||||||
options, channel_credentials,
|
|
||||||
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
|
||||||
@@ -14,6 +14,7 @@ from vllm.engine.arg_utils import AsyncEngineArgs
|
|||||||
from vllm.engine.async_llm_engine import AsyncLLMEngine
|
from vllm.engine.async_llm_engine import AsyncLLMEngine
|
||||||
from vllm.sampling_params import SamplingParams
|
from vllm.sampling_params import SamplingParams
|
||||||
from vllm.utils import random_uuid
|
from vllm.utils import random_uuid
|
||||||
|
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||||
|
|
||||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||||
|
|
||||||
@@ -71,7 +72,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
"""
|
"""
|
||||||
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
||||||
|
|
||||||
def LoadModel(self, request, context):
|
async def LoadModel(self, request, context):
|
||||||
"""
|
"""
|
||||||
Loads a language model.
|
Loads a language model.
|
||||||
|
|
||||||
@@ -94,6 +95,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
engine_args.trust_remote_code = request.TrustRemoteCode
|
engine_args.trust_remote_code = request.TrustRemoteCode
|
||||||
if request.EnforceEager:
|
if request.EnforceEager:
|
||||||
engine_args.enforce_eager = request.EnforceEager
|
engine_args.enforce_eager = request.EnforceEager
|
||||||
|
if request.TensorParallelSize:
|
||||||
|
engine_args.tensor_parallel_size = request.TensorParallelSize
|
||||||
if request.SwapSpace != 0:
|
if request.SwapSpace != 0:
|
||||||
engine_args.swap_space = request.SwapSpace
|
engine_args.swap_space = request.SwapSpace
|
||||||
if request.MaxModelLen != 0:
|
if request.MaxModelLen != 0:
|
||||||
@@ -103,6 +106,18 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
self.llm = AsyncLLMEngine.from_engine_args(engine_args)
|
self.llm = AsyncLLMEngine.from_engine_args(engine_args)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
engine_model_config = await self.llm.get_model_config()
|
||||||
|
self.tokenizer = get_tokenizer(
|
||||||
|
engine_model_config.tokenizer,
|
||||||
|
tokenizer_mode=engine_model_config.tokenizer_mode,
|
||||||
|
trust_remote_code=engine_model_config.trust_remote_code,
|
||||||
|
truncation_side="left",
|
||||||
|
)
|
||||||
|
except Exception as err:
|
||||||
|
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||||
|
|
||||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
||||||
|
|
||||||
async def Predict(self, request, context):
|
async def Predict(self, request, context):
|
||||||
@@ -161,9 +176,15 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
if request.Seed != 0:
|
if request.Seed != 0:
|
||||||
sampling_params.seed = request.Seed
|
sampling_params.seed = request.Seed
|
||||||
|
|
||||||
|
prompt = request.Prompt
|
||||||
|
|
||||||
|
# If tokenizer template is enabled and messages are provided instead of prompt apply the tokenizer template
|
||||||
|
if not request.Prompt and request.UseTokenizerTemplate and request.Messages:
|
||||||
|
prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True)
|
||||||
|
|
||||||
# Generate text
|
# Generate text
|
||||||
request_id = random_uuid()
|
request_id = random_uuid()
|
||||||
outputs = self.llm.generate(request.Prompt, sampling_params, request_id)
|
outputs = self.llm.generate(prompt, sampling_params, request_id)
|
||||||
|
|
||||||
# Stream the results
|
# Stream the results
|
||||||
generated_text = ""
|
generated_text = ""
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package backend
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -9,9 +10,11 @@ import (
|
|||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
|
|
||||||
"github.com/go-skynet/LocalAI/core/config"
|
"github.com/go-skynet/LocalAI/core/config"
|
||||||
|
"github.com/go-skynet/LocalAI/core/schema"
|
||||||
|
|
||||||
"github.com/go-skynet/LocalAI/pkg/gallery"
|
"github.com/go-skynet/LocalAI/pkg/gallery"
|
||||||
"github.com/go-skynet/LocalAI/pkg/grpc"
|
"github.com/go-skynet/LocalAI/pkg/grpc"
|
||||||
|
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||||
)
|
)
|
||||||
@@ -26,7 +29,7 @@ type TokenUsage struct {
|
|||||||
Completion int
|
Completion int
|
||||||
}
|
}
|
||||||
|
|
||||||
func ModelInference(ctx context.Context, s string, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
func ModelInference(ctx context.Context, s string, messages []schema.Message, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||||
modelFile := c.Model
|
modelFile := c.Model
|
||||||
threads := c.Threads
|
threads := c.Threads
|
||||||
if *threads == 0 && o.Threads != 0 {
|
if *threads == 0 && o.Threads != 0 {
|
||||||
@@ -71,10 +74,30 @@ func ModelInference(ctx context.Context, s string, images []string, loader *mode
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var protoMessages []*proto.Message
|
||||||
|
// if we are using the tokenizer template, we need to convert the messages to proto messages
|
||||||
|
// unless the prompt has already been tokenized (non-chat endpoints + functions)
|
||||||
|
if c.TemplateConfig.UseTokenizerTemplate && s == "" {
|
||||||
|
protoMessages = make([]*proto.Message, len(messages), len(messages))
|
||||||
|
for i, message := range messages {
|
||||||
|
protoMessages[i] = &proto.Message{
|
||||||
|
Role: message.Role,
|
||||||
|
}
|
||||||
|
switch ct := message.Content.(type) {
|
||||||
|
case string:
|
||||||
|
protoMessages[i].Content = ct
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("Unsupported type for schema.Message.Content for inference: %T", ct)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
|
// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
|
||||||
fn := func() (LLMResponse, error) {
|
fn := func() (LLMResponse, error) {
|
||||||
opts := gRPCPredictOpts(c, loader.ModelPath)
|
opts := gRPCPredictOpts(c, loader.ModelPath)
|
||||||
opts.Prompt = s
|
opts.Prompt = s
|
||||||
|
opts.Messages = protoMessages
|
||||||
|
opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
|
||||||
opts.Images = images
|
opts.Images = images
|
||||||
|
|
||||||
tokenUsage := TokenUsage{}
|
tokenUsage := TokenUsage{}
|
||||||
@@ -130,6 +153,12 @@ func ModelInference(ctx context.Context, s string, images []string, loader *mode
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return LLMResponse{}, err
|
return LLMResponse{}, err
|
||||||
}
|
}
|
||||||
|
if tokenUsage.Prompt == 0 {
|
||||||
|
tokenUsage.Prompt = int(reply.PromptTokens)
|
||||||
|
}
|
||||||
|
if tokenUsage.Completion == 0 {
|
||||||
|
tokenUsage.Completion = int(reply.Tokens)
|
||||||
|
}
|
||||||
return LLMResponse{
|
return LLMResponse{
|
||||||
Response: string(reply.Message),
|
Response: string(reply.Message),
|
||||||
Usage: tokenUsage,
|
Usage: tokenUsage,
|
||||||
|
|||||||
@@ -74,6 +74,7 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
|||||||
EnforceEager: c.EnforceEager,
|
EnforceEager: c.EnforceEager,
|
||||||
SwapSpace: int32(c.SwapSpace),
|
SwapSpace: int32(c.SwapSpace),
|
||||||
MaxModelLen: int32(c.MaxModelLen),
|
MaxModelLen: int32(c.MaxModelLen),
|
||||||
|
TensorParallelSize: int32(c.TensorParallelSize),
|
||||||
MMProj: c.MMProj,
|
MMProj: c.MMProj,
|
||||||
YarnExtFactor: c.YarnExtFactor,
|
YarnExtFactor: c.YarnExtFactor,
|
||||||
YarnAttnFactor: c.YarnAttnFactor,
|
YarnAttnFactor: c.YarnAttnFactor,
|
||||||
|
|||||||
39
core/backend/rerank.go
Normal file
39
core/backend/rerank.go
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
package backend
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/go-skynet/LocalAI/core/config"
|
||||||
|
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
|
||||||
|
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||||
|
)
|
||||||
|
|
||||||
|
func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
|
||||||
|
bb := backend
|
||||||
|
if bb == "" {
|
||||||
|
return nil, fmt.Errorf("backend is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
grpcOpts := gRPCModelOpts(backendConfig)
|
||||||
|
|
||||||
|
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
|
||||||
|
model.WithBackendString(bb),
|
||||||
|
model.WithModel(modelFile),
|
||||||
|
model.WithContext(appConfig.Context),
|
||||||
|
model.WithAssetDir(appConfig.AssetsDestination),
|
||||||
|
model.WithLoadGRPCLoadModelOpts(grpcOpts),
|
||||||
|
})
|
||||||
|
rerankModel, err := loader.BackendLoader(opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if rerankModel == nil {
|
||||||
|
return nil, fmt.Errorf("could not load rerank model")
|
||||||
|
}
|
||||||
|
|
||||||
|
res, err := rerankModel.Rerank(context.Background(), request)
|
||||||
|
|
||||||
|
return res, err
|
||||||
|
}
|
||||||
20
core/cli/cli.go
Normal file
20
core/cli/cli.go
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
package cli
|
||||||
|
|
||||||
|
import "embed"
|
||||||
|
|
||||||
|
type Context struct {
|
||||||
|
Debug bool `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"`
|
||||||
|
LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug,trace" help:"Set the level of logs to output [${enum}]"`
|
||||||
|
|
||||||
|
// This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI
|
||||||
|
BackendAssets embed.FS `kong:"-"`
|
||||||
|
}
|
||||||
|
|
||||||
|
var CLI struct {
|
||||||
|
Context `embed:""`
|
||||||
|
|
||||||
|
Run RunCMD `cmd:"" help:"Run LocalAI, this the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
|
||||||
|
Models ModelsCMD `cmd:"" help:"Manage LocalAI models and definitions"`
|
||||||
|
TTS TTSCMD `cmd:"" help:"Convert text to speech"`
|
||||||
|
Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"`
|
||||||
|
}
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user