mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-06 04:33:30 -05:00
Compare commits
33 Commits
v2.15.0
...
functions_
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ac47aeaddd | ||
|
|
a670318a9f | ||
|
|
84e2407afa | ||
|
|
c4186f13c3 | ||
|
|
4ac7956f68 | ||
|
|
e49ea0123b | ||
|
|
7123d07456 | ||
|
|
2db22087ae | ||
|
|
fa7b2aee9c | ||
|
|
4d70b6fb2d | ||
|
|
e2c3ffb09b | ||
|
|
b4cb22f444 | ||
|
|
5534b13903 | ||
|
|
5b79bd04a7 | ||
|
|
9d8c705fd9 | ||
|
|
310b2171be | ||
|
|
98af0b5d85 | ||
|
|
ca14f95d2c | ||
|
|
1b69b338c0 | ||
|
|
88942e4761 | ||
|
|
efa32a2677 | ||
|
|
dfc420706c | ||
|
|
e2de8a88f7 | ||
|
|
7f4febd6c2 | ||
|
|
93e581dfd0 | ||
|
|
cf513efa78 | ||
|
|
9e8b34427a | ||
|
|
88d0aa1e40 | ||
|
|
9b09eb005f | ||
|
|
4db41b71f3 | ||
|
|
28a421cb1d | ||
|
|
e6768097f4 | ||
|
|
18a04246fa |
@@ -6,6 +6,11 @@ examples/chatbot-ui/models
|
||||
examples/rwkv/models
|
||||
examples/**/models
|
||||
Dockerfile*
|
||||
__pycache__
|
||||
|
||||
# SonarQube
|
||||
.scannerwork
|
||||
.scannerwork
|
||||
|
||||
# backend virtual environments
|
||||
**/venv
|
||||
backend/python/**/source
|
||||
32
.github/workflows/release.yaml
vendored
32
.github/workflows/release.yaml
vendored
@@ -1,6 +1,6 @@
|
||||
name: Build and Release
|
||||
|
||||
on:
|
||||
on:
|
||||
- push
|
||||
- pull_request
|
||||
|
||||
@@ -16,15 +16,6 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
build-linux:
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- build: ''
|
||||
defines: ''
|
||||
- build: 'cuda12'
|
||||
defines: ''
|
||||
- build: 'cuda11'
|
||||
defines: ''
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
@@ -40,17 +31,13 @@ jobs:
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg protobuf-compiler
|
||||
- name: Install CUDA Dependencies
|
||||
if: ${{ matrix.build == 'cuda12' || matrix.build == 'cuda11' }}
|
||||
run: |
|
||||
if [ "${{ matrix.build }}" == "cuda12" ]; then
|
||||
export CUDA_VERSION=12-3
|
||||
else
|
||||
export CUDA_VERSION=11-7
|
||||
fi
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
||||
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
||||
env:
|
||||
CUDA_VERSION: 12-3
|
||||
- name: Cache grpc
|
||||
id: cache-grpc
|
||||
uses: actions/cache@v4
|
||||
@@ -69,22 +56,15 @@ jobs:
|
||||
cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
|
||||
- name: Build
|
||||
id: build
|
||||
env:
|
||||
BUILD_ID: "${{ matrix.build }}"
|
||||
run: |
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
|
||||
export PATH=$PATH:$GOPATH/bin
|
||||
if [ "${{ matrix.build }}" == "cuda12" ] || [ "${{ matrix.build }}" == "cuda11" ]; then
|
||||
export BUILD_TYPE=cublas
|
||||
export PATH=/usr/local/cuda/bin:$PATH
|
||||
make dist
|
||||
else
|
||||
STATIC=true make dist
|
||||
fi
|
||||
export PATH=/usr/local/cuda/bin:$PATH
|
||||
make dist
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: LocalAI-linux-${{ matrix.build }}
|
||||
name: LocalAI-linux
|
||||
path: release/
|
||||
- name: Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
|
||||
141
.github/workflows/test-extra.yml
vendored
141
.github/workflows/test-extra.yml
vendored
@@ -25,22 +25,14 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user grpcio-tools==1.63.0
|
||||
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
|
||||
- name: Test transformers
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make --jobs=5 --output-sync=target -C backend/python/transformers
|
||||
make --jobs=5 --output-sync=target -C backend/python/transformers test
|
||||
|
||||
@@ -55,22 +47,14 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user grpcio-tools==1.63.0
|
||||
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
|
||||
- name: Test sentencetransformers
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
|
||||
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
|
||||
|
||||
@@ -86,22 +70,14 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user grpcio-tools==1.63.0
|
||||
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
|
||||
- name: Test rerankers
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make --jobs=5 --output-sync=target -C backend/python/rerankers
|
||||
make --jobs=5 --output-sync=target -C backend/python/rerankers test
|
||||
|
||||
@@ -115,25 +91,16 @@ jobs:
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
sudo apt-get install -y build-essential ffmpeg
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
pip install --user grpcio-tools==1.63.0
|
||||
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
|
||||
- name: Test diffusers
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make --jobs=5 --output-sync=target -C backend/python/diffusers
|
||||
make --jobs=5 --output-sync=target -C backend/python/diffusers test
|
||||
make --jobs=5 --output-sync=target -C backend/python/diffusers
|
||||
make --jobs=5 --output-sync=target -C backend/python/diffusers test
|
||||
|
||||
tests-parler-tts:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -146,22 +113,14 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user grpcio-tools==1.63.0
|
||||
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
|
||||
- name: Test parler-tts
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts
|
||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts test
|
||||
|
||||
@@ -176,22 +135,14 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user grpcio-tools==1.63.0
|
||||
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
|
||||
- name: Test transformers-musicgen
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
|
||||
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
|
||||
|
||||
@@ -208,22 +159,14 @@ jobs:
|
||||
# run: |
|
||||
# sudo apt-get update
|
||||
# sudo apt-get install build-essential ffmpeg
|
||||
# curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
# sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
# gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
# sudo apt-get update && \
|
||||
# sudo apt-get install -y conda
|
||||
# # Install UV
|
||||
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
# sudo apt-get install -y libopencv-dev
|
||||
# pip install --user grpcio-tools==1.63.0
|
||||
|
||||
# sudo rm -rfv /usr/bin/conda || true
|
||||
|
||||
# - name: Test petals
|
||||
# run: |
|
||||
# export PATH=$PATH:/opt/conda/bin
|
||||
# make --jobs=5 --output-sync=target -C backend/python/petals
|
||||
# make --jobs=5 --output-sync=target -C backend/python/petals test
|
||||
|
||||
@@ -280,22 +223,14 @@ jobs:
|
||||
# run: |
|
||||
# sudo apt-get update
|
||||
# sudo apt-get install build-essential ffmpeg
|
||||
# curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
# sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
# gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
# sudo apt-get update && \
|
||||
# sudo apt-get install -y conda
|
||||
# # Install UV
|
||||
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
# sudo apt-get install -y libopencv-dev
|
||||
# pip install --user grpcio-tools==1.63.0
|
||||
|
||||
# sudo rm -rfv /usr/bin/conda || true
|
||||
|
||||
# - name: Test bark
|
||||
# run: |
|
||||
# export PATH=$PATH:/opt/conda/bin
|
||||
# make --jobs=5 --output-sync=target -C backend/python/bark
|
||||
# make --jobs=5 --output-sync=target -C backend/python/bark test
|
||||
|
||||
@@ -313,20 +248,13 @@ jobs:
|
||||
# run: |
|
||||
# sudo apt-get update
|
||||
# sudo apt-get install build-essential ffmpeg
|
||||
# curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
# sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
# gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
# sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
# sudo apt-get update && \
|
||||
# sudo apt-get install -y conda
|
||||
# # Install UV
|
||||
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
# sudo apt-get install -y libopencv-dev
|
||||
# pip install --user grpcio-tools==1.63.0
|
||||
# sudo rm -rfv /usr/bin/conda || true
|
||||
# - name: Test vllm
|
||||
# run: |
|
||||
# export PATH=$PATH:/opt/conda/bin
|
||||
# make --jobs=5 --output-sync=target -C backend/python/vllm
|
||||
# make --jobs=5 --output-sync=target -C backend/python/vllm test
|
||||
tests-vallex:
|
||||
@@ -340,20 +268,13 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
pip install --user grpcio-tools==1.63.0
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
- name: Test vall-e-x
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make --jobs=5 --output-sync=target -C backend/python/vall-e-x
|
||||
make --jobs=5 --output-sync=target -C backend/python/vall-e-x test
|
||||
|
||||
@@ -368,19 +289,11 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
pip install --user grpcio-tools==1.63.0
|
||||
sudo rm -rfv /usr/bin/conda || true
|
||||
|
||||
- name: Test coqui
|
||||
run: |
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
make --jobs=5 --output-sync=target -C backend/python/coqui
|
||||
make --jobs=5 --output-sync=target -C backend/python/coqui test
|
||||
make --jobs=5 --output-sync=target -C backend/python/coqui
|
||||
make --jobs=5 --output-sync=target -C backend/python/coqui test
|
||||
16
.github/workflows/test.yml
vendored
16
.github/workflows/test.yml
vendored
@@ -57,7 +57,7 @@ jobs:
|
||||
df -h
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go ${{ matrix.go-version }}
|
||||
uses: actions/setup-go@v5
|
||||
@@ -78,6 +78,8 @@ jobs:
|
||||
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install -y conda
|
||||
# Install UV
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
sudo apt-get install -y ca-certificates cmake patch python3-pip unzip
|
||||
sudo apt-get install -y libopencv-dev
|
||||
|
||||
@@ -85,6 +87,12 @@ jobs:
|
||||
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||
rm protoc.zip
|
||||
|
||||
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
|
||||
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
||||
export CUDACXX=/usr/local/cuda/bin/nvcc
|
||||
|
||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
|
||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
|
||||
|
||||
@@ -100,6 +108,8 @@ jobs:
|
||||
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
|
||||
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
|
||||
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||
env:
|
||||
CUDA_VERSION: 12-3
|
||||
- name: Cache grpc
|
||||
id: cache-grpc
|
||||
uses: actions/cache@v4
|
||||
@@ -164,7 +174,7 @@ jobs:
|
||||
df -h
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
with:
|
||||
submodules: true
|
||||
- name: Build images
|
||||
run: |
|
||||
@@ -190,7 +200,7 @@ jobs:
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
with:
|
||||
submodules: true
|
||||
- name: Setup Go ${{ matrix.go-version }}
|
||||
uses: actions/setup-go@v5
|
||||
|
||||
5
.gitignore
vendored
5
.gitignore
vendored
@@ -46,4 +46,7 @@ prepare
|
||||
*pb2_grpc.py
|
||||
|
||||
# SonarQube
|
||||
.scannerwork
|
||||
.scannerwork
|
||||
|
||||
# backend virtual environments
|
||||
**/venv
|
||||
113
Dockerfile
113
Dockerfile
@@ -76,26 +76,16 @@ RUN test -n "$TARGETARCH" \
|
||||
# The requirements-extras target is for any builds with IMAGE_TYPE=extras. It should not be placed in this target unless every IMAGE_TYPE=extras build will use it
|
||||
FROM requirements-core AS requirements-extras
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends gpg && \
|
||||
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
|
||||
install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
|
||||
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
|
||||
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \
|
||||
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list && \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
conda && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
|
||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
espeak-ng \
|
||||
espeak && \
|
||||
espeak \
|
||||
python3-dev \
|
||||
python3-venv && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
@@ -246,6 +236,7 @@ ARG FFMPEG
|
||||
ARG BUILD_TYPE
|
||||
ARG TARGETARCH
|
||||
ARG IMAGE_TYPE=extras
|
||||
ARG EXTRA_BACKENDS
|
||||
ARG MAKEFLAGS
|
||||
|
||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||
@@ -257,7 +248,6 @@ ARG CUDA_MAJOR_VERSION=11
|
||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||
ENV PIP_CACHE_PURGE=true
|
||||
|
||||
# Add FFmpeg
|
||||
RUN if [ "${FFMPEG}" = "true" ]; then \
|
||||
@@ -290,51 +280,58 @@ COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
|
||||
# do not let stablediffusion rebuild (requires an older version of absl)
|
||||
COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
|
||||
|
||||
## Duplicated from Makefile to avoid having a big layer that's hard to push
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/autogptq \
|
||||
# Change the shell to bash so we can use [[ tests below
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
# We try to strike a balance between individual layer size (as that affects total push time) and total image size
|
||||
# Splitting the backends into more groups with fewer items results in a larger image, but a smaller size for the largest layer
|
||||
# Splitting the backends into fewer groups with more items results in a smaller image, but a larger size for the largest layer
|
||||
|
||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/coqui \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "parler-tts" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/parler-tts \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/diffusers \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/transformers-musicgen \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama1" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/exllama \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/bark \
|
||||
|
||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/vall-e-x \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "petals" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/petals \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/sentencetransformers \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/exllama2 \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "transformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/transformers \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/diffusers \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/vllm \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/mamba \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/sentencetransformers \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/rerankers \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/transformers \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/vall-e-x \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/exllama \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/exllama2 \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/petals \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/transformers-musicgen \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/parler-tts \
|
||||
; fi
|
||||
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
|
||||
make -C backend/python/coqui \
|
||||
|
||||
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/vllm \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "autogptq" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/autogptq \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "bark" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/bark \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/rerankers \
|
||||
; fi && \
|
||||
if [[ ( "${EXTRA_BACKENDS}" =~ "mamba" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
|
||||
make -C backend/python/mamba \
|
||||
; fi
|
||||
|
||||
# Make sure the models directory exists
|
||||
|
||||
64
Makefile
64
Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
|
||||
|
||||
# llama.cpp versions
|
||||
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||
CPPLLAMA_VERSION?=c12452c7aec8a02264afc00196a13caa591a13ac
|
||||
CPPLLAMA_VERSION?=dc685be46622a8fabfd57cfa804237c8f15679b8
|
||||
|
||||
# gpt4all version
|
||||
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
||||
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_CPP_VERSION?=73d13ad19a8c9c4da4f405088a85169b1a171e66
|
||||
WHISPER_CPP_VERSION?=4ef8d9f44eb402c528ab6d990ab50a9f4f666347
|
||||
|
||||
# bert.cpp version
|
||||
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
|
||||
@@ -38,7 +38,7 @@ CGO_LDFLAGS?=
|
||||
CGO_LDFLAGS_WHISPER?=
|
||||
CUDA_LIBPATH?=/usr/local/cuda/lib64/
|
||||
GO_TAGS?=
|
||||
BUILD_ID?=git
|
||||
BUILD_ID?=
|
||||
|
||||
TEST_DIR=/tmp/test
|
||||
|
||||
@@ -70,7 +70,7 @@ UNAME_S := $(shell uname -s)
|
||||
endif
|
||||
|
||||
ifeq ($(OS),Darwin)
|
||||
|
||||
|
||||
ifeq ($(OSX_SIGNING_IDENTITY),)
|
||||
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
|
||||
endif
|
||||
@@ -154,8 +154,8 @@ endif
|
||||
|
||||
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-noavx
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
|
||||
@@ -319,7 +319,14 @@ build-minimal:
|
||||
build-api:
|
||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
|
||||
|
||||
dist: build
|
||||
dist:
|
||||
STATIC=true $(MAKE) backend-assets/grpc/llama-cpp-avx2
|
||||
ifeq ($(OS),Darwin)
|
||||
$(info ${GREEN}I Skip CUDA build on MacOS${RESET})
|
||||
else
|
||||
$(MAKE) backend-assets/grpc/llama-cpp-cuda
|
||||
endif
|
||||
$(MAKE) build
|
||||
mkdir -p release
|
||||
# if BUILD_ID is empty, then we don't append it to the binary name
|
||||
ifeq ($(BUILD_ID),)
|
||||
@@ -652,30 +659,37 @@ else
|
||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
|
||||
endif
|
||||
|
||||
backend-assets/grpc/llama-cpp: backend-assets/grpc
|
||||
$(info ${GREEN}I llama-cpp build info:standard${RESET})
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-default
|
||||
$(MAKE) -C backend/cpp/llama-default purge
|
||||
$(MAKE) VARIANT="llama-default" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-default/grpc-server backend-assets/grpc/llama-cpp
|
||||
# TODO: every binary should have its own folder instead, so can have different metal implementations
|
||||
ifeq ($(BUILD_TYPE),metal)
|
||||
cp backend/cpp/llama-default/llama.cpp/build/bin/default.metallib backend-assets/grpc/
|
||||
endif
|
||||
backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-avx2
|
||||
$(MAKE) -C backend/cpp/llama-avx2 purge
|
||||
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
|
||||
|
||||
backend-assets/grpc/llama-cpp-noavx: backend-assets/grpc
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-noavx
|
||||
$(MAKE) -C backend/cpp/llama-noavx purge
|
||||
$(info ${GREEN}I llama-cpp build info:noavx${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF" $(MAKE) VARIANT="llama-noavx" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-noavx/grpc-server backend-assets/grpc/llama-cpp-noavx
|
||||
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-avx
|
||||
$(MAKE) -C backend/cpp/llama-avx purge
|
||||
$(info ${GREEN}I llama-cpp build info:avx${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
|
||||
|
||||
backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-fallback
|
||||
$(MAKE) -C backend/cpp/llama-fallback purge
|
||||
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
|
||||
# TODO: every binary should have its own folder instead, so can have different metal implementations
|
||||
ifeq ($(BUILD_TYPE),metal)
|
||||
cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
|
||||
endif
|
||||
|
||||
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-cuda
|
||||
$(MAKE) -C backend/cpp/llama-cuda purge
|
||||
$(info ${GREEN}I llama-cpp build info:cuda${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
|
||||
|
||||
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
|
||||
@@ -719,7 +733,7 @@ docker:
|
||||
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
||||
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
|
||||
-t $(DOCKER_IMAGE) .
|
||||
|
||||
|
||||
docker-aio:
|
||||
@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
|
||||
docker build \
|
||||
|
||||
@@ -212,6 +212,9 @@ message ModelOptions {
|
||||
float YarnBetaSlow = 47;
|
||||
|
||||
string Type = 49;
|
||||
|
||||
bool FlashAttention = 56;
|
||||
bool NoKVOffload = 57;
|
||||
}
|
||||
|
||||
message Result {
|
||||
|
||||
@@ -62,8 +62,8 @@ grpc-server: llama.cpp llama.cpp/examples/grpc-server
|
||||
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
|
||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||
bash -c "source $(ONEAPI_VARS); \
|
||||
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release"
|
||||
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)"
|
||||
else
|
||||
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release
|
||||
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && $(MAKE)
|
||||
endif
|
||||
cp llama.cpp/build/bin/grpc-server .
|
||||
@@ -2254,6 +2254,9 @@ static void params_parse(const backend::ModelOptions* request,
|
||||
}
|
||||
params.use_mlock = request->mlock();
|
||||
params.use_mmap = request->mmap();
|
||||
params.flash_attn = request->flashattention();
|
||||
params.no_kv_offload = request->nokvoffload();
|
||||
|
||||
params.embedding = request->embeddings();
|
||||
|
||||
if (request->ropescaling() == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; }
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
.PHONY: autogptq
|
||||
autogptq: protogen
|
||||
$(MAKE) -C ../common-env/transformers
|
||||
bash install.sh
|
||||
|
||||
.PHONY: protogen
|
||||
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||
@@ -10,4 +10,8 @@ protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
rm -rf venv __pycache__
|
||||
@@ -1,93 +0,0 @@
|
||||
####
|
||||
# Attention! This file is abandoned.
|
||||
# Please use the ../common-env/transformers/transformers.yml file to manage dependencies.
|
||||
###
|
||||
name: autogptq
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- accelerate==0.27.0
|
||||
- aiohttp==3.8.5
|
||||
- aiosignal==1.3.1
|
||||
- async-timeout==4.0.3
|
||||
- attrs==23.1.0
|
||||
- auto-gptq==0.7.1
|
||||
- certifi==2023.7.22
|
||||
- charset-normalizer==3.3.0
|
||||
- datasets==2.14.5
|
||||
- dill==0.3.7
|
||||
- filelock==3.12.4
|
||||
- frozenlist==1.4.0
|
||||
- fsspec==2023.6.0
|
||||
- grpcio==1.63.0
|
||||
- huggingface-hub==0.16.4
|
||||
- idna==3.4
|
||||
- jinja2==3.1.2
|
||||
- markupsafe==2.1.3
|
||||
- mpmath==1.3.0
|
||||
- multidict==6.0.4
|
||||
- multiprocess==0.70.15
|
||||
- networkx==3.1
|
||||
- numpy==1.26.0
|
||||
- nvidia-cublas-cu12==12.1.3.1
|
||||
- nvidia-cuda-cupti-cu12==12.1.105
|
||||
- nvidia-cuda-nvrtc-cu12==12.1.105
|
||||
- nvidia-cuda-runtime-cu12==12.1.105
|
||||
- nvidia-cudnn-cu12==8.9.2.26
|
||||
- nvidia-cufft-cu12==11.0.2.54
|
||||
- nvidia-curand-cu12==10.3.2.106
|
||||
- nvidia-cusolver-cu12==11.4.5.107
|
||||
- nvidia-cusparse-cu12==12.1.0.106
|
||||
- nvidia-nccl-cu12==2.18.1
|
||||
- nvidia-nvjitlink-cu12==12.2.140
|
||||
- nvidia-nvtx-cu12==12.1.105
|
||||
- optimum==1.17.1
|
||||
- packaging==23.2
|
||||
- pandas==2.1.1
|
||||
- peft==0.5.0
|
||||
- protobuf==4.24.4
|
||||
- psutil==5.9.5
|
||||
- pyarrow==13.0.0
|
||||
- python-dateutil==2.8.2
|
||||
- pytz==2023.3.post1
|
||||
- pyyaml==6.0.1
|
||||
- regex==2023.10.3
|
||||
- requests==2.31.0
|
||||
- rouge==1.0.1
|
||||
- safetensors>=0.3.3
|
||||
- six==1.16.0
|
||||
- sympy==1.12
|
||||
- tokenizers==0.14.0
|
||||
- tqdm==4.66.1
|
||||
- torch==2.2.1
|
||||
- torchvision==0.17.1
|
||||
- transformers==4.34.0
|
||||
- transformers_stream_generator==0.0.5
|
||||
- triton==2.1.0
|
||||
- typing-extensions==4.8.0
|
||||
- tzdata==2023.3
|
||||
- urllib3==2.0.6
|
||||
- xxhash==3.4.1
|
||||
- yarl==1.9.2
|
||||
14
backend/python/autogptq/install.sh
Executable file
14
backend/python/autogptq/install.sh
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
|
||||
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
|
||||
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
|
||||
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
|
||||
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
|
||||
installRequirements
|
||||
4
backend/python/autogptq/requirements-intel.txt
Normal file
4
backend/python/autogptq/requirements-intel.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
optimum[openvino]
|
||||
7
backend/python/autogptq/requirements.txt
Normal file
7
backend/python/autogptq/requirements.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
accelerate
|
||||
auto-gptq==0.7.1
|
||||
grpcio==1.63.0
|
||||
protobuf
|
||||
torch
|
||||
certifi
|
||||
transformers
|
||||
@@ -1,14 +1,4 @@
|
||||
#!/bin/bash
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the autogptq server with conda
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python $DIR/autogptq.py $@
|
||||
startBackend $@
|
||||
6
backend/python/autogptq/test.sh
Executable file
6
backend/python/autogptq/test.sh
Executable file
@@ -0,0 +1,6 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
runUnittests
|
||||
@@ -1,6 +1,6 @@
|
||||
.PHONY: ttsbark
|
||||
ttsbark: protogen
|
||||
$(MAKE) -C ../common-env/transformers
|
||||
bash install.sh
|
||||
|
||||
.PHONY: run
|
||||
run: protogen
|
||||
@@ -22,4 +22,8 @@ protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
rm -rf venv __pycache__
|
||||
14
backend/python/bark/install.sh
Executable file
14
backend/python/bark/install.sh
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
|
||||
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
|
||||
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
|
||||
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
|
||||
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
|
||||
installRequirements
|
||||
5
backend/python/bark/requirements-intel.txt
Normal file
5
backend/python/bark/requirements-intel.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
torchaudio
|
||||
optimum[openvino]
|
||||
6
backend/python/bark/requirements.txt
Normal file
6
backend/python/bark/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
accelerate
|
||||
bark==0.1.5
|
||||
grpcio==1.63.0
|
||||
protobuf
|
||||
certifi
|
||||
transformers
|
||||
@@ -1,14 +1,4 @@
|
||||
#!/bin/bash
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the ttsbark server with conda
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python $DIR/ttsbark.py $@
|
||||
startBackend $@
|
||||
@@ -18,7 +18,7 @@ class TestBackendServicer(unittest.TestCase):
|
||||
"""
|
||||
This method sets up the gRPC service by starting the server
|
||||
"""
|
||||
self.service = subprocess.Popen(["python3", "ttsbark.py", "--addr", "localhost:50051"])
|
||||
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
|
||||
time.sleep(10)
|
||||
|
||||
def tearDown(self) -> None:
|
||||
|
||||
11
backend/python/bark/test.sh
Normal file → Executable file
11
backend/python/bark/test.sh
Normal file → Executable file
@@ -1,11 +1,6 @@
|
||||
#!/bin/bash
|
||||
##
|
||||
## A bash script wrapper that runs the bark server with conda
|
||||
set -e
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python -m unittest $DIR/test.py
|
||||
runUnittests
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
CONDA_ENV_PATH = "transformers.yml"
|
||||
|
||||
ifeq ($(BUILD_TYPE), cublas)
|
||||
CONDA_ENV_PATH = "transformers-nvidia.yml"
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE), hipblas)
|
||||
CONDA_ENV_PATH = "transformers-rocm.yml"
|
||||
endif
|
||||
|
||||
# Intel GPU are supposed to have dependencies installed in the main python
|
||||
# environment, so we skip conda installation for SYCL builds.
|
||||
# https://github.com/intel/intel-extension-for-pytorch/issues/538
|
||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||
export SKIP_CONDA=1
|
||||
endif
|
||||
|
||||
.PHONY: transformers
|
||||
transformers:
|
||||
@echo "Installing $(CONDA_ENV_PATH)..."
|
||||
bash install.sh $(CONDA_ENV_PATH)
|
||||
@@ -1,44 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
|
||||
SKIP_CONDA=${SKIP_CONDA:-0}
|
||||
REQUIREMENTS_FILE=$1
|
||||
|
||||
# Check if environment exist
|
||||
conda_env_exists(){
|
||||
! conda list --name "${@}" >/dev/null 2>/dev/null
|
||||
}
|
||||
|
||||
if [ $SKIP_CONDA -eq 1 ]; then
|
||||
echo "Skipping conda environment installation"
|
||||
else
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
if conda_env_exists "transformers" ; then
|
||||
echo "Creating virtual environment..."
|
||||
conda env create --name transformers --file $REQUIREMENTS_FILE
|
||||
echo "Virtual environment created."
|
||||
else
|
||||
echo "Virtual environment already exists."
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -d "/opt/intel" ]; then
|
||||
# Intel GPU: If the directory exists, we assume we are using the intel image
|
||||
# (no conda env)
|
||||
# https://github.com/intel/intel-extension-for-pytorch/issues/538
|
||||
pip install torch==2.1.0.post0 torchvision==0.16.0.post0 torchaudio==2.1.0.post0 intel-extension-for-pytorch==2.1.20+xpu oneccl_bind_pt==2.1.200+xpu intel-extension-for-transformers datasets sentencepiece tiktoken neural_speed optimum[openvino] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
fi
|
||||
|
||||
# If we didn't skip conda, activate the environment
|
||||
# to install FlashAttention
|
||||
if [ $SKIP_CONDA -eq 0 ]; then
|
||||
source activate transformers
|
||||
fi
|
||||
if [[ $REQUIREMENTS_FILE =~ -nvidia.yml$ ]]; then
|
||||
#TODO: FlashAttention is supported on nvidia and ROCm, but ROCm install can't be done this easily
|
||||
pip install flash-attn --no-build-isolation
|
||||
fi
|
||||
|
||||
if [ "$PIP_CACHE_PURGE" = true ] ; then
|
||||
pip cache purge
|
||||
fi
|
||||
@@ -1,125 +0,0 @@
|
||||
name: transformers
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- accelerate==0.27.0
|
||||
- aiohttp==3.8.5
|
||||
- aiosignal==1.3.1
|
||||
- async-timeout==4.0.3
|
||||
- auto-gptq==0.7.1
|
||||
- attrs==23.1.0
|
||||
- bark==0.1.5
|
||||
- bitsandbytes==0.43.0
|
||||
- boto3==1.28.61
|
||||
- botocore==1.31.61
|
||||
- certifi==2023.7.22
|
||||
- TTS==0.22.0
|
||||
- charset-normalizer==3.3.0
|
||||
- datasets==2.14.5
|
||||
- sentence-transformers==2.5.1 # Updated Version
|
||||
- sentencepiece==0.1.99
|
||||
- dill==0.3.7
|
||||
- einops==0.7.0
|
||||
- encodec==0.1.1
|
||||
- filelock==3.12.4
|
||||
- frozenlist==1.4.0
|
||||
- fsspec==2023.6.0
|
||||
- funcy==2.0
|
||||
- grpcio==1.63.0
|
||||
- huggingface-hub
|
||||
- idna==3.4
|
||||
- jinja2==3.1.2
|
||||
- jmespath==1.0.1
|
||||
- markupsafe==2.1.3
|
||||
- mpmath==1.3.0
|
||||
- multidict==6.0.4
|
||||
- multiprocess==0.70.15
|
||||
- networkx
|
||||
- numpy==1.26.0
|
||||
- nvidia-cublas-cu12==12.1.3.1
|
||||
- nvidia-cuda-cupti-cu12==12.1.105
|
||||
- nvidia-cuda-nvrtc-cu12==12.1.105
|
||||
- nvidia-cuda-runtime-cu12==12.1.105
|
||||
- nvidia-cudnn-cu12==8.9.2.26
|
||||
- nvidia-cufft-cu12==11.0.2.54
|
||||
- nvidia-curand-cu12==10.3.2.106
|
||||
- nvidia-cusolver-cu12==11.4.5.107
|
||||
- nvidia-cusparse-cu12==12.1.0.106
|
||||
- nvidia-nccl-cu12==2.18.1
|
||||
- nvidia-nvjitlink-cu12==12.2.140
|
||||
- nvidia-nvtx-cu12==12.1.105
|
||||
- optimum==1.17.1
|
||||
- packaging==23.2
|
||||
- pandas
|
||||
- peft==0.5.0
|
||||
- protobuf==4.24.4
|
||||
- psutil==5.9.5
|
||||
- pyarrow==13.0.0
|
||||
- python-dateutil==2.8.2
|
||||
- pytz==2023.3.post1
|
||||
- pyyaml==6.0.1
|
||||
- regex==2023.10.3
|
||||
- requests==2.31.0
|
||||
- rouge==1.0.1
|
||||
- s3transfer==0.7.0
|
||||
- safetensors>=0.4.1
|
||||
- scipy==1.12.0 # Updated Version
|
||||
- six==1.16.0
|
||||
- sympy==1.12
|
||||
- tokenizers
|
||||
- torch==2.1.2
|
||||
- torchvision==0.16.2
|
||||
- torchaudio==2.1.2
|
||||
- tqdm==4.66.1
|
||||
- triton==2.1.0
|
||||
- typing-extensions==4.8.0
|
||||
- tzdata==2023.3
|
||||
- urllib3==1.26.17
|
||||
- xxhash==3.4.1
|
||||
- yarl==1.9.2
|
||||
- soundfile
|
||||
- langid
|
||||
- wget
|
||||
- unidecode
|
||||
- pyopenjtalk-prebuilt
|
||||
- pypinyin
|
||||
- inflect
|
||||
- cn2an
|
||||
- jieba
|
||||
- eng_to_ipa
|
||||
- openai-whisper
|
||||
- matplotlib
|
||||
- gradio==3.41.2
|
||||
- nltk
|
||||
- sudachipy
|
||||
- sudachidict_core
|
||||
- vocos
|
||||
- vllm>=0.4.0
|
||||
- transformers>=4.38.2 # Updated Version
|
||||
- transformers_stream_generator==0.0.5
|
||||
- xformers==0.0.23.post1
|
||||
- rerankers[transformers]
|
||||
- pydantic
|
||||
prefix: /opt/conda/envs/transformers
|
||||
@@ -1,113 +0,0 @@
|
||||
name: transformers
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- --pre
|
||||
- --extra-index-url https://download.pytorch.org/whl/nightly/
|
||||
- accelerate==0.27.0
|
||||
- auto-gptq==0.7.1
|
||||
- aiohttp==3.8.5
|
||||
- aiosignal==1.3.1
|
||||
- async-timeout==4.0.3
|
||||
- attrs==23.1.0
|
||||
- bark==0.1.5
|
||||
- boto3==1.28.61
|
||||
- botocore==1.31.61
|
||||
- certifi==2023.7.22
|
||||
- TTS==0.22.0
|
||||
- charset-normalizer==3.3.0
|
||||
- datasets==2.14.5
|
||||
- sentence-transformers==2.5.1 # Updated Version
|
||||
- sentencepiece==0.1.99
|
||||
- dill==0.3.7
|
||||
- einops==0.7.0
|
||||
- encodec==0.1.1
|
||||
- filelock==3.12.4
|
||||
- frozenlist==1.4.0
|
||||
- fsspec==2023.6.0
|
||||
- funcy==2.0
|
||||
- grpcio==1.63.0
|
||||
- huggingface-hub
|
||||
- idna==3.4
|
||||
- jinja2==3.1.2
|
||||
- jmespath==1.0.1
|
||||
- markupsafe==2.1.3
|
||||
- mpmath==1.3.0
|
||||
- multidict==6.0.4
|
||||
- multiprocess==0.70.15
|
||||
- networkx
|
||||
- numpy==1.26.0
|
||||
- packaging==23.2
|
||||
- pandas
|
||||
- peft==0.5.0
|
||||
- protobuf==4.24.4
|
||||
- psutil==5.9.5
|
||||
- pyarrow==13.0.0
|
||||
- python-dateutil==2.8.2
|
||||
- pytz==2023.3.post1
|
||||
- pyyaml==6.0.1
|
||||
- regex==2023.10.3
|
||||
- requests==2.31.0
|
||||
- rouge==1.0.1
|
||||
- s3transfer==0.7.0
|
||||
- safetensors>=0.4.1
|
||||
- scipy==1.12.0 # Updated Version
|
||||
- six==1.16.0
|
||||
- sympy==1.12
|
||||
- tokenizers
|
||||
- torch
|
||||
- torchaudio
|
||||
- tqdm==4.66.1
|
||||
- triton==2.1.0
|
||||
- typing-extensions==4.8.0
|
||||
- tzdata==2023.3
|
||||
- urllib3==1.26.17
|
||||
- xxhash==3.4.1
|
||||
- yarl==1.9.2
|
||||
- soundfile
|
||||
- langid
|
||||
- wget
|
||||
- unidecode
|
||||
- optimum==1.17.1
|
||||
- pyopenjtalk-prebuilt
|
||||
- pypinyin
|
||||
- inflect
|
||||
- cn2an
|
||||
- jieba
|
||||
- eng_to_ipa
|
||||
- openai-whisper
|
||||
- matplotlib
|
||||
- gradio==3.41.2
|
||||
- nltk
|
||||
- sudachipy
|
||||
- sudachidict_core
|
||||
- vocos
|
||||
- vllm>=0.4.0
|
||||
- transformers>=4.38.2 # Updated Version
|
||||
- transformers_stream_generator==0.0.5
|
||||
- xformers==0.0.23.post1
|
||||
- rerankers[transformers]
|
||||
- pydantic
|
||||
prefix: /opt/conda/envs/transformers
|
||||
@@ -1,118 +0,0 @@
|
||||
name: transformers
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- accelerate==0.27.0
|
||||
- aiohttp==3.8.5
|
||||
- aiosignal==1.3.1
|
||||
- auto-gptq==0.7.1
|
||||
- async-timeout==4.0.3
|
||||
- attrs==23.1.0
|
||||
- bark==0.1.5
|
||||
- boto3==1.28.61
|
||||
- botocore==1.31.61
|
||||
- certifi==2023.7.22
|
||||
- coloredlogs==15.0.1
|
||||
- TTS==0.22.0
|
||||
- charset-normalizer==3.3.0
|
||||
- datasets==2.14.5
|
||||
- sentence-transformers==2.5.1 # Updated Version
|
||||
- sentencepiece==0.1.99
|
||||
- dill==0.3.7
|
||||
- einops==0.7.0
|
||||
- encodec==0.1.1
|
||||
- filelock==3.12.4
|
||||
- frozenlist==1.4.0
|
||||
- fsspec==2023.6.0
|
||||
- funcy==2.0
|
||||
- grpcio==1.63.0
|
||||
- huggingface-hub
|
||||
- humanfriendly==10.0
|
||||
- idna==3.4
|
||||
- jinja2==3.1.2
|
||||
- jmespath==1.0.1
|
||||
- markupsafe==2.1.3
|
||||
- mpmath==1.3.0
|
||||
- multidict==6.0.4
|
||||
- multiprocess==0.70.15
|
||||
- networkx
|
||||
- numpy==1.26.0
|
||||
- onnx==1.15.0
|
||||
- openvino==2024.1.0
|
||||
- openvino-telemetry==2024.1.0
|
||||
- optimum[openvino]==1.19.1
|
||||
- optimum-intel==1.16.1
|
||||
- packaging==23.2
|
||||
- pandas
|
||||
- peft==0.5.0
|
||||
- protobuf==4.24.4
|
||||
- psutil==5.9.5
|
||||
- pyarrow==13.0.0
|
||||
- python-dateutil==2.8.2
|
||||
- pytz==2023.3.post1
|
||||
- pyyaml==6.0.1
|
||||
- regex==2023.10.3
|
||||
- requests==2.31.0
|
||||
- rouge==1.0.1
|
||||
- s3transfer==0.7.0
|
||||
- safetensors>=0.4.1
|
||||
- scipy==1.12.0 # Updated Version
|
||||
- six==1.16.0
|
||||
- sympy==1.12
|
||||
- tokenizers
|
||||
- torch==2.1.2
|
||||
- torchvision==0.16.2
|
||||
- torchaudio==2.1.2
|
||||
- tqdm==4.66.1
|
||||
- triton==2.1.0
|
||||
- typing-extensions==4.8.0
|
||||
- tzdata==2023.3
|
||||
- urllib3==1.26.17
|
||||
- xxhash==3.4.1
|
||||
- yarl==1.9.2
|
||||
- soundfile
|
||||
- langid
|
||||
- wget
|
||||
- unidecode
|
||||
- pyopenjtalk-prebuilt
|
||||
- pypinyin
|
||||
- inflect
|
||||
- cn2an
|
||||
- jieba
|
||||
- eng_to_ipa
|
||||
- openai-whisper
|
||||
- matplotlib
|
||||
- gradio==3.41.2
|
||||
- nltk
|
||||
- sudachipy
|
||||
- sudachidict_core
|
||||
- vocos
|
||||
- vllm>=0.4.0
|
||||
- transformers>=4.38.2 # Updated Version
|
||||
- transformers_stream_generator==0.0.5
|
||||
- xformers==0.0.23.post1
|
||||
- rerankers[transformers]
|
||||
- pydantic
|
||||
prefix: /opt/conda/envs/transformers
|
||||
213
backend/python/common/libbackend.sh
Normal file
213
backend/python/common/libbackend.sh
Normal file
@@ -0,0 +1,213 @@
|
||||
|
||||
|
||||
# init handles the setup of the library
|
||||
#
|
||||
# use the library by adding the following line to a script:
|
||||
# source $(dirname $0)/../common/libbackend.sh
|
||||
#
|
||||
# If you want to limit what targets a backend can be used on, set the variable LIMIT_TARGETS to a
|
||||
# space separated list of valid targets BEFORE sourcing the library, for example to only allow a backend
|
||||
# to be used on CUDA and CPU backends:
|
||||
#
|
||||
# LIMIT_TARGETS="cublas cpu"
|
||||
# source $(dirname $0)/../common/libbackend.sh
|
||||
#
|
||||
# You can use any valid BUILD_TYPE or BUILD_PROFILE, if you need to limit a backend to CUDA 12 only:
|
||||
#
|
||||
# LIMIT_TARGETS="cublas12"
|
||||
# source $(dirname $0)/../common/libbackend.sh
|
||||
#
|
||||
function init() {
|
||||
BACKEND_NAME=${PWD##*/}
|
||||
MY_DIR=$(realpath `dirname $0`)
|
||||
BUILD_PROFILE=$(getBuildProfile)
|
||||
|
||||
# If a backend has defined a list of valid build profiles...
|
||||
if [ ! -z "${LIMIT_TARGETS}" ]; then
|
||||
isValidTarget=$(checkTargets ${LIMIT_TARGETS})
|
||||
if [ ${isValidTarget} != true ]; then
|
||||
echo "${BACKEND_NAME} can only be used on the following targets: ${LIMIT_TARGETS}"
|
||||
exit 0
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "Initializing libbackend for ${BACKEND_NAME}"
|
||||
}
|
||||
|
||||
# getBuildProfile will inspect the system to determine which build profile is appropriate:
|
||||
# returns one of the following:
|
||||
# - cublas11
|
||||
# - cublas12
|
||||
# - hipblas
|
||||
# - intel
|
||||
function getBuildProfile() {
|
||||
# First check if we are a cublas build, and if so report the correct build profile
|
||||
if [ x"${BUILD_TYPE}" == "xcublas" ]; then
|
||||
if [ ! -z ${CUDA_MAJOR_VERSION} ]; then
|
||||
# If we have been given a CUDA version, we trust it
|
||||
echo ${BUILD_TYPE}${CUDA_MAJOR_VERSION}
|
||||
else
|
||||
# We don't know what version of cuda we are, so we report ourselves as a generic cublas
|
||||
echo ${BUILD_TYPE}
|
||||
fi
|
||||
return 0
|
||||
fi
|
||||
|
||||
# If /opt/intel exists, then we are doing an intel/ARC build
|
||||
if [ -d "/opt/intel" ]; then
|
||||
echo "intel"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# If for any other values of BUILD_TYPE, we don't need any special handling/discovery
|
||||
if [ ! -z ${BUILD_TYPE} ]; then
|
||||
echo ${BUILD_TYPE}
|
||||
return 0
|
||||
fi
|
||||
|
||||
# If there is no BUILD_TYPE set at all, set a build-profile value of CPU, we aren't building for any GPU targets
|
||||
echo "cpu"
|
||||
}
|
||||
|
||||
# ensureVenv makes sure that the venv for the backend both exists, and is activated.
|
||||
#
|
||||
# This function is idempotent, so you can call it as many times as you want and it will
|
||||
# always result in an activated virtual environment
|
||||
function ensureVenv() {
|
||||
if [ ! -d "${MY_DIR}/venv" ]; then
|
||||
uv venv ${MY_DIR}/venv
|
||||
echo "virtualenv created"
|
||||
fi
|
||||
|
||||
if [ "x${VIRTUAL_ENV}" != "x${MY_DIR}/venv" ]; then
|
||||
source ${MY_DIR}/venv/bin/activate
|
||||
echo "virtualenv activated"
|
||||
fi
|
||||
|
||||
echo "activated virtualenv has been ensured"
|
||||
}
|
||||
|
||||
# installRequirements looks for several requirements files and if they exist runs the install for them in order
|
||||
#
|
||||
# - requirements-install.txt
|
||||
# - requirements.txt
|
||||
# - requirements-${BUILD_TYPE}.txt
|
||||
# - requirements-${BUILD_PROFILE}.txt
|
||||
#
|
||||
# BUILD_PROFILE is a pore specific version of BUILD_TYPE, ex: cuda11 or cuda12
|
||||
# it can also include some options that we do not have BUILD_TYPES for, ex: intel
|
||||
#
|
||||
# NOTE: for BUILD_PROFILE==intel, this function does NOT automatically use the Intel python package index.
|
||||
# you may want to add the following line to a requirements-intel.txt if you use one:
|
||||
#
|
||||
# --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
#
|
||||
# If you need to add extra flags into the pip install command you can do so by setting the variable EXTRA_PIP_INSTALL_FLAGS
|
||||
# before calling installRequirements. For example:
|
||||
#
|
||||
# source $(dirname $0)/../common/libbackend.sh
|
||||
# EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"
|
||||
# installRequirements
|
||||
function installRequirements() {
|
||||
ensureVenv
|
||||
|
||||
# These are the requirements files we will attempt to install, in order
|
||||
declare -a requirementFiles=(
|
||||
"${MY_DIR}/requirements-install.txt"
|
||||
"${MY_DIR}/requirements.txt"
|
||||
"${MY_DIR}/requirements-${BUILD_TYPE}.txt"
|
||||
)
|
||||
|
||||
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
|
||||
requirementFiles+=("${MY_DIR}/requirements-${BUILD_PROFILE}.txt")
|
||||
fi
|
||||
|
||||
for reqFile in ${requirementFiles[@]}; do
|
||||
if [ -f ${reqFile} ]; then
|
||||
echo "starting requirements install for ${reqFile}"
|
||||
uv pip install ${EXTRA_PIP_INSTALL_FLAGS} --requirement ${reqFile}
|
||||
echo "finished requirements install for ${reqFile}"
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
# startBackend discovers and runs the backend GRPC server
|
||||
#
|
||||
# You can specify a specific backend file to execute by setting BACKEND_FILE before calling startBackend.
|
||||
# example:
|
||||
#
|
||||
# source ../common/libbackend.sh
|
||||
# BACKEND_FILE="${MY_DIR}/source/backend.py"
|
||||
# startBackend $@
|
||||
#
|
||||
# valid filenames for autodiscovered backend servers are:
|
||||
# - server.py
|
||||
# - backend.py
|
||||
# - ${BACKEND_NAME}.py
|
||||
function startBackend() {
|
||||
ensureVenv
|
||||
|
||||
if [ ! -z ${BACKEND_FILE} ]; then
|
||||
python ${BACKEND_FILE} $@
|
||||
elif [ -e "${MY_DIR}/server.py" ]; then
|
||||
python ${MY_DIR}/server.py $@
|
||||
elif [ -e "${MY_DIR}/backend.py" ]; then
|
||||
python ${MY_DIR}/backend.py $@
|
||||
elif [ -e "${MY_DIR}/${BACKEND_NAME}.py" ]; then
|
||||
python ${MY_DIR}/${BACKEND_NAME}.py $@
|
||||
fi
|
||||
}
|
||||
|
||||
# runUnittests discovers and runs python unittests
|
||||
#
|
||||
# You can specify a specific test file to use by setting TEST_FILE before calling runUnittests.
|
||||
# example:
|
||||
#
|
||||
# source ../common/libbackend.sh
|
||||
# TEST_FILE="${MY_DIR}/source/test.py"
|
||||
# runUnittests $@
|
||||
#
|
||||
# be default a file named test.py in the backends directory will be used
|
||||
function runUnittests() {
|
||||
ensureVenv
|
||||
|
||||
if [ ! -z ${TEST_FILE} ]; then
|
||||
testDir=$(dirname `realpath ${TEST_FILE}`)
|
||||
testFile=$(basename ${TEST_FILE})
|
||||
pushd ${testDir}
|
||||
python -m unittest ${testFile}
|
||||
popd
|
||||
elif [ -f "${MY_DIR}/test.py" ]; then
|
||||
pushd ${MY_DIR}
|
||||
python -m unittest test.py
|
||||
popd
|
||||
else
|
||||
echo "no tests defined for ${BACKEND_NAME}"
|
||||
fi
|
||||
}
|
||||
|
||||
##################################################################################
|
||||
# Below here are helper functions not intended to be used outside of the library #
|
||||
##################################################################################
|
||||
|
||||
# checkTargets determines if the current BUILD_TYPE or BUILD_PROFILE is in a list of valid targets
|
||||
function checkTargets() {
|
||||
# Collect all provided targets into a variable and...
|
||||
targets=$@
|
||||
# ...convert it into an array
|
||||
declare -a targets=($targets)
|
||||
|
||||
for target in ${targets[@]}; do
|
||||
if [ "x${BUILD_TYPE}" == "x${target}" ]; then
|
||||
echo true
|
||||
return 0
|
||||
fi
|
||||
if [ "x${BUILD_PROFILE}" == "x${target}" ]; then
|
||||
echo true
|
||||
return 0
|
||||
fi
|
||||
done
|
||||
echo false
|
||||
}
|
||||
|
||||
init
|
||||
19
backend/python/common/template/Makefile
Normal file
19
backend/python/common/template/Makefile
Normal file
@@ -0,0 +1,19 @@
|
||||
.DEFAULT_GOAL := install
|
||||
|
||||
.PHONY: install
|
||||
install: protogen
|
||||
bash install.sh
|
||||
|
||||
.PHONY: protogen
|
||||
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
.PHONY: protogen-clean
|
||||
protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
rm -rf venv __pycache__
|
||||
4
backend/python/common/template/backend.py
Executable file
4
backend/python/common/template/backend.py
Executable file
@@ -0,0 +1,4 @@
|
||||
#!/usr/bin/env python3
|
||||
import grpc
|
||||
import backend_pb2
|
||||
import backend_pb2_grpc
|
||||
14
backend/python/common/template/install.sh
Executable file
14
backend/python/common/template/install.sh
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
|
||||
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
|
||||
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
|
||||
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
|
||||
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
|
||||
installRequirements
|
||||
4
backend/python/common/template/requirements-intel.txt
Normal file
4
backend/python/common/template/requirements-intel.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
optimum[openvino]
|
||||
2
backend/python/common/template/requirements.txt
Normal file
2
backend/python/common/template/requirements.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
grpcio==1.63.0
|
||||
protobuf
|
||||
4
backend/python/common/template/run.sh
Executable file
4
backend/python/common/template/run.sh
Executable file
@@ -0,0 +1,4 @@
|
||||
#!/bin/bash
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
startBackend $@
|
||||
6
backend/python/common/template/test.sh
Executable file
6
backend/python/common/template/test.sh
Executable file
@@ -0,0 +1,6 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
runUnittests
|
||||
@@ -1,6 +1,6 @@
|
||||
.PHONY: coqui
|
||||
coqui: protogen
|
||||
$(MAKE) -C ../common-env/transformers
|
||||
bash install.sh
|
||||
|
||||
.PHONY: run
|
||||
run: protogen
|
||||
@@ -22,4 +22,8 @@ protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
rm -rf venv __pycache__
|
||||
14
backend/python/coqui/install.sh
Executable file
14
backend/python/coqui/install.sh
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
|
||||
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
|
||||
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
|
||||
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
|
||||
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
|
||||
installRequirements
|
||||
5
backend/python/coqui/requirements-intel.txt
Normal file
5
backend/python/coqui/requirements-intel.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
torchaudio
|
||||
optimum[openvino]
|
||||
6
backend/python/coqui/requirements.txt
Normal file
6
backend/python/coqui/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
accelerate
|
||||
TTS==0.22.0
|
||||
grpcio==1.63.0
|
||||
protobuf
|
||||
certifi
|
||||
transformers
|
||||
@@ -1,14 +1,4 @@
|
||||
#!/bin/bash
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the ttsbark server with conda
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python $DIR/coqui_server.py $@
|
||||
startBackend $@
|
||||
@@ -18,7 +18,7 @@ class TestBackendServicer(unittest.TestCase):
|
||||
"""
|
||||
This method sets up the gRPC service by starting the server
|
||||
"""
|
||||
self.service = subprocess.Popen(["python3", "coqui_server.py", "--addr", "localhost:50051"])
|
||||
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
|
||||
time.sleep(10)
|
||||
|
||||
def tearDown(self) -> None:
|
||||
|
||||
11
backend/python/coqui/test.sh
Normal file → Executable file
11
backend/python/coqui/test.sh
Normal file → Executable file
@@ -1,11 +1,6 @@
|
||||
#!/bin/bash
|
||||
##
|
||||
## A bash script wrapper that runs the bark server with conda
|
||||
set -e
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python -m unittest $DIR/test.py
|
||||
runUnittests
|
||||
|
||||
@@ -13,8 +13,7 @@ endif
|
||||
|
||||
.PHONY: diffusers
|
||||
diffusers: protogen
|
||||
@echo "Installing $(CONDA_ENV_PATH)..."
|
||||
bash install.sh $(CONDA_ENV_PATH)
|
||||
bash install.sh
|
||||
|
||||
.PHONY: run
|
||||
run: protogen
|
||||
@@ -33,4 +32,8 @@ protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
rm -rf venv __pycache__
|
||||
@@ -1,65 +0,0 @@
|
||||
name: diffusers
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- tzdata=2023c=h04d1e81_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- --pre
|
||||
- --extra-index-url https://download.pytorch.org/whl/nightly/
|
||||
- accelerate>=0.11.0
|
||||
- certifi==2023.7.22
|
||||
- charset-normalizer==3.3.0
|
||||
- compel==2.0.2
|
||||
- diffusers==0.24.0
|
||||
- filelock==3.12.4
|
||||
- fsspec==2023.9.2
|
||||
- grpcio==1.63.0
|
||||
- huggingface-hub>=0.19.4
|
||||
- idna==3.4
|
||||
- importlib-metadata==6.8.0
|
||||
- jinja2==3.1.2
|
||||
- markupsafe==2.1.3
|
||||
- mpmath==1.3.0
|
||||
- networkx==3.1
|
||||
- numpy==1.26.0
|
||||
- omegaconf
|
||||
- packaging==23.2
|
||||
- pillow==10.0.1
|
||||
- protobuf==4.24.4
|
||||
- psutil==5.9.5
|
||||
- pyparsing==3.1.1
|
||||
- pyyaml==6.0.1
|
||||
- regex==2023.10.3
|
||||
- requests==2.31.0
|
||||
- safetensors==0.4.0
|
||||
- sympy==1.12
|
||||
- tqdm==4.66.1
|
||||
- transformers>=4.25.1
|
||||
- triton==2.1.0
|
||||
- typing-extensions==4.8.0
|
||||
- urllib3==2.0.6
|
||||
- zipp==3.17.0
|
||||
- torch
|
||||
- opencv-python
|
||||
prefix: /opt/conda/envs/diffusers
|
||||
@@ -1,75 +0,0 @@
|
||||
name: diffusers
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- tzdata=2023c=h04d1e81_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- accelerate>=0.11.0
|
||||
- certifi==2023.7.22
|
||||
- charset-normalizer==3.3.0
|
||||
- compel==2.0.2
|
||||
- diffusers==0.24.0
|
||||
- filelock==3.12.4
|
||||
- fsspec==2023.9.2
|
||||
- grpcio==1.63.0
|
||||
- huggingface-hub>=0.19.4
|
||||
- idna==3.4
|
||||
- importlib-metadata==6.8.0
|
||||
- jinja2==3.1.2
|
||||
- markupsafe==2.1.3
|
||||
- mpmath==1.3.0
|
||||
- networkx==3.1
|
||||
- numpy==1.26.0
|
||||
- nvidia-cublas-cu12==12.1.3.1
|
||||
- nvidia-cuda-cupti-cu12==12.1.105
|
||||
- nvidia-cuda-nvrtc-cu12==12.1.105
|
||||
- nvidia-cuda-runtime-cu12==12.1.105
|
||||
- nvidia-cudnn-cu12==8.9.2.26
|
||||
- nvidia-cufft-cu12==11.0.2.54
|
||||
- nvidia-curand-cu12==10.3.2.106
|
||||
- nvidia-cusolver-cu12==11.4.5.107
|
||||
- nvidia-cusparse-cu12==12.1.0.106
|
||||
- nvidia-nccl-cu12==2.18.1
|
||||
- nvidia-nvjitlink-cu12==12.2.140
|
||||
- nvidia-nvtx-cu12==12.1.105
|
||||
- omegaconf
|
||||
- packaging==23.2
|
||||
- pillow==10.0.1
|
||||
- protobuf==4.24.4
|
||||
- psutil==5.9.5
|
||||
- pyparsing==3.1.1
|
||||
- pyyaml==6.0.1
|
||||
- regex==2023.10.3
|
||||
- requests==2.31.0
|
||||
- safetensors==0.4.0
|
||||
- sympy==1.12
|
||||
- torch==2.1.0
|
||||
- tqdm==4.66.1
|
||||
- transformers>=4.25.1
|
||||
- triton==2.1.0
|
||||
- typing-extensions==4.8.0
|
||||
- urllib3==2.0.6
|
||||
- zipp==3.17.0
|
||||
- opencv-python
|
||||
prefix: /opt/conda/envs/diffusers
|
||||
@@ -1,50 +1,14 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
set -e
|
||||
|
||||
SKIP_CONDA=${SKIP_CONDA:-0}
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# Check if environment exist
|
||||
conda_env_exists(){
|
||||
! conda list --name "${@}" >/dev/null 2>/dev/null
|
||||
}
|
||||
|
||||
if [ $SKIP_CONDA -eq 1 ]; then
|
||||
echo "Skipping conda environment installation"
|
||||
else
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
if conda_env_exists "diffusers" ; then
|
||||
echo "Creating virtual environment..."
|
||||
conda env create --name diffusers --file $1
|
||||
echo "Virtual environment created."
|
||||
else
|
||||
echo "Virtual environment already exists."
|
||||
fi
|
||||
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
|
||||
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
|
||||
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
|
||||
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
|
||||
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
|
||||
if [ -d "/opt/intel" ]; then
|
||||
# Intel GPU: If the directory exists, we assume we are using the Intel image
|
||||
# https://github.com/intel/intel-extension-for-pytorch/issues/538
|
||||
pip install torch==2.1.0a0 \
|
||||
torchvision==0.16.0a0 \
|
||||
torchaudio==2.1.0a0 \
|
||||
intel-extension-for-pytorch==2.1.10+xpu \
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
|
||||
pip install google-api-python-client \
|
||||
grpcio==1.63.0 \
|
||||
grpcio-tools==1.63.0 \
|
||||
diffusers==0.24.0 \
|
||||
transformers>=4.25.1 \
|
||||
accelerate \
|
||||
compel==2.0.2 \
|
||||
Pillow
|
||||
fi
|
||||
|
||||
if [ "$PIP_CACHE_PURGE" = true ] ; then
|
||||
if [ $SKIP_CONDA -ne 1 ]; then
|
||||
# Activate conda environment
|
||||
source activate diffusers
|
||||
fi
|
||||
|
||||
pip cache purge
|
||||
fi
|
||||
installRequirements
|
||||
|
||||
5
backend/python/diffusers/requirements-intel.txt
Normal file
5
backend/python/diffusers/requirements-intel.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
torchvision
|
||||
optimum[openvino]
|
||||
10
backend/python/diffusers/requirements.txt
Normal file
10
backend/python/diffusers/requirements.txt
Normal file
@@ -0,0 +1,10 @@
|
||||
accelerate
|
||||
compel
|
||||
diffusers
|
||||
grpcio==1.63.0
|
||||
opencv-python
|
||||
pillow
|
||||
protobuf
|
||||
torch
|
||||
transformers
|
||||
certifi
|
||||
@@ -1,19 +1,4 @@
|
||||
#!/bin/bash
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the diffusers server with conda
|
||||
|
||||
if [ -d "/opt/intel" ]; then
|
||||
# Assumes we are using the Intel oneAPI container image
|
||||
# https://github.com/intel/intel-extension-for-pytorch/issues/538
|
||||
export XPU=1
|
||||
else
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
# Activate conda environment
|
||||
source activate diffusers
|
||||
fi
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python $DIR/backend_diffusers.py $@
|
||||
startBackend $@
|
||||
@@ -18,7 +18,7 @@ class TestBackendServicer(unittest.TestCase):
|
||||
"""
|
||||
This method sets up the gRPC service by starting the server
|
||||
"""
|
||||
self.service = subprocess.Popen(["python3", "backend_diffusers.py", "--addr", "localhost:50051"])
|
||||
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
|
||||
|
||||
def tearDown(self) -> None:
|
||||
"""
|
||||
|
||||
14
backend/python/diffusers/test.sh
Normal file → Executable file
14
backend/python/diffusers/test.sh
Normal file → Executable file
@@ -1,14 +1,6 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the diffusers server with conda
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate diffusers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python -m unittest $DIR/test.py
|
||||
runUnittests
|
||||
|
||||
1
backend/python/exllama/.gitignore
vendored
Normal file
1
backend/python/exllama/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
source
|
||||
@@ -18,4 +18,8 @@ protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
$(RM) -r venv source __pycache__
|
||||
@@ -14,9 +14,9 @@ import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import version as torch_version
|
||||
|
||||
from tokenizer import ExLlamaTokenizer
|
||||
from generator import ExLlamaGenerator
|
||||
from model import ExLlama, ExLlamaCache, ExLlamaConfig
|
||||
from source.tokenizer import ExLlamaTokenizer
|
||||
from source.generator import ExLlamaGenerator
|
||||
from source.model import ExLlama, ExLlamaCache, ExLlamaConfig
|
||||
|
||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
name: exllama
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- tzdata=2023c=h04d1e81_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- filelock==3.12.4
|
||||
- fsspec==2023.9.2
|
||||
- grpcio==1.63.0
|
||||
- jinja2==3.1.2
|
||||
- markupsafe==2.1.3
|
||||
- mpmath==1.3.0
|
||||
- networkx==3.1
|
||||
- ninja==1.11.1
|
||||
- protobuf==4.24.4
|
||||
- nvidia-cublas-cu12==12.1.3.1
|
||||
- nvidia-cuda-cupti-cu12==12.1.105
|
||||
- nvidia-cuda-nvrtc-cu12==12.1.105
|
||||
- nvidia-cuda-runtime-cu12==12.1.105
|
||||
- nvidia-cudnn-cu12==8.9.2.26
|
||||
- nvidia-cufft-cu12==11.0.2.54
|
||||
- nvidia-curand-cu12==10.3.2.106
|
||||
- nvidia-cusolver-cu12==11.4.5.107
|
||||
- nvidia-cusparse-cu12==12.1.0.106
|
||||
- nvidia-nccl-cu12==2.18.1
|
||||
- nvidia-nvjitlink-cu12==12.2.140
|
||||
- nvidia-nvtx-cu12==12.1.105
|
||||
- safetensors==0.3.2
|
||||
- sentencepiece==0.1.99
|
||||
- sympy==1.12
|
||||
- torch==2.1.0
|
||||
- triton==2.1.0
|
||||
- typing-extensions==4.8.0
|
||||
- numpy
|
||||
prefix: /opt/conda/envs/exllama
|
||||
@@ -1,32 +1,13 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
set -e
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
LIMIT_TARGETS="cublas"
|
||||
|
||||
if [ "$BUILD_TYPE" != "cublas" ]; then
|
||||
echo "[exllama] Attention!!! Nvidia GPU is required - skipping installation"
|
||||
exit 0
|
||||
fi
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# Check if environment exist
|
||||
conda_env_exists(){
|
||||
! conda list --name "${@}" >/dev/null 2>/dev/null
|
||||
}
|
||||
installRequirements
|
||||
|
||||
if conda_env_exists "exllama" ; then
|
||||
echo "Creating virtual environment..."
|
||||
conda env create --name exllama --file $1
|
||||
echo "Virtual environment created."
|
||||
else
|
||||
echo "Virtual environment already exists."
|
||||
fi
|
||||
git clone https://github.com/turboderp/exllama $MY_DIR/source
|
||||
uv pip install ${BUILD_ISOLATION_FLAG} --requirement ${MY_DIR}/source/requirements.txt
|
||||
|
||||
source activate exllama
|
||||
|
||||
git clone https://github.com/turboderp/exllama $CONDA_PREFIX/exllama && pushd $CONDA_PREFIX/exllama && pip install -r requirements.txt && popd
|
||||
|
||||
cp -rfv $CONDA_PREFIX/exllama/* ./
|
||||
|
||||
if [ "$PIP_CACHE_PURGE" = true ] ; then
|
||||
pip cache purge
|
||||
fi
|
||||
cp -v ./*py $MY_DIR/source/
|
||||
|
||||
6
backend/python/exllama/requirements.txt
Normal file
6
backend/python/exllama/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
grpcio==1.63.0
|
||||
protobuf
|
||||
torch
|
||||
transformers
|
||||
certifi
|
||||
setuptools
|
||||
@@ -1,15 +1,7 @@
|
||||
#!/bin/bash
|
||||
LIMIT_TARGETS="cublas"
|
||||
BACKEND_FILE="${MY_DIR}/source/backend.py"
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the exllama server with conda
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# Activate conda environment
|
||||
source activate exllama
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
cd $DIR
|
||||
|
||||
python $DIR/exllama.py $@
|
||||
startBackend $@
|
||||
6
backend/python/exllama/test.sh
Executable file
6
backend/python/exllama/test.sh
Executable file
@@ -0,0 +1,6 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
runUnittests
|
||||
1
backend/python/exllama2/.gitignore
vendored
Normal file
1
backend/python/exllama2/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
source
|
||||
@@ -1,6 +1,5 @@
|
||||
.PHONY: exllama2
|
||||
exllama2: protogen
|
||||
$(MAKE) -C ../common-env/transformers
|
||||
bash install.sh
|
||||
|
||||
.PHONY: run
|
||||
@@ -17,4 +16,8 @@ protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
$(RM) -r venv source __pycache__
|
||||
@@ -1,57 +0,0 @@
|
||||
name: exllama2
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- tzdata=2023c=h04d1e81_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- filelock==3.12.4
|
||||
- fsspec==2023.9.2
|
||||
- grpcio==1.63.0
|
||||
- markupsafe==2.1.3
|
||||
- mpmath==1.3.0
|
||||
- networkx==3.1
|
||||
- protobuf==4.24.4
|
||||
- nvidia-cublas-cu12==12.1.3.1
|
||||
- nvidia-cuda-cupti-cu12==12.1.105
|
||||
- nvidia-cuda-nvrtc-cu12==12.1.105
|
||||
- nvidia-cuda-runtime-cu12==12.1.105
|
||||
- nvidia-cudnn-cu12==8.9.2.26
|
||||
- nvidia-cufft-cu12==11.0.2.54
|
||||
- nvidia-curand-cu12==10.3.2.106
|
||||
- nvidia-cusolver-cu12==11.4.5.107
|
||||
- nvidia-cusparse-cu12==12.1.0.106
|
||||
- nvidia-nccl-cu12==2.18.1
|
||||
- nvidia-nvjitlink-cu12==12.2.140
|
||||
- nvidia-nvtx-cu12==12.1.105
|
||||
- pandas
|
||||
- numpy
|
||||
- ninja
|
||||
- fastparquet
|
||||
- torch>=2.1.0
|
||||
- safetensors>=0.3.2
|
||||
- sentencepiece>=0.1.97
|
||||
- pygments
|
||||
- websockets
|
||||
- regex
|
||||
prefix: /opt/conda/envs/exllama2
|
||||
@@ -1,32 +1,16 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
##
|
||||
## A bash script installs the required dependencies of VALL-E-X and prepares the environment
|
||||
export SHA=c0ddebaaaf8ffd1b3529c2bb654e650bce2f790f
|
||||
|
||||
if [ "$BUILD_TYPE" != "cublas" ]; then
|
||||
echo "[exllamav2] Attention!!! Nvidia GPU is required - skipping installation"
|
||||
exit 0
|
||||
fi
|
||||
LIMIT_TARGETS="cublas"
|
||||
EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"
|
||||
EXLLAMA2_VERSION=c0ddebaaaf8ffd1b3529c2bb654e650bce2f790f
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
source activate transformers
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
echo $CONDA_PREFIX
|
||||
installRequirements
|
||||
|
||||
git clone https://github.com/turboderp/exllamav2 $CONDA_PREFIX/exllamav2
|
||||
git clone https://github.com/turboderp/exllamav2 $MY_DIR/source
|
||||
pushd ${MY_DIR}/source && git checkout -b build ${EXLLAMA2_VERSION} && popd
|
||||
|
||||
pushd $CONDA_PREFIX/exllamav2
|
||||
|
||||
git checkout -b build $SHA
|
||||
|
||||
# TODO: this needs to be pinned within the conda environments
|
||||
pip install -r requirements.txt
|
||||
|
||||
popd
|
||||
|
||||
cp -rfv $CONDA_PREFIX/exllamav2/* ./
|
||||
|
||||
if [ "$PIP_CACHE_PURGE" = true ] ; then
|
||||
pip cache purge
|
||||
fi
|
||||
# This installs exllamav2 in JIT mode so it will compile the appropriate torch extension at runtime
|
||||
EXLLAMA_NOCOMPILE= uv pip install ${EXTRA_PIP_INSTALL_FLAGS} ${MY_DIR}/source/
|
||||
|
||||
4
backend/python/exllama2/requirements-install.txt
Normal file
4
backend/python/exllama2/requirements-install.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
# This is here to trigger the install script to add --no-build-isolation to the uv pip install commands
|
||||
# exllama2 does not specify it's build requirements per PEP517, so we need to provide some things ourselves
|
||||
wheel
|
||||
setuptools
|
||||
7
backend/python/exllama2/requirements.txt
Normal file
7
backend/python/exllama2/requirements.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
accelerate
|
||||
grpcio==1.63.0
|
||||
protobuf
|
||||
certifi
|
||||
torch
|
||||
wheel
|
||||
setuptools
|
||||
@@ -1,16 +1,6 @@
|
||||
#!/bin/bash
|
||||
LIMIT_TARGETS="cublas"
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the exllama server with conda
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
cd $DIR
|
||||
|
||||
python $DIR/exllama2_backend.py $@
|
||||
startBackend $@
|
||||
6
backend/python/exllama2/test.sh
Executable file
6
backend/python/exllama2/test.sh
Executable file
@@ -0,0 +1,6 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
runUnittests
|
||||
@@ -1,7 +1,6 @@
|
||||
.PHONY: mamba
|
||||
mamba: protogen
|
||||
$(MAKE) -C ../common-env/transformers
|
||||
bash install.sh
|
||||
bash install.sh
|
||||
|
||||
.PHONY: run
|
||||
run: protogen
|
||||
@@ -23,4 +22,8 @@ protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
$(RM) -r venv __pycache__
|
||||
@@ -1,22 +1,9 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
##
|
||||
## A bash script installs the required dependencies of VALL-E-X and prepares the environment
|
||||
|
||||
if [ "$BUILD_TYPE" != "cublas" ]; then
|
||||
echo "[mamba] Attention!!! nvcc is required - skipping installation"
|
||||
exit 0
|
||||
fi
|
||||
LIMIT_TARGETS="cublas"
|
||||
EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
|
||||
echo $CONDA_PREFIX
|
||||
|
||||
pip install causal-conv1d==1.0.0 mamba-ssm==1.0.1
|
||||
|
||||
if [ "$PIP_CACHE_PURGE" = true ] ; then
|
||||
pip cache purge
|
||||
fi
|
||||
installRequirements
|
||||
7
backend/python/mamba/requirements-install.txt
Normal file
7
backend/python/mamba/requirements-install.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
# mabma does not specify it's build dependencies per PEP517, so we need to disable build isolation
|
||||
# this also means that we need to install the basic build dependencies into the venv ourselves
|
||||
# https://github.com/Dao-AILab/causal-conv1d/issues/24
|
||||
packaging
|
||||
setuptools
|
||||
wheel
|
||||
torch==2.2.0
|
||||
6
backend/python/mamba/requirements.txt
Normal file
6
backend/python/mamba/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
causal-conv1d==1.2.0.post2
|
||||
mamba-ssm==1.2.0.post1
|
||||
grpcio==1.63.0
|
||||
protobuf
|
||||
certifi
|
||||
transformers
|
||||
@@ -1,14 +1,6 @@
|
||||
#!/bin/bash
|
||||
LIMIT_TARGETS="cublas"
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the diffusers server with conda
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python $DIR/backend_mamba.py $@
|
||||
startBackend $@
|
||||
@@ -20,7 +20,7 @@ class TestBackendServicer(unittest.TestCase):
|
||||
This class contains methods to test the startup and shutdown of the gRPC service.
|
||||
"""
|
||||
def setUp(self):
|
||||
self.service = subprocess.Popen(["python", "backend_vllm.py", "--addr", "localhost:50051"])
|
||||
self.service = subprocess.Popen(["python", "backend.py", "--addr", "localhost:50051"])
|
||||
time.sleep(10)
|
||||
|
||||
def tearDown(self) -> None:
|
||||
11
backend/python/mamba/test.sh
Normal file → Executable file
11
backend/python/mamba/test.sh
Normal file → Executable file
@@ -1,11 +1,6 @@
|
||||
#!/bin/bash
|
||||
##
|
||||
## A bash script wrapper that runs the transformers server with conda
|
||||
set -e
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python -m unittest $DIR/test_backend_mamba.py
|
||||
runUnittests
|
||||
|
||||
@@ -36,4 +36,8 @@ protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
$(RM) -r venv __pycache__
|
||||
@@ -1,39 +1,19 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
set -e
|
||||
|
||||
SKIP_CONDA=${SKIP_CONDA:-0}
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# Check if environment exist
|
||||
conda_env_exists(){
|
||||
! conda list --name "${@}" >/dev/null 2>/dev/null
|
||||
}
|
||||
|
||||
if [ $SKIP_CONDA -eq 1 ]; then
|
||||
echo "Skipping conda environment installation"
|
||||
else
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
if conda_env_exists "parler" ; then
|
||||
echo "Creating virtual environment..."
|
||||
conda env create --name parler --file $1
|
||||
echo "Virtual environment created."
|
||||
else
|
||||
echo "Virtual environment already exists."
|
||||
fi
|
||||
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
|
||||
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
|
||||
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
|
||||
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
|
||||
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
|
||||
if [ $SKIP_CONDA -ne 1 ]; then
|
||||
# Activate conda environment
|
||||
source activate parler
|
||||
# https://github.com/descriptinc/audiotools/issues/101
|
||||
# incompatible protobuf versions.
|
||||
curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o $CONDA_PREFIX/lib/python3.11/site-packages/google/protobuf/internal/builder.py
|
||||
fi
|
||||
installRequirements
|
||||
|
||||
if [ "$PIP_CACHE_PURGE" = true ] ; then
|
||||
if [ $SKIP_CONDA -ne 1 ]; then
|
||||
# Activate conda environment
|
||||
source activate parler
|
||||
fi
|
||||
|
||||
pip cache purge
|
||||
fi
|
||||
# https://github.com/descriptinc/audiotools/issues/101
|
||||
# incompatible protobuf versions.
|
||||
PYDIR=$(ls ${MY_DIR}/venv/lib)
|
||||
curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/builder.py
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
name: parler
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- tzdata=2023c=h04d1e81_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- accelerate>=0.11.0
|
||||
- grpcio==1.63.0
|
||||
- numpy==1.26.0
|
||||
- nvidia-cublas-cu12==12.1.3.1
|
||||
- nvidia-cuda-cupti-cu12==12.1.105
|
||||
- nvidia-cuda-nvrtc-cu12==12.1.105
|
||||
- nvidia-cuda-runtime-cu12==12.1.105
|
||||
- nvidia-cudnn-cu12==8.9.2.26
|
||||
- nvidia-cufft-cu12==11.0.2.54
|
||||
- nvidia-curand-cu12==10.3.2.106
|
||||
- nvidia-cusolver-cu12==11.4.5.107
|
||||
- nvidia-cusparse-cu12==12.1.0.106
|
||||
- nvidia-nccl-cu12==2.18.1
|
||||
- nvidia-nvjitlink-cu12==12.2.140
|
||||
- nvidia-nvtx-cu12==12.1.105
|
||||
- torch==2.1.0
|
||||
- transformers>=4.34.0
|
||||
- descript-audio-codec
|
||||
- sentencepiece
|
||||
- git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
|
||||
prefix: /opt/conda/envs/diffusers
|
||||
@@ -1,36 +0,0 @@
|
||||
name: parler
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- tzdata=2023c=h04d1e81_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- accelerate>=0.11.0
|
||||
- numpy==1.26.0
|
||||
- grpcio==1.63.0
|
||||
- torch==2.1.0
|
||||
- transformers>=4.34.0
|
||||
- descript-audio-codec
|
||||
- sentencepiece
|
||||
- git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
|
||||
prefix: /opt/conda/envs/parler
|
||||
5
backend/python/parler-tts/requirements-intel.txt
Normal file
5
backend/python/parler-tts/requirements-intel.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
torchaudio
|
||||
optimum[openvino]
|
||||
7
backend/python/parler-tts/requirements.txt
Normal file
7
backend/python/parler-tts/requirements.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
accelerate
|
||||
grpcio==1.63.0
|
||||
protobuf
|
||||
torch
|
||||
git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
|
||||
certifi
|
||||
transformers
|
||||
16
backend/python/parler-tts/run.sh
Normal file → Executable file
16
backend/python/parler-tts/run.sh
Normal file → Executable file
@@ -1,16 +1,4 @@
|
||||
#!/bin/bash
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the parler-tts server with conda
|
||||
|
||||
echo "Launching gRPC server for parler-tts"
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate parler
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python $DIR/parler_tts_server.py $@
|
||||
startBackend $@
|
||||
@@ -18,7 +18,7 @@ class TestBackendServicer(unittest.TestCase):
|
||||
"""
|
||||
This method sets up the gRPC service by starting the server
|
||||
"""
|
||||
self.service = subprocess.Popen(["python3", "parler_tts_server.py", "--addr", "localhost:50051"])
|
||||
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
|
||||
time.sleep(10)
|
||||
|
||||
def tearDown(self) -> None:
|
||||
11
backend/python/parler-tts/test.sh
Normal file → Executable file
11
backend/python/parler-tts/test.sh
Normal file → Executable file
@@ -1,11 +1,6 @@
|
||||
#!/bin/bash
|
||||
##
|
||||
## A bash script wrapper that runs the transformers server with conda
|
||||
set -e
|
||||
|
||||
# Activate conda environment
|
||||
source activate parler
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python -m unittest $DIR/test_parler.py
|
||||
runUnittests
|
||||
|
||||
@@ -24,4 +24,8 @@ protogen-clean:
|
||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||
|
||||
backend_pb2_grpc.py backend_pb2.py:
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||
|
||||
.PHONY: clean
|
||||
clean: protogen-clean
|
||||
rm -rf venv __pycache__
|
||||
13
backend/python/petals/install.sh
Normal file → Executable file
13
backend/python/petals/install.sh
Normal file → Executable file
@@ -1,5 +1,14 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
conda env create --name petals --file $1
|
||||
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
|
||||
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
|
||||
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
|
||||
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
|
||||
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||
fi
|
||||
|
||||
installRequirements
|
||||
|
||||
@@ -1,30 +0,0 @@
|
||||
name: petals
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
# - _libgcc_mutex=0.1=main
|
||||
# - _openmp_mutex=5.1=1_gnu
|
||||
# - bzip2=1.0.8=h7b6447c_0
|
||||
# - ca-certificates=2023.08.22=h06a4308_0
|
||||
# - ld_impl_linux-64=2.38=h1181459_1
|
||||
# - libffi=3.4.4=h6a678d5_0
|
||||
# - libgcc-ng=11.2.0=h1234567_1
|
||||
# - libgomp=11.2.0=h1234567_1
|
||||
# - libstdcxx-ng=11.2.0=h1234567_1
|
||||
# - libuuid=1.41.5=h5eee18b_0
|
||||
# - ncurses=6.4=h6a678d5_0
|
||||
# - openssl=3.0.11=h7f8727e_2
|
||||
# - pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
# - readline=8.2=h5eee18b_0
|
||||
# - setuptools=68.0.0=py311h06a4308_0
|
||||
# - sqlite=3.41.2=h5eee18b_0
|
||||
# - tk=8.6.12=h1ccaba5_0
|
||||
# - tzdata=2023c=h04d1e81_0
|
||||
# - wheel=0.41.2=py311h06a4308_0
|
||||
# - xz=5.4.2=h5eee18b_0
|
||||
# - zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- torch==2.1.0
|
||||
- git+https://github.com/bigscience-workshop/petals
|
||||
prefix: /opt/conda/envs/petals
|
||||
4
backend/python/petals/requirements-intel.txt
Normal file
4
backend/python/petals/requirements-intel.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
optimum[openvino]
|
||||
3
backend/python/petals/requirements.txt
Normal file
3
backend/python/petals/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
git+https://github.com/bigscience-workshop/petals
|
||||
certifi
|
||||
transformers
|
||||
@@ -1,23 +1,4 @@
|
||||
#!/bin/bash
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
##
|
||||
## A bash script wrapper that runs the exllama server with conda
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
CONDA_ENV=petals
|
||||
|
||||
# Activate conda environment
|
||||
# if source is available use it, or use conda
|
||||
#
|
||||
if [ -f /opt/conda/bin/activate ]; then
|
||||
source activate $CONDA_ENV
|
||||
else
|
||||
eval "$(conda shell.bash hook)"
|
||||
conda activate $CONDA_ENV
|
||||
fi
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
python $DIR/backend_petals.py $@
|
||||
startBackend $@
|
||||
@@ -20,7 +20,7 @@ class TestBackendServicer(unittest.TestCase):
|
||||
This class contains methods to test the startup and shutdown of the gRPC service.
|
||||
"""
|
||||
def setUp(self):
|
||||
self.service = subprocess.Popen(["python", "backend_petals.py", "--addr", "localhost:50051"])
|
||||
self.service = subprocess.Popen(["python", "backend.py", "--addr", "localhost:50051"])
|
||||
time.sleep(10)
|
||||
|
||||
def tearDown(self) -> None:
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user