mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-24 08:38:02 -04:00
Compare commits
1 Commits
feat/realt
...
fixes/ci
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cc11323d1c |
@@ -1,11 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: model delete
|
|
||||||
type: http
|
|
||||||
seq: 7
|
|
||||||
}
|
|
||||||
|
|
||||||
post {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
|
|
||||||
body: none
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
Binary file not shown.
@@ -1,16 +0,0 @@
|
|||||||
meta {
|
|
||||||
name: transcribe
|
|
||||||
type: http
|
|
||||||
seq: 1
|
|
||||||
}
|
|
||||||
|
|
||||||
post {
|
|
||||||
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/audio/transcriptions
|
|
||||||
body: multipartForm
|
|
||||||
auth: none
|
|
||||||
}
|
|
||||||
|
|
||||||
body:multipart-form {
|
|
||||||
file: @file(transcription/gb1.ogg)
|
|
||||||
model: whisper-1
|
|
||||||
}
|
|
||||||
9
.env
9
.env
@@ -82,15 +82,6 @@
|
|||||||
# Enable to allow p2p mode
|
# Enable to allow p2p mode
|
||||||
# LOCALAI_P2P=true
|
# LOCALAI_P2P=true
|
||||||
|
|
||||||
# Enable to use federated mode
|
|
||||||
# LOCALAI_FEDERATED=true
|
|
||||||
|
|
||||||
# Enable to start federation server
|
|
||||||
# FEDERATED_SERVER=true
|
|
||||||
|
|
||||||
# Define to use federation token
|
|
||||||
# TOKEN=""
|
|
||||||
|
|
||||||
### Watchdog settings
|
### Watchdog settings
|
||||||
###
|
###
|
||||||
# Enables watchdog to kill backends that are inactive for too much time
|
# Enables watchdog to kill backends that are inactive for too much time
|
||||||
|
|||||||
1
.gitattributes
vendored
1
.gitattributes
vendored
@@ -1,2 +1 @@
|
|||||||
*.sh text eol=lf
|
*.sh text eol=lf
|
||||||
backend/cpp/llama/*.hpp linguist-vendored
|
|
||||||
9
.github/labeler.yml
vendored
9
.github/labeler.yml
vendored
@@ -1,15 +1,6 @@
|
|||||||
enhancements:
|
enhancements:
|
||||||
- head-branch: ['^feature', 'feature']
|
- head-branch: ['^feature', 'feature']
|
||||||
|
|
||||||
dependencies:
|
|
||||||
- any:
|
|
||||||
- changed-files:
|
|
||||||
- any-glob-to-any-file: 'Makefile'
|
|
||||||
- changed-files:
|
|
||||||
- any-glob-to-any-file: '*.mod'
|
|
||||||
- changed-files:
|
|
||||||
- any-glob-to-any-file: '*.sum'
|
|
||||||
|
|
||||||
kind/documentation:
|
kind/documentation:
|
||||||
- any:
|
- any:
|
||||||
- changed-files:
|
- changed-files:
|
||||||
|
|||||||
17
.github/workflows/bump_deps.yaml
vendored
17
.github/workflows/bump_deps.yaml
vendored
@@ -12,14 +12,23 @@ jobs:
|
|||||||
- repository: "ggerganov/llama.cpp"
|
- repository: "ggerganov/llama.cpp"
|
||||||
variable: "CPPLLAMA_VERSION"
|
variable: "CPPLLAMA_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
|
- repository: "go-skynet/go-ggml-transformers.cpp"
|
||||||
|
variable: "GOGGMLTRANSFORMERS_VERSION"
|
||||||
|
branch: "master"
|
||||||
|
- repository: "donomii/go-rwkv.cpp"
|
||||||
|
variable: "RWKV_VERSION"
|
||||||
|
branch: "main"
|
||||||
- repository: "ggerganov/whisper.cpp"
|
- repository: "ggerganov/whisper.cpp"
|
||||||
variable: "WHISPER_CPP_VERSION"
|
variable: "WHISPER_CPP_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
- repository: "PABannier/bark.cpp"
|
- repository: "go-skynet/go-bert.cpp"
|
||||||
variable: "BARKCPP_VERSION"
|
variable: "BERT_VERSION"
|
||||||
|
branch: "master"
|
||||||
|
- repository: "go-skynet/bloomz.cpp"
|
||||||
|
variable: "BLOOMZ_VERSION"
|
||||||
branch: "main"
|
branch: "main"
|
||||||
- repository: "leejet/stable-diffusion.cpp"
|
- repository: "mudler/go-ggllm.cpp"
|
||||||
variable: "STABLEDIFFUSION_GGML_VERSION"
|
variable: "GOGGLLM_VERSION"
|
||||||
branch: "master"
|
branch: "master"
|
||||||
- repository: "mudler/go-stable-diffusion"
|
- repository: "mudler/go-stable-diffusion"
|
||||||
variable: "STABLEDIFFUSION_VERSION"
|
variable: "STABLEDIFFUSION_VERSION"
|
||||||
|
|||||||
2
.github/workflows/checksum_checker.yaml
vendored
2
.github/workflows/checksum_checker.yaml
vendored
@@ -23,7 +23,7 @@ jobs:
|
|||||||
sudo pip install --upgrade pip
|
sudo pip install --upgrade pip
|
||||||
pip install huggingface_hub
|
pip install huggingface_hub
|
||||||
- name: 'Setup yq'
|
- name: 'Setup yq'
|
||||||
uses: dcarbone/install-yq-action@v1.3.1
|
uses: dcarbone/install-yq-action@v1.1.1
|
||||||
with:
|
with:
|
||||||
version: 'v4.44.2'
|
version: 'v4.44.2'
|
||||||
download-compressed: true
|
download-compressed: true
|
||||||
|
|||||||
4
.github/workflows/deploy-explorer.yaml
vendored
4
.github/workflows/deploy-explorer.yaml
vendored
@@ -33,7 +33,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
CGO_ENABLED=0 make build-api
|
CGO_ENABLED=0 make build-api
|
||||||
- name: rm
|
- name: rm
|
||||||
uses: appleboy/ssh-action@v1.2.0
|
uses: appleboy/ssh-action@v1.1.0
|
||||||
with:
|
with:
|
||||||
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
||||||
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
||||||
@@ -53,7 +53,7 @@ jobs:
|
|||||||
rm: true
|
rm: true
|
||||||
target: ./local-ai
|
target: ./local-ai
|
||||||
- name: restarting
|
- name: restarting
|
||||||
uses: appleboy/ssh-action@v1.2.0
|
uses: appleboy/ssh-action@v1.1.0
|
||||||
with:
|
with:
|
||||||
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
||||||
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
||||||
|
|||||||
2
.github/workflows/generate_intel_image.yaml
vendored
2
.github/workflows/generate_intel_image.yaml
vendored
@@ -15,7 +15,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- base-image: intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04
|
- base-image: intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
runs-on: ${{matrix.runs-on}}
|
runs-on: ${{matrix.runs-on}}
|
||||||
|
|||||||
47
.github/workflows/image.yml
vendored
47
.github/workflows/image.yml
vendored
@@ -280,7 +280,6 @@ jobs:
|
|||||||
makeflags: ${{ matrix.makeflags }}
|
makeflags: ${{ matrix.makeflags }}
|
||||||
latest-image: ${{ matrix.latest-image }}
|
latest-image: ${{ matrix.latest-image }}
|
||||||
latest-image-aio: ${{ matrix.latest-image-aio }}
|
latest-image-aio: ${{ matrix.latest-image-aio }}
|
||||||
skip-drivers: ${{ matrix.skip-drivers }}
|
|
||||||
secrets:
|
secrets:
|
||||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||||
@@ -302,7 +301,6 @@ jobs:
|
|||||||
latest-image: 'latest-cpu'
|
latest-image: 'latest-cpu'
|
||||||
latest-image-aio: 'latest-aio-cpu'
|
latest-image-aio: 'latest-aio-cpu'
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
skip-drivers: 'false'
|
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "11"
|
cuda-major-version: "11"
|
||||||
cuda-minor-version: "7"
|
cuda-minor-version: "7"
|
||||||
@@ -314,7 +312,6 @@ jobs:
|
|||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
skip-drivers: 'false'
|
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "12"
|
cuda-major-version: "12"
|
||||||
cuda-minor-version: "0"
|
cuda-minor-version: "0"
|
||||||
@@ -326,7 +323,6 @@ jobs:
|
|||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
skip-drivers: 'false'
|
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "11"
|
cuda-major-version: "11"
|
||||||
cuda-minor-version: "7"
|
cuda-minor-version: "7"
|
||||||
@@ -338,7 +334,6 @@ jobs:
|
|||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
skip-drivers: 'false'
|
|
||||||
- build-type: 'cublas'
|
- build-type: 'cublas'
|
||||||
cuda-major-version: "12"
|
cuda-major-version: "12"
|
||||||
cuda-minor-version: "0"
|
cuda-minor-version: "0"
|
||||||
@@ -349,7 +344,6 @@ jobs:
|
|||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
skip-drivers: 'false'
|
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
- build-type: 'vulkan'
|
- build-type: 'vulkan'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
@@ -360,45 +354,4 @@ jobs:
|
|||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
skip-drivers: 'false'
|
|
||||||
makeflags: "--jobs=4 --output-sync=target"
|
makeflags: "--jobs=4 --output-sync=target"
|
||||||
# parallel-builds:
|
|
||||||
# uses: ./.github/workflows/image_build.yml
|
|
||||||
# with:
|
|
||||||
# tag-latest: ${{ matrix.tag-latest }}
|
|
||||||
# tag-suffix: ${{ matrix.tag-suffix }}
|
|
||||||
# ffmpeg: ${{ matrix.ffmpeg }}
|
|
||||||
# image-type: ${{ matrix.image-type }}
|
|
||||||
# build-type: ${{ matrix.build-type }}
|
|
||||||
# cuda-major-version: ${{ matrix.cuda-major-version }}
|
|
||||||
# cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
|
||||||
# platforms: ${{ matrix.platforms }}
|
|
||||||
# runs-on: ${{ matrix.runs-on }}
|
|
||||||
# aio: ${{ matrix.aio }}
|
|
||||||
# base-image: ${{ matrix.base-image }}
|
|
||||||
# grpc-base-image: ${{ matrix.grpc-base-image }}
|
|
||||||
# makeflags: ${{ matrix.makeflags }}
|
|
||||||
# latest-image: ${{ matrix.latest-image }}
|
|
||||||
# latest-image-aio: ${{ matrix.latest-image-aio }}
|
|
||||||
# skip-drivers: ${{ matrix.skip-drivers }}
|
|
||||||
# secrets:
|
|
||||||
# dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
|
||||||
# dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
|
||||||
# quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
|
||||||
# quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
|
||||||
# strategy:
|
|
||||||
# matrix:
|
|
||||||
# include:
|
|
||||||
# - build-type: 'cublas'
|
|
||||||
# cuda-major-version: "12"
|
|
||||||
# cuda-minor-version: "0"
|
|
||||||
# platforms: 'linux/arm64'
|
|
||||||
# tag-latest: 'false'
|
|
||||||
# tag-suffix: '-nvidia-l4t-arm64-core'
|
|
||||||
# latest-image: 'latest-nvidia-l4t-arm64-core'
|
|
||||||
# ffmpeg: 'true'
|
|
||||||
# image-type: 'core'
|
|
||||||
# base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
|
||||||
# runs-on: 'self-hosted'
|
|
||||||
# makeflags: "--jobs=4 --output-sync=target"
|
|
||||||
# skip-drivers: 'true'
|
|
||||||
|
|||||||
6
.github/workflows/image_build.yml
vendored
6
.github/workflows/image_build.yml
vendored
@@ -49,10 +49,6 @@ on:
|
|||||||
description: 'FFMPEG'
|
description: 'FFMPEG'
|
||||||
default: ''
|
default: ''
|
||||||
type: string
|
type: string
|
||||||
skip-drivers:
|
|
||||||
description: 'Skip drivers by default'
|
|
||||||
default: 'false'
|
|
||||||
type: string
|
|
||||||
image-type:
|
image-type:
|
||||||
description: 'Image type'
|
description: 'Image type'
|
||||||
default: ''
|
default: ''
|
||||||
@@ -238,7 +234,6 @@ jobs:
|
|||||||
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
||||||
GRPC_VERSION=v1.65.0
|
GRPC_VERSION=v1.65.0
|
||||||
MAKEFLAGS=${{ inputs.makeflags }}
|
MAKEFLAGS=${{ inputs.makeflags }}
|
||||||
SKIP_DRIVERS=${{ inputs.skip-drivers }}
|
|
||||||
context: .
|
context: .
|
||||||
file: ./Dockerfile
|
file: ./Dockerfile
|
||||||
cache-from: type=gha
|
cache-from: type=gha
|
||||||
@@ -267,7 +262,6 @@ jobs:
|
|||||||
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
|
||||||
GRPC_VERSION=v1.65.0
|
GRPC_VERSION=v1.65.0
|
||||||
MAKEFLAGS=${{ inputs.makeflags }}
|
MAKEFLAGS=${{ inputs.makeflags }}
|
||||||
SKIP_DRIVERS=${{ inputs.skip-drivers }}
|
|
||||||
context: .
|
context: .
|
||||||
file: ./Dockerfile
|
file: ./Dockerfile
|
||||||
cache-from: type=gha
|
cache-from: type=gha
|
||||||
|
|||||||
2
.github/workflows/secscan.yaml
vendored
2
.github/workflows/secscan.yaml
vendored
@@ -18,7 +18,7 @@ jobs:
|
|||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
- name: Run Gosec Security Scanner
|
- name: Run Gosec Security Scanner
|
||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
uses: securego/gosec@v2.22.0
|
uses: securego/gosec@v2.21.4
|
||||||
with:
|
with:
|
||||||
# we let the report trigger content trigger a failure using the GitHub Security features.
|
# we let the report trigger content trigger a failure using the GitHub Security features.
|
||||||
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
||||||
|
|||||||
15
.github/workflows/test-extra.yml
vendored
15
.github/workflows/test-extra.yml
vendored
@@ -105,6 +105,14 @@ jobs:
|
|||||||
tests-parler-tts:
|
tests-parler-tts:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
|
- name: Force Install GIT latest
|
||||||
|
run: |
|
||||||
|
sudo apt-get update \
|
||||||
|
&& sudo apt-get install -y software-properties-common \
|
||||||
|
&& sudo apt-get update \
|
||||||
|
&& sudo add-apt-repository -y ppa:git-core/ppa \
|
||||||
|
&& sudo apt-get update \
|
||||||
|
&& sudo apt-get install -y git
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
@@ -123,13 +131,6 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts
|
make --jobs=5 --output-sync=target -C backend/python/parler-tts
|
||||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts test
|
make --jobs=5 --output-sync=target -C backend/python/parler-tts test
|
||||||
- name: Setup tmate session if tests fail
|
|
||||||
if: ${{ failure() }}
|
|
||||||
uses: mxschmitt/action-tmate@v3.19
|
|
||||||
with:
|
|
||||||
detached: true
|
|
||||||
connect-timeout-seconds: 180
|
|
||||||
limit-access-to-actor: true
|
|
||||||
|
|
||||||
tests-openvoice:
|
tests-openvoice:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|||||||
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
@@ -224,7 +224,7 @@ jobs:
|
|||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
|
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
|
||||||
pip install --user --no-cache-dir grpcio-tools
|
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
export C_INCLUDE_PATH=/usr/local/include
|
export C_INCLUDE_PATH=/usr/local/include
|
||||||
|
|||||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -2,7 +2,6 @@
|
|||||||
/sources/
|
/sources/
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.a
|
*.a
|
||||||
*.o
|
|
||||||
get-sources
|
get-sources
|
||||||
prepare-sources
|
prepare-sources
|
||||||
/backend/cpp/llama/grpc-server
|
/backend/cpp/llama/grpc-server
|
||||||
@@ -13,6 +12,7 @@ prepare-sources
|
|||||||
|
|
||||||
go-ggml-transformers
|
go-ggml-transformers
|
||||||
go-gpt2
|
go-gpt2
|
||||||
|
go-rwkv
|
||||||
whisper.cpp
|
whisper.cpp
|
||||||
/bloomz
|
/bloomz
|
||||||
go-bert
|
go-bert
|
||||||
|
|||||||
12
Dockerfile
12
Dockerfile
@@ -85,8 +85,7 @@ WORKDIR /build
|
|||||||
# The requirements-extras target is for any builds with IMAGE_TYPE=extras. It should not be placed in this target unless every IMAGE_TYPE=extras build will use it
|
# The requirements-extras target is for any builds with IMAGE_TYPE=extras. It should not be placed in this target unless every IMAGE_TYPE=extras build will use it
|
||||||
FROM requirements-core AS requirements-extras
|
FROM requirements-core AS requirements-extras
|
||||||
|
|
||||||
# Install uv as a system package
|
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
RUN curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR=/usr/bin sh
|
|
||||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||||
|
|
||||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||||
@@ -115,13 +114,12 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers
|
|||||||
ARG BUILD_TYPE
|
ARG BUILD_TYPE
|
||||||
ARG CUDA_MAJOR_VERSION=12
|
ARG CUDA_MAJOR_VERSION=12
|
||||||
ARG CUDA_MINOR_VERSION=0
|
ARG CUDA_MINOR_VERSION=0
|
||||||
ARG SKIP_DRIVERS=false
|
|
||||||
|
|
||||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||||
|
|
||||||
# Vulkan requirements
|
# Vulkan requirements
|
||||||
RUN <<EOT bash
|
RUN <<EOT bash
|
||||||
if [ "${BUILD_TYPE}" = "vulkan" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
if [ "${BUILD_TYPE}" = "vulkan" ]; then
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
software-properties-common pciutils wget gpg-agent && \
|
software-properties-common pciutils wget gpg-agent && \
|
||||||
@@ -137,7 +135,7 @@ EOT
|
|||||||
|
|
||||||
# CuBLAS requirements
|
# CuBLAS requirements
|
||||||
RUN <<EOT bash
|
RUN <<EOT bash
|
||||||
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then
|
if [ "${BUILD_TYPE}" = "cublas" ]; then
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
software-properties-common pciutils
|
software-properties-common pciutils
|
||||||
@@ -163,7 +161,7 @@ RUN <<EOT bash
|
|||||||
EOT
|
EOT
|
||||||
|
|
||||||
# If we are building with clblas support, we need the libraries for the builds
|
# If we are building with clblas support, we need the libraries for the builds
|
||||||
RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
|
RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
libclblast-dev && \
|
libclblast-dev && \
|
||||||
@@ -171,7 +169,7 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
|
|||||||
rm -rf /var/lib/apt/lists/* \
|
rm -rf /var/lib/apt/lists/* \
|
||||||
; fi
|
; fi
|
||||||
|
|
||||||
RUN if [ "${BUILD_TYPE}" = "hipblas" ] && [ "${SKIP_DRIVERS}" = "false" ]; then \
|
RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
hipblas-dev \
|
hipblas-dev \
|
||||||
|
|||||||
188
Makefile
188
Makefile
@@ -8,15 +8,23 @@ DETECT_LIBS?=true
|
|||||||
# llama.cpp versions
|
# llama.cpp versions
|
||||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||||
CPPLLAMA_VERSION?=504af20ee4eae72080a56d59d744f6774f7901ce
|
CPPLLAMA_VERSION?=0a1c750c80147687df267114c81956757cc14382
|
||||||
|
|
||||||
|
# go-rwkv version
|
||||||
|
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||||
|
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
|
||||||
|
|
||||||
# whisper.cpp version
|
# whisper.cpp version
|
||||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||||
WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
|
WHISPER_CPP_VERSION?=0fbaac9c891055796456df7b9122a70c220f9ca1
|
||||||
|
|
||||||
|
# bert.cpp version
|
||||||
|
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
||||||
|
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
|
||||||
|
|
||||||
# go-piper version
|
# go-piper version
|
||||||
PIPER_REPO?=https://github.com/mudler/go-piper
|
PIPER_REPO?=https://github.com/mudler/go-piper
|
||||||
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
|
PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759
|
||||||
|
|
||||||
# stablediffusion version
|
# stablediffusion version
|
||||||
STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
|
STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
|
||||||
@@ -26,18 +34,6 @@ STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
|
|||||||
TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
|
TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
|
||||||
TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
|
TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
|
||||||
|
|
||||||
# bark.cpp
|
|
||||||
BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
|
|
||||||
BARKCPP_VERSION?=v1.0.0
|
|
||||||
|
|
||||||
# stablediffusion.cpp (ggml)
|
|
||||||
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
|
|
||||||
STABLEDIFFUSION_GGML_VERSION?=dcf91f9e0f2cbf9da472ee2a556751ed4bab2d2a
|
|
||||||
|
|
||||||
ONNX_VERSION?=1.20.0
|
|
||||||
ONNX_ARCH?=x64
|
|
||||||
ONNX_OS?=linux
|
|
||||||
|
|
||||||
export BUILD_TYPE?=
|
export BUILD_TYPE?=
|
||||||
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
|
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
|
||||||
export CMAKE_ARGS?=
|
export CMAKE_ARGS?=
|
||||||
@@ -49,7 +45,6 @@ CGO_LDFLAGS_WHISPER+=-lggml
|
|||||||
CUDA_LIBPATH?=/usr/local/cuda/lib64/
|
CUDA_LIBPATH?=/usr/local/cuda/lib64/
|
||||||
GO_TAGS?=
|
GO_TAGS?=
|
||||||
BUILD_ID?=
|
BUILD_ID?=
|
||||||
NATIVE?=false
|
|
||||||
|
|
||||||
TEST_DIR=/tmp/test
|
TEST_DIR=/tmp/test
|
||||||
|
|
||||||
@@ -88,25 +83,7 @@ ifndef UNAME_S
|
|||||||
UNAME_S := $(shell uname -s)
|
UNAME_S := $(shell uname -s)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
|
|
||||||
ifeq ($(NATIVE),false)
|
|
||||||
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Detect if we are running on arm64
|
|
||||||
ifneq (,$(findstring aarch64,$(shell uname -m)))
|
|
||||||
ONNX_ARCH=aarch64
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(OS),Darwin)
|
ifeq ($(OS),Darwin)
|
||||||
ONNX_OS=osx
|
|
||||||
ifneq (,$(findstring aarch64,$(shell uname -m)))
|
|
||||||
ONNX_ARCH=arm64
|
|
||||||
else ifneq (,$(findstring arm64,$(shell uname -m)))
|
|
||||||
ONNX_ARCH=arm64
|
|
||||||
else
|
|
||||||
ONNX_ARCH=x86_64
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(OSX_SIGNING_IDENTITY),)
|
ifeq ($(OSX_SIGNING_IDENTITY),)
|
||||||
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
|
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
|
||||||
@@ -161,10 +138,10 @@ ifeq ($(BUILD_TYPE),hipblas)
|
|||||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||||
# llama-ggml has no hipblas support, so override it here.
|
# llama-ggml has no hipblas support, so override it here.
|
||||||
export STABLE_BUILD_TYPE=
|
export STABLE_BUILD_TYPE=
|
||||||
export GGML_HIP=1
|
export GGML_HIPBLAS=1
|
||||||
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
|
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
|
||||||
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
||||||
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
||||||
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
|
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
|
||||||
endif
|
endif
|
||||||
|
|
||||||
@@ -202,23 +179,16 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
|
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
|
||||||
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
|
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
|
||||||
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
|
||||||
|
|
||||||
ifeq ($(ONNX_OS),linux)
|
|
||||||
ifeq ($(ONNX_ARCH),x64)
|
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/bark-cpp
|
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/stablediffusion-ggml
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad
|
|
||||||
ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
|
ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
|
||||||
# Use filter-out to remove the specified backends
|
# Use filter-out to remove the specified backends
|
||||||
ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
|
ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
|
||||||
@@ -239,6 +209,19 @@ endif
|
|||||||
|
|
||||||
all: help
|
all: help
|
||||||
|
|
||||||
|
## BERT embeddings
|
||||||
|
sources/go-bert.cpp:
|
||||||
|
mkdir -p sources/go-bert.cpp
|
||||||
|
cd sources/go-bert.cpp && \
|
||||||
|
git init && \
|
||||||
|
git remote add origin $(BERT_REPO) && \
|
||||||
|
git fetch origin && \
|
||||||
|
git checkout $(BERT_VERSION) && \
|
||||||
|
git submodule update --init --recursive --depth 1 --single-branch
|
||||||
|
|
||||||
|
sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
|
||||||
|
$(MAKE) -C sources/go-bert.cpp libgobert.a
|
||||||
|
|
||||||
## go-llama.cpp
|
## go-llama.cpp
|
||||||
sources/go-llama.cpp:
|
sources/go-llama.cpp:
|
||||||
mkdir -p sources/go-llama.cpp
|
mkdir -p sources/go-llama.cpp
|
||||||
@@ -252,23 +235,6 @@ sources/go-llama.cpp:
|
|||||||
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
|
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
|
||||||
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
|
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
|
||||||
|
|
||||||
## bark.cpp
|
|
||||||
sources/bark.cpp:
|
|
||||||
git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
|
|
||||||
cd sources/bark.cpp && \
|
|
||||||
git checkout $(BARKCPP_VERSION) && \
|
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
|
||||||
|
|
||||||
sources/bark.cpp/build/libbark.a: sources/bark.cpp
|
|
||||||
cd sources/bark.cpp && \
|
|
||||||
mkdir -p build && \
|
|
||||||
cd build && \
|
|
||||||
cmake $(CMAKE_ARGS) .. && \
|
|
||||||
cmake --build . --config Release
|
|
||||||
|
|
||||||
backend/go/bark/libbark.a: sources/bark.cpp/build/libbark.a
|
|
||||||
$(MAKE) -C backend/go/bark libbark.a
|
|
||||||
|
|
||||||
## go-piper
|
## go-piper
|
||||||
sources/go-piper:
|
sources/go-piper:
|
||||||
mkdir -p sources/go-piper
|
mkdir -p sources/go-piper
|
||||||
@@ -282,7 +248,21 @@ sources/go-piper:
|
|||||||
sources/go-piper/libpiper_binding.a: sources/go-piper
|
sources/go-piper/libpiper_binding.a: sources/go-piper
|
||||||
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
|
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
|
||||||
|
|
||||||
## stable diffusion (onnx)
|
|
||||||
|
## RWKV
|
||||||
|
sources/go-rwkv.cpp:
|
||||||
|
mkdir -p sources/go-rwkv.cpp
|
||||||
|
cd sources/go-rwkv.cpp && \
|
||||||
|
git init && \
|
||||||
|
git remote add origin $(RWKV_REPO) && \
|
||||||
|
git fetch origin && \
|
||||||
|
git checkout $(RWKV_VERSION) && \
|
||||||
|
git submodule update --init --recursive --depth 1 --single-branch
|
||||||
|
|
||||||
|
sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
|
||||||
|
cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
|
||||||
|
|
||||||
|
## stable diffusion
|
||||||
sources/go-stable-diffusion:
|
sources/go-stable-diffusion:
|
||||||
mkdir -p sources/go-stable-diffusion
|
mkdir -p sources/go-stable-diffusion
|
||||||
cd sources/go-stable-diffusion && \
|
cd sources/go-stable-diffusion && \
|
||||||
@@ -295,38 +275,6 @@ sources/go-stable-diffusion:
|
|||||||
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
|
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
|
||||||
CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
|
CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
|
||||||
|
|
||||||
## stablediffusion (ggml)
|
|
||||||
sources/stablediffusion-ggml.cpp:
|
|
||||||
git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
|
|
||||||
cd sources/stablediffusion-ggml.cpp && \
|
|
||||||
git checkout $(STABLEDIFFUSION_GGML_VERSION) && \
|
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
|
||||||
|
|
||||||
backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp
|
|
||||||
$(MAKE) -C backend/go/image/stablediffusion-ggml build/libstable-diffusion.a
|
|
||||||
$(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a
|
|
||||||
|
|
||||||
backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc
|
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ LIBRARY_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ \
|
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion-ggml ./backend/go/image/stablediffusion-ggml/
|
|
||||||
ifneq ($(UPX),)
|
|
||||||
$(UPX) backend-assets/grpc/stablediffusion-ggml
|
|
||||||
endif
|
|
||||||
|
|
||||||
sources/onnxruntime:
|
|
||||||
mkdir -p sources/onnxruntime
|
|
||||||
curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
|
|
||||||
cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
|
|
||||||
cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./
|
|
||||||
|
|
||||||
backend-assets/lib/libonnxruntime.so.1: backend-assets/lib sources/onnxruntime
|
|
||||||
cp -rfv sources/onnxruntime/lib/* backend-assets/lib/
|
|
||||||
ifeq ($(OS),Darwin)
|
|
||||||
mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
|
|
||||||
else
|
|
||||||
mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
|
|
||||||
endif
|
|
||||||
|
|
||||||
## tiny-dream
|
## tiny-dream
|
||||||
sources/go-tiny-dream:
|
sources/go-tiny-dream:
|
||||||
mkdir -p sources/go-tiny-dream
|
mkdir -p sources/go-tiny-dream
|
||||||
@@ -353,19 +301,23 @@ sources/whisper.cpp:
|
|||||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
||||||
|
|
||||||
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
|
get-sources: sources/go-llama.cpp sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
|
||||||
|
|
||||||
replace:
|
replace:
|
||||||
|
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
|
||||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
||||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
||||||
|
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
|
||||||
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
|
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
||||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
||||||
|
|
||||||
dropreplace:
|
dropreplace:
|
||||||
|
$(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp
|
||||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
||||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
||||||
|
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
|
||||||
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
|
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
|
||||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
||||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
|
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
|
||||||
@@ -378,8 +330,10 @@ prepare-sources: get-sources replace
|
|||||||
rebuild: ## Rebuilds the project
|
rebuild: ## Rebuilds the project
|
||||||
$(GOCMD) clean -cache
|
$(GOCMD) clean -cache
|
||||||
$(MAKE) -C sources/go-llama.cpp clean
|
$(MAKE) -C sources/go-llama.cpp clean
|
||||||
|
$(MAKE) -C sources/go-rwkv.cpp clean
|
||||||
$(MAKE) -C sources/whisper.cpp clean
|
$(MAKE) -C sources/whisper.cpp clean
|
||||||
$(MAKE) -C sources/go-stable-diffusion clean
|
$(MAKE) -C sources/go-stable-diffusion clean
|
||||||
|
$(MAKE) -C sources/go-bert.cpp clean
|
||||||
$(MAKE) -C sources/go-piper clean
|
$(MAKE) -C sources/go-piper clean
|
||||||
$(MAKE) -C sources/go-tiny-dream clean
|
$(MAKE) -C sources/go-tiny-dream clean
|
||||||
$(MAKE) build
|
$(MAKE) build
|
||||||
@@ -394,9 +348,7 @@ clean: ## Remove build related file
|
|||||||
rm -rf release/
|
rm -rf release/
|
||||||
rm -rf backend-assets/*
|
rm -rf backend-assets/*
|
||||||
$(MAKE) -C backend/cpp/grpc clean
|
$(MAKE) -C backend/cpp/grpc clean
|
||||||
$(MAKE) -C backend/go/bark clean
|
|
||||||
$(MAKE) -C backend/cpp/llama clean
|
$(MAKE) -C backend/cpp/llama clean
|
||||||
$(MAKE) -C backend/go/image/stablediffusion-ggml clean
|
|
||||||
rm -rf backend/cpp/llama-* || true
|
rm -rf backend/cpp/llama-* || true
|
||||||
$(MAKE) dropreplace
|
$(MAKE) dropreplace
|
||||||
$(MAKE) protogen-clean
|
$(MAKE) protogen-clean
|
||||||
@@ -487,6 +439,8 @@ test-models/testmodel.ggml:
|
|||||||
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
||||||
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
|
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
|
||||||
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
||||||
|
wget -q https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv
|
||||||
|
wget -q https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
|
||||||
cp tests/models_fixtures/* test-models
|
cp tests/models_fixtures/* test-models
|
||||||
|
|
||||||
prepare-test: grpcs
|
prepare-test: grpcs
|
||||||
@@ -739,6 +693,13 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
|
|||||||
backend-assets/grpc: protogen-go replace
|
backend-assets/grpc: protogen-go replace
|
||||||
mkdir -p backend-assets/grpc
|
mkdir -p backend-assets/grpc
|
||||||
|
|
||||||
|
backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
|
||||||
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
|
||||||
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
|
||||||
|
ifneq ($(UPX),)
|
||||||
|
$(UPX) backend-assets/grpc/bert-embeddings
|
||||||
|
endif
|
||||||
|
|
||||||
backend-assets/grpc/huggingface: backend-assets/grpc
|
backend-assets/grpc/huggingface: backend-assets/grpc
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
|
||||||
ifneq ($(UPX),)
|
ifneq ($(UPX),)
|
||||||
@@ -798,6 +759,10 @@ backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/ll
|
|||||||
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
|
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
|
||||||
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
|
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
|
||||||
|
# TODO: every binary should have its own folder instead, so can have different metal implementations
|
||||||
|
ifeq ($(BUILD_TYPE),metal)
|
||||||
|
cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
|
||||||
|
endif
|
||||||
|
|
||||||
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
|
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
cp -rf backend/cpp/llama backend/cpp/llama-cuda
|
cp -rf backend/cpp/llama backend/cpp/llama-cuda
|
||||||
@@ -810,7 +775,7 @@ backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/lla
|
|||||||
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
|
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
|
||||||
$(MAKE) -C backend/cpp/llama-hipblas purge
|
$(MAKE) -C backend/cpp/llama-hipblas purge
|
||||||
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
|
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
|
BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
|
||||||
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
|
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
|
||||||
|
|
||||||
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
|
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
@@ -845,13 +810,6 @@ ifneq ($(UPX),)
|
|||||||
$(UPX) backend-assets/grpc/llama-ggml
|
$(UPX) backend-assets/grpc/llama-ggml
|
||||||
endif
|
endif
|
||||||
|
|
||||||
backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc
|
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \
|
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/
|
|
||||||
ifneq ($(UPX),)
|
|
||||||
$(UPX) backend-assets/grpc/bark-cpp
|
|
||||||
endif
|
|
||||||
|
|
||||||
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
|
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
|
||||||
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
|
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
|
||||||
@@ -859,6 +817,13 @@ ifneq ($(UPX),)
|
|||||||
$(UPX) backend-assets/grpc/piper
|
$(UPX) backend-assets/grpc/piper
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
|
||||||
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
|
||||||
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
|
||||||
|
ifneq ($(UPX),)
|
||||||
|
$(UPX) backend-assets/grpc/rwkv
|
||||||
|
endif
|
||||||
|
|
||||||
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
|
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
|
||||||
@@ -866,13 +831,6 @@ ifneq ($(UPX),)
|
|||||||
$(UPX) backend-assets/grpc/stablediffusion
|
$(UPX) backend-assets/grpc/stablediffusion
|
||||||
endif
|
endif
|
||||||
|
|
||||||
backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
|
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
|
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
|
|
||||||
ifneq ($(UPX),)
|
|
||||||
$(UPX) backend-assets/grpc/silero-vad
|
|
||||||
endif
|
|
||||||
|
|
||||||
backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
|
backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
|
||||||
@@ -933,7 +891,7 @@ docker-aio-all:
|
|||||||
|
|
||||||
docker-image-intel:
|
docker-image-intel:
|
||||||
docker build \
|
docker build \
|
||||||
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
|
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
|
||||||
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
||||||
--build-arg GO_TAGS="none" \
|
--build-arg GO_TAGS="none" \
|
||||||
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
||||||
@@ -941,7 +899,7 @@ docker-image-intel:
|
|||||||
|
|
||||||
docker-image-intel-xpu:
|
docker-image-intel-xpu:
|
||||||
docker build \
|
docker build \
|
||||||
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
|
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
|
||||||
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
||||||
--build-arg GO_TAGS="none" \
|
--build-arg GO_TAGS="none" \
|
||||||
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
||||||
|
|||||||
37
README.md
37
README.md
@@ -38,13 +38,9 @@
|
|||||||
</a>
|
</a>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<p align="center">
|
|
||||||
<a href="https://trendshift.io/repositories/1484" target="_blank"><img src="https://trendshift.io/api/badge/repositories/1484" alt="go-skynet%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
|
||||||
</p>
|
|
||||||
|
|
||||||
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
||||||
>
|
>
|
||||||
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples)
|
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/go-skynet/LocalAI/tree/master/examples/)
|
||||||
|
|
||||||
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[](https://artifacthub.io/packages/search?repo=localai)
|
||||||
|
|
||||||
@@ -60,17 +56,14 @@ curl https://localai.io/install.sh | sh
|
|||||||
|
|
||||||
Or run with docker:
|
Or run with docker:
|
||||||
```bash
|
```bash
|
||||||
# CPU only image:
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-cpu
|
|
||||||
|
|
||||||
# Nvidia GPU:
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
|
|
||||||
|
|
||||||
# CPU and GPU image (bigger size):
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
|
|
||||||
|
|
||||||
# AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/)
|
|
||||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
||||||
|
# Alternative images:
|
||||||
|
# - if you have an Nvidia GPU:
|
||||||
|
# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
|
||||||
|
# - without preconfigured models
|
||||||
|
# docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
|
||||||
|
# - without preconfigured models for Nvidia GPUs
|
||||||
|
# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
|
||||||
```
|
```
|
||||||
|
|
||||||
To load models:
|
To load models:
|
||||||
@@ -92,10 +85,6 @@ local-ai run oci://localai/phi-2:latest
|
|||||||
|
|
||||||
## 📰 Latest project news
|
## 📰 Latest project news
|
||||||
|
|
||||||
- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 )
|
|
||||||
- Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 )
|
|
||||||
- Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204
|
|
||||||
- Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples)
|
|
||||||
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
|
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
|
||||||
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
|
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
|
||||||
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
|
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
|
||||||
@@ -126,10 +115,10 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
|
|||||||
|
|
||||||
## 🚀 [Features](https://localai.io/features/)
|
## 🚀 [Features](https://localai.io/features/)
|
||||||
|
|
||||||
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `transformers`, `vllm` ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
|
- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
|
||||||
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
|
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
|
||||||
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
|
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
|
||||||
- 🎨 [Image generation](https://localai.io/features/image-generation)
|
- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
|
||||||
- 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/)
|
- 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/)
|
||||||
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
|
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
|
||||||
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
|
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
|
||||||
@@ -137,7 +126,6 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
|
|||||||
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
|
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
|
||||||
- 📈 [Reranker API](https://localai.io/features/reranker/)
|
- 📈 [Reranker API](https://localai.io/features/reranker/)
|
||||||
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
|
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
|
||||||
- 🔊 Voice activity detection (Silero-VAD support)
|
|
||||||
- 🌍 Integrated WebUI!
|
- 🌍 Integrated WebUI!
|
||||||
|
|
||||||
## 💻 Usage
|
## 💻 Usage
|
||||||
@@ -160,7 +148,6 @@ Model galleries
|
|||||||
Other:
|
Other:
|
||||||
- Helm chart https://github.com/go-skynet/helm-charts
|
- Helm chart https://github.com/go-skynet/helm-charts
|
||||||
- VSCode extension https://github.com/badgooooor/localai-vscode-plugin
|
- VSCode extension https://github.com/badgooooor/localai-vscode-plugin
|
||||||
- Langchain: https://python.langchain.com/docs/integrations/providers/localai/
|
|
||||||
- Terminal utility https://github.com/djcopley/ShellOracle
|
- Terminal utility https://github.com/djcopley/ShellOracle
|
||||||
- Local Smart assistant https://github.com/mudler/LocalAGI
|
- Local Smart assistant https://github.com/mudler/LocalAGI
|
||||||
- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation / https://github.com/valentinfrlch/ha-gpt4vision
|
- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation / https://github.com/valentinfrlch/ha-gpt4vision
|
||||||
@@ -168,9 +155,6 @@ Other:
|
|||||||
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
|
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
|
||||||
- Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
|
- Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
|
||||||
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
|
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
|
||||||
- Another Telegram Bot https://github.com/JackBekket/Hellper
|
|
||||||
- Auto-documentation https://github.com/JackBekket/Reflexia
|
|
||||||
- Github bot which answer on issues, with code and documentation as context https://github.com/JackBekket/GitHelper
|
|
||||||
- Github Actions: https://github.com/marketplace/actions/start-localai
|
- Github Actions: https://github.com/marketplace/actions/start-localai
|
||||||
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/
|
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/
|
||||||
|
|
||||||
@@ -245,6 +229,7 @@ LocalAI couldn't have been built without the help of great software already avai
|
|||||||
- https://github.com/antimatter15/alpaca.cpp
|
- https://github.com/antimatter15/alpaca.cpp
|
||||||
- https://github.com/EdVince/Stable-Diffusion-NCNN
|
- https://github.com/EdVince/Stable-Diffusion-NCNN
|
||||||
- https://github.com/ggerganov/whisper.cpp
|
- https://github.com/ggerganov/whisper.cpp
|
||||||
|
- https://github.com/saharNooby/rwkv.cpp
|
||||||
- https://github.com/rhasspy/piper
|
- https://github.com/rhasspy/piper
|
||||||
|
|
||||||
## 🤗 Contributors
|
## 🤗 Contributors
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
name: text-embedding-ada-002
|
name: text-embedding-ada-002
|
||||||
embeddings: true
|
backend: bert-embeddings
|
||||||
parameters:
|
parameters:
|
||||||
model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf
|
model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
|
||||||
|
|
||||||
usage: |
|
usage: |
|
||||||
You can test this model with curl like this:
|
You can test this model with curl like this:
|
||||||
|
|||||||
@@ -28,8 +28,6 @@ service Backend {
|
|||||||
rpc Rerank(RerankRequest) returns (RerankResult) {}
|
rpc Rerank(RerankRequest) returns (RerankResult) {}
|
||||||
|
|
||||||
rpc GetMetrics(MetricsRequest) returns (MetricsResponse);
|
rpc GetMetrics(MetricsRequest) returns (MetricsResponse);
|
||||||
|
|
||||||
rpc VAD(VADRequest) returns (VADResponse) {}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Define the empty request
|
// Define the empty request
|
||||||
@@ -159,7 +157,6 @@ message Reply {
|
|||||||
bytes message = 1;
|
bytes message = 1;
|
||||||
int32 tokens = 2;
|
int32 tokens = 2;
|
||||||
int32 prompt_tokens = 3;
|
int32 prompt_tokens = 3;
|
||||||
bytes audio = 5;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
message ModelOptions {
|
message ModelOptions {
|
||||||
@@ -236,16 +233,6 @@ message ModelOptions {
|
|||||||
|
|
||||||
bool FlashAttention = 56;
|
bool FlashAttention = 56;
|
||||||
bool NoKVOffload = 57;
|
bool NoKVOffload = 57;
|
||||||
|
|
||||||
string ModelPath = 59;
|
|
||||||
|
|
||||||
repeated string LoraAdapters = 60;
|
|
||||||
repeated float LoraScales = 61;
|
|
||||||
|
|
||||||
repeated string Options = 62;
|
|
||||||
|
|
||||||
string CacheTypeKey = 63;
|
|
||||||
string CacheTypeValue = 64;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
message Result {
|
message Result {
|
||||||
@@ -301,19 +288,6 @@ message TTSRequest {
|
|||||||
optional string language = 5;
|
optional string language = 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
message VADRequest {
|
|
||||||
repeated float audio = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message VADSegment {
|
|
||||||
float start = 1;
|
|
||||||
float end = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
message VADResponse {
|
|
||||||
repeated VADSegment segments = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
message SoundGenerationRequest {
|
message SoundGenerationRequest {
|
||||||
string text = 1;
|
string text = 1;
|
||||||
string model = 2;
|
string model = 2;
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ else ifeq ($(BUILD_TYPE),clblas)
|
|||||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||||
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
||||||
else ifeq ($(BUILD_TYPE),hipblas)
|
else ifeq ($(BUILD_TYPE),hipblas)
|
||||||
CMAKE_ARGS+=-DGGML_HIP=ON
|
CMAKE_ARGS+=-DGGML_HIPBLAS=ON
|
||||||
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
|
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
|
||||||
# But if it's OSX without metal, disable it here
|
# But if it's OSX without metal, disable it here
|
||||||
else ifeq ($(OS),Darwin)
|
else ifeq ($(OS),Darwin)
|
||||||
@@ -30,7 +30,9 @@ else ifeq ($(OS),Darwin)
|
|||||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
CMAKE_ARGS+=-DGGML_METAL=OFF
|
||||||
else
|
else
|
||||||
CMAKE_ARGS+=-DGGML_METAL=ON
|
CMAKE_ARGS+=-DGGML_METAL=ON
|
||||||
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
# Until this is tested properly, we disable embedded metal file
|
||||||
|
# as we already embed it as part of the LocalAI assets
|
||||||
|
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=OFF
|
||||||
TARGET+=--target ggml-metal
|
TARGET+=--target ggml-metal
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|||||||
@@ -203,7 +203,7 @@ struct llama_client_slot
|
|||||||
std::string stopping_word;
|
std::string stopping_word;
|
||||||
|
|
||||||
// sampling
|
// sampling
|
||||||
struct common_params_sampling sparams;
|
struct common_sampler_params sparams;
|
||||||
common_sampler *ctx_sampling = nullptr;
|
common_sampler *ctx_sampling = nullptr;
|
||||||
|
|
||||||
int32_t ga_i = 0; // group-attention state
|
int32_t ga_i = 0; // group-attention state
|
||||||
@@ -428,7 +428,6 @@ struct llama_server_context
|
|||||||
{
|
{
|
||||||
llama_model *model = nullptr;
|
llama_model *model = nullptr;
|
||||||
llama_context *ctx = nullptr;
|
llama_context *ctx = nullptr;
|
||||||
const llama_vocab * vocab = nullptr;
|
|
||||||
|
|
||||||
clip_ctx *clp_ctx = nullptr;
|
clip_ctx *clp_ctx = nullptr;
|
||||||
|
|
||||||
@@ -440,7 +439,6 @@ struct llama_server_context
|
|||||||
bool clean_kv_cache = true;
|
bool clean_kv_cache = true;
|
||||||
bool all_slots_are_idle = false;
|
bool all_slots_are_idle = false;
|
||||||
bool add_bos_token = true;
|
bool add_bos_token = true;
|
||||||
bool has_eos_token = true;
|
|
||||||
|
|
||||||
int32_t n_ctx; // total context for all clients / slots
|
int32_t n_ctx; // total context for all clients / slots
|
||||||
|
|
||||||
@@ -494,8 +492,8 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
|
|
||||||
common_init_result common_init = common_init_from_params(params);
|
common_init_result common_init = common_init_from_params(params);
|
||||||
model = common_init.model.release();
|
model = common_init.model;
|
||||||
ctx = common_init.context.release();
|
ctx = common_init.context;
|
||||||
if (model == nullptr)
|
if (model == nullptr)
|
||||||
{
|
{
|
||||||
LOG_ERR("unable to load model: %s", params.model.c_str());
|
LOG_ERR("unable to load model: %s", params.model.c_str());
|
||||||
@@ -504,7 +502,7 @@ struct llama_server_context
|
|||||||
|
|
||||||
if (multimodal) {
|
if (multimodal) {
|
||||||
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
|
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
|
||||||
const int n_embd_llm = llama_model_n_embd(model);
|
const int n_embd_llm = llama_n_embd(model);
|
||||||
if (n_embd_clip != n_embd_llm) {
|
if (n_embd_clip != n_embd_llm) {
|
||||||
LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
|
LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
|
||||||
llama_free(ctx);
|
llama_free(ctx);
|
||||||
@@ -513,15 +511,23 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
vocab = llama_model_get_vocab(model);
|
|
||||||
n_ctx = llama_n_ctx(ctx);
|
n_ctx = llama_n_ctx(ctx);
|
||||||
|
|
||||||
add_bos_token = llama_vocab_get_add_bos(vocab);
|
add_bos_token = llama_add_bos_token(model);
|
||||||
has_eos_token = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL;
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void validate_model_chat_template(server_params & sparams) {
|
||||||
|
llama_chat_message chat[] = {{"user", "test"}};
|
||||||
|
std::vector<char> buf(1);
|
||||||
|
int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
|
||||||
|
if (res < 0) {
|
||||||
|
LOG_ERR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", __func__);
|
||||||
|
sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
llama_client_slot* get_active_slot() {
|
llama_client_slot* get_active_slot() {
|
||||||
for (llama_client_slot& slot : slots) {
|
for (llama_client_slot& slot : slots) {
|
||||||
// Check if the slot is currently processing
|
// Check if the slot is currently processing
|
||||||
@@ -656,7 +662,7 @@ struct llama_server_context
|
|||||||
|
|
||||||
bool launch_slot_with_data(llama_client_slot* &slot, json data) {
|
bool launch_slot_with_data(llama_client_slot* &slot, json data) {
|
||||||
slot_params default_params;
|
slot_params default_params;
|
||||||
common_params_sampling default_sparams;
|
common_sampler_params default_sparams;
|
||||||
|
|
||||||
slot->params.stream = json_value(data, "stream", false);
|
slot->params.stream = json_value(data, "stream", false);
|
||||||
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
|
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
|
||||||
@@ -664,6 +670,7 @@ struct llama_server_context
|
|||||||
slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
|
slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
|
||||||
slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
|
slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
|
||||||
slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
|
slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
|
||||||
|
slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
|
||||||
slot->sparams.typ_p = json_value(data, "typical_p", default_sparams.typ_p);
|
slot->sparams.typ_p = json_value(data, "typical_p", default_sparams.typ_p);
|
||||||
slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
|
slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
|
||||||
slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
|
slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
|
||||||
@@ -675,6 +682,7 @@ struct llama_server_context
|
|||||||
slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
|
slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
|
||||||
slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
|
slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
|
||||||
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
|
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
|
||||||
|
slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
|
||||||
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
|
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
|
||||||
slot->sparams.seed = json_value(data, "seed", default_sparams.seed);
|
slot->sparams.seed = json_value(data, "seed", default_sparams.seed);
|
||||||
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
||||||
@@ -719,8 +727,8 @@ struct llama_server_context
|
|||||||
slot->prompt = "";
|
slot->prompt = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (json_value(data, "ignore_eos", false) && has_eos_token) {
|
if (json_value(data, "ignore_eos", false)) {
|
||||||
slot->sparams.logit_bias.push_back({llama_vocab_eos(vocab), -INFINITY});
|
slot->sparams.logit_bias.push_back({llama_token_eos(model), -INFINITY});
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
slot->sparams.penalty_prompt_tokens.clear();
|
slot->sparams.penalty_prompt_tokens.clear();
|
||||||
@@ -759,13 +767,13 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
slot->sparams.logit_bias.clear();
|
slot->sparams.logit_bias.clear();
|
||||||
|
|
||||||
const auto &logit_bias = data.find("logit_bias");
|
const auto &logit_bias = data.find("logit_bias");
|
||||||
if (logit_bias != data.end() && logit_bias->is_array())
|
if (logit_bias != data.end() && logit_bias->is_array())
|
||||||
{
|
{
|
||||||
const llama_vocab * vocab = llama_model_get_vocab(model);
|
const int n_vocab = llama_n_vocab(model);
|
||||||
const int n_vocab = llama_vocab_n_tokens(vocab);
|
|
||||||
for (const auto &el : *logit_bias)
|
for (const auto &el : *logit_bias)
|
||||||
{
|
{
|
||||||
if (el.is_array() && el.size() == 2)
|
if (el.is_array() && el.size() == 2)
|
||||||
@@ -794,7 +802,7 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
else if (el[0].is_string())
|
else if (el[0].is_string())
|
||||||
{
|
{
|
||||||
auto toks = common_tokenize(vocab, el[0].get<std::string>(), false);
|
auto toks = common_tokenize(model, el[0].get<std::string>(), false);
|
||||||
for (auto tok : toks)
|
for (auto tok : toks)
|
||||||
{
|
{
|
||||||
slot->sparams.logit_bias.push_back({tok, bias});
|
slot->sparams.logit_bias.push_back({tok, bias});
|
||||||
@@ -1124,7 +1132,7 @@ struct llama_server_context
|
|||||||
slot.has_next_token = false;
|
slot.has_next_token = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result.tok == llama_vocab_eos(vocab) || llama_vocab_is_eog(vocab, result.tok))
|
if (result.tok == llama_token_eos(model))
|
||||||
{
|
{
|
||||||
slot.stopped_eos = true;
|
slot.stopped_eos = true;
|
||||||
slot.has_next_token = false;
|
slot.has_next_token = false;
|
||||||
@@ -1198,6 +1206,7 @@ struct llama_server_context
|
|||||||
{"top_k", slot.sparams.top_k},
|
{"top_k", slot.sparams.top_k},
|
||||||
{"top_p", slot.sparams.top_p},
|
{"top_p", slot.sparams.top_p},
|
||||||
{"min_p", slot.sparams.min_p},
|
{"min_p", slot.sparams.min_p},
|
||||||
|
{"tfs_z", slot.sparams.tfs_z},
|
||||||
{"typical_p", slot.sparams.typ_p},
|
{"typical_p", slot.sparams.typ_p},
|
||||||
{"repeat_last_n", slot.sparams.penalty_last_n},
|
{"repeat_last_n", slot.sparams.penalty_last_n},
|
||||||
{"repeat_penalty", slot.sparams.penalty_repeat},
|
{"repeat_penalty", slot.sparams.penalty_repeat},
|
||||||
@@ -1206,12 +1215,13 @@ struct llama_server_context
|
|||||||
{"mirostat", slot.sparams.mirostat},
|
{"mirostat", slot.sparams.mirostat},
|
||||||
{"mirostat_tau", slot.sparams.mirostat_tau},
|
{"mirostat_tau", slot.sparams.mirostat_tau},
|
||||||
{"mirostat_eta", slot.sparams.mirostat_eta},
|
{"mirostat_eta", slot.sparams.mirostat_eta},
|
||||||
|
{"penalize_nl", slot.sparams.penalize_nl},
|
||||||
{"stop", slot.params.antiprompt},
|
{"stop", slot.params.antiprompt},
|
||||||
{"n_predict", slot.params.n_predict},
|
{"n_predict", slot.params.n_predict},
|
||||||
{"n_keep", params.n_keep},
|
{"n_keep", params.n_keep},
|
||||||
{"ignore_eos", slot.sparams.ignore_eos},
|
{"ignore_eos", slot.sparams.ignore_eos},
|
||||||
{"stream", slot.params.stream},
|
{"stream", slot.params.stream},
|
||||||
// {"logit_bias", slot.sparams.logit_bias},
|
// {"logit_bias", slot.sparams.logit_bias},
|
||||||
{"n_probs", slot.sparams.n_probs},
|
{"n_probs", slot.sparams.n_probs},
|
||||||
{"min_keep", slot.sparams.min_keep},
|
{"min_keep", slot.sparams.min_keep},
|
||||||
{"grammar", slot.sparams.grammar},
|
{"grammar", slot.sparams.grammar},
|
||||||
@@ -1319,7 +1329,7 @@ struct llama_server_context
|
|||||||
res.error = false;
|
res.error = false;
|
||||||
res.stop = true;
|
res.stop = true;
|
||||||
|
|
||||||
const int n_embd = llama_model_n_embd(model);
|
const int n_embd = llama_n_embd(model);
|
||||||
if (!params.embedding)
|
if (!params.embedding)
|
||||||
{
|
{
|
||||||
LOG_WARNING("embedding disabled", {
|
LOG_WARNING("embedding disabled", {
|
||||||
@@ -1418,7 +1428,7 @@ struct llama_server_context
|
|||||||
n_eval = n_batch;
|
n_eval = n_batch;
|
||||||
}
|
}
|
||||||
|
|
||||||
const int n_embd = llama_model_n_embd(model);
|
const int n_embd = llama_n_embd(model);
|
||||||
float * embd = img.image_embedding + i * n_embd;
|
float * embd = img.image_embedding + i * n_embd;
|
||||||
llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0);
|
llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0);
|
||||||
if (llama_decode(ctx, llava_batch.batch))
|
if (llama_decode(ctx, llava_batch.batch))
|
||||||
@@ -1699,11 +1709,11 @@ struct llama_server_context
|
|||||||
suffix_tokens.erase(suffix_tokens.begin());
|
suffix_tokens.erase(suffix_tokens.begin());
|
||||||
}
|
}
|
||||||
|
|
||||||
prefix_tokens.insert(prefix_tokens.begin(), llama_vocab_fim_pre(vocab));
|
prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(model));
|
||||||
prefix_tokens.insert(prefix_tokens.begin(), llama_vocab_bos(vocab)); // always add BOS
|
prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(model)); // always add BOS
|
||||||
prefix_tokens.insert(prefix_tokens.end(), llama_vocab_fim_suf(vocab));
|
prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(model));
|
||||||
prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end());
|
prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end());
|
||||||
prefix_tokens.push_back(llama_vocab_fim_mid(vocab));
|
prefix_tokens.push_back(llama_token_middle(model));
|
||||||
prompt_tokens = prefix_tokens;
|
prompt_tokens = prefix_tokens;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -2095,6 +2105,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
|||||||
// slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
|
// slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
|
||||||
// slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
|
// slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
|
||||||
// slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
|
// slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
|
||||||
|
// slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
|
||||||
// slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p);
|
// slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p);
|
||||||
// slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
|
// slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
|
||||||
// slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n);
|
// slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n);
|
||||||
@@ -2104,6 +2115,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
|||||||
// slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
|
// slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
|
||||||
// slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
|
// slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
|
||||||
// slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
|
// slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
|
||||||
|
// slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
|
||||||
// slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
|
// slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
|
||||||
// slot->params.seed = json_value(data, "seed", default_params.seed);
|
// slot->params.seed = json_value(data, "seed", default_params.seed);
|
||||||
// slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
// slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
|
||||||
@@ -2117,6 +2129,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
|||||||
data["n_predict"] = predict->tokens() == 0 ? -1 : predict->tokens();
|
data["n_predict"] = predict->tokens() == 0 ? -1 : predict->tokens();
|
||||||
data["top_k"] = predict->topk();
|
data["top_k"] = predict->topk();
|
||||||
data["top_p"] = predict->topp();
|
data["top_p"] = predict->topp();
|
||||||
|
data["tfs_z"] = predict->tailfreesamplingz();
|
||||||
data["typical_p"] = predict->typicalp();
|
data["typical_p"] = predict->typicalp();
|
||||||
data["temperature"] = predict->temperature();
|
data["temperature"] = predict->temperature();
|
||||||
data["repeat_last_n"] = predict->repeat();
|
data["repeat_last_n"] = predict->repeat();
|
||||||
@@ -2126,6 +2139,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
|||||||
data["mirostat"] = predict->mirostat();
|
data["mirostat"] = predict->mirostat();
|
||||||
data["mirostat_tau"] = predict->mirostattau();
|
data["mirostat_tau"] = predict->mirostattau();
|
||||||
data["mirostat_eta"] = predict->mirostateta();
|
data["mirostat_eta"] = predict->mirostateta();
|
||||||
|
data["penalize_nl"] = predict->penalizenl();
|
||||||
data["n_keep"] = predict->nkeep();
|
data["n_keep"] = predict->nkeep();
|
||||||
data["seed"] = predict->seed();
|
data["seed"] = predict->seed();
|
||||||
data["grammar"] = predict->grammar();
|
data["grammar"] = predict->grammar();
|
||||||
@@ -2162,6 +2176,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
|||||||
// llama.params.n_predict = predict->tokens() == 0 ? -1 : predict->tokens();
|
// llama.params.n_predict = predict->tokens() == 0 ? -1 : predict->tokens();
|
||||||
// llama.params.sparams.top_k = predict->topk();
|
// llama.params.sparams.top_k = predict->topk();
|
||||||
// llama.params.sparams.top_p = predict->topp();
|
// llama.params.sparams.top_p = predict->topp();
|
||||||
|
// llama.params.sparams.tfs_z = predict->tailfreesamplingz();
|
||||||
// llama.params.sparams.typical_p = predict->typicalp();
|
// llama.params.sparams.typical_p = predict->typicalp();
|
||||||
// llama.params.sparams.penalty_last_n = predict->repeat();
|
// llama.params.sparams.penalty_last_n = predict->repeat();
|
||||||
// llama.params.sparams.temp = predict->temperature();
|
// llama.params.sparams.temp = predict->temperature();
|
||||||
@@ -2171,6 +2186,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
|||||||
// llama.params.sparams.mirostat = predict->mirostat();
|
// llama.params.sparams.mirostat = predict->mirostat();
|
||||||
// llama.params.sparams.mirostat_tau = predict->mirostattau();
|
// llama.params.sparams.mirostat_tau = predict->mirostattau();
|
||||||
// llama.params.sparams.mirostat_eta = predict->mirostateta();
|
// llama.params.sparams.mirostat_eta = predict->mirostateta();
|
||||||
|
// llama.params.sparams.penalize_nl = predict->penalizenl();
|
||||||
// llama.params.n_keep = predict->nkeep();
|
// llama.params.n_keep = predict->nkeep();
|
||||||
// llama.params.seed = predict->seed();
|
// llama.params.seed = predict->seed();
|
||||||
// llama.params.sparams.grammar = predict->grammar();
|
// llama.params.sparams.grammar = predict->grammar();
|
||||||
@@ -2217,35 +2233,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
|||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
|
|
||||||
const std::vector<ggml_type> kv_cache_types = {
|
|
||||||
GGML_TYPE_F32,
|
|
||||||
GGML_TYPE_F16,
|
|
||||||
GGML_TYPE_BF16,
|
|
||||||
GGML_TYPE_Q8_0,
|
|
||||||
GGML_TYPE_Q4_0,
|
|
||||||
GGML_TYPE_Q4_1,
|
|
||||||
GGML_TYPE_IQ4_NL,
|
|
||||||
GGML_TYPE_Q5_0,
|
|
||||||
GGML_TYPE_Q5_1,
|
|
||||||
};
|
|
||||||
|
|
||||||
static ggml_type kv_cache_type_from_str(const std::string & s) {
|
|
||||||
for (const auto & type : kv_cache_types) {
|
|
||||||
if (ggml_type_name(type) == s) {
|
|
||||||
return type;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
throw std::runtime_error("Unsupported cache type: " + s);
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::string get_all_kv_cache_types() {
|
|
||||||
std::ostringstream msg;
|
|
||||||
for (const auto & type : kv_cache_types) {
|
|
||||||
msg << ggml_type_name(type) << (&type == &kv_cache_types.back() ? "" : ", ");
|
|
||||||
}
|
|
||||||
return msg.str();
|
|
||||||
}
|
|
||||||
|
|
||||||
static void params_parse(const backend::ModelOptions* request,
|
static void params_parse(const backend::ModelOptions* request,
|
||||||
common_params & params) {
|
common_params & params) {
|
||||||
|
|
||||||
@@ -2259,12 +2246,6 @@ static void params_parse(const backend::ModelOptions* request,
|
|||||||
}
|
}
|
||||||
// params.model_alias ??
|
// params.model_alias ??
|
||||||
params.model_alias = request->modelfile();
|
params.model_alias = request->modelfile();
|
||||||
if (!request->cachetypekey().empty()) {
|
|
||||||
params.cache_type_k = kv_cache_type_from_str(request->cachetypekey());
|
|
||||||
}
|
|
||||||
if (!request->cachetypevalue().empty()) {
|
|
||||||
params.cache_type_v = kv_cache_type_from_str(request->cachetypevalue());
|
|
||||||
}
|
|
||||||
params.n_ctx = request->contextsize();
|
params.n_ctx = request->contextsize();
|
||||||
//params.memory_f16 = request->f16memory();
|
//params.memory_f16 = request->f16memory();
|
||||||
params.cpuparams.n_threads = request->threads();
|
params.cpuparams.n_threads = request->threads();
|
||||||
@@ -2323,7 +2304,6 @@ static void params_parse(const backend::ModelOptions* request,
|
|||||||
params.use_mmap = request->mmap();
|
params.use_mmap = request->mmap();
|
||||||
params.flash_attn = request->flashattention();
|
params.flash_attn = request->flashattention();
|
||||||
params.no_kv_offload = request->nokvoffload();
|
params.no_kv_offload = request->nokvoffload();
|
||||||
params.ctx_shift = false; // We control context-shifting in any case (and we disable it as it could just lead to infinite loops)
|
|
||||||
|
|
||||||
params.embedding = request->embeddings();
|
params.embedding = request->embeddings();
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,13 @@
|
|||||||
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
|
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
|
||||||
index 3cd0d2fa..6c5e811a 100644
|
index 342042ff..224db9b5 100644
|
||||||
--- a/examples/llava/clip.cpp
|
--- a/examples/llava/clip.cpp
|
||||||
+++ b/examples/llava/clip.cpp
|
+++ b/examples/llava/clip.cpp
|
||||||
@@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
@@ -2419,7 +2419,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
|
||||||
struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
|
struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
|
||||||
int* patches_data = (int*)malloc(ggml_nbytes(patches));
|
int* patches_data = (int*)malloc(ggml_nbytes(patches));
|
||||||
for (int i = 0; i < num_patches; i++) {
|
for (int i = 0; i < num_patches; i++) {
|
||||||
- patches_data[i] = i + 1;
|
- patches_data[i] = i + 1;
|
||||||
+ patches_data[i] = i;
|
+ patches_data[i] = i;
|
||||||
}
|
}
|
||||||
ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
|
ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
|
||||||
free(patches_data);
|
free(patches_data);
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
INCLUDE_PATH := $(abspath ./)
|
|
||||||
LIBRARY_PATH := $(abspath ./)
|
|
||||||
|
|
||||||
AR?=ar
|
|
||||||
|
|
||||||
BUILD_TYPE?=
|
|
||||||
# keep standard at C11 and C++11
|
|
||||||
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../sources/bark.cpp/examples -I$(INCLUDE_PATH)/../../../sources/bark.cpp/spm-headers -I$(INCLUDE_PATH)/../../../sources/bark.cpp -O3 -DNDEBUG -std=c++17 -fPIC
|
|
||||||
LDFLAGS = -L$(LIBRARY_PATH) -L$(LIBRARY_PATH)/../../../sources/bark.cpp/build/examples -lbark -lstdc++ -lm
|
|
||||||
|
|
||||||
# warnings
|
|
||||||
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
|
|
||||||
|
|
||||||
gobark.o:
|
|
||||||
$(CXX) $(CXXFLAGS) gobark.cpp -o gobark.o -c $(LDFLAGS)
|
|
||||||
|
|
||||||
libbark.a: gobark.o
|
|
||||||
cp $(INCLUDE_PATH)/../../../sources/bark.cpp/build/libbark.a ./
|
|
||||||
$(AR) rcs libbark.a gobark.o
|
|
||||||
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml.c.o
|
|
||||||
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-alloc.c.o
|
|
||||||
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-backend.c.o
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -f gobark.o libbark.a
|
|
||||||
@@ -1,85 +0,0 @@
|
|||||||
#include <iostream>
|
|
||||||
#include <tuple>
|
|
||||||
|
|
||||||
#include "bark.h"
|
|
||||||
#include "gobark.h"
|
|
||||||
#include "common.h"
|
|
||||||
#include "ggml.h"
|
|
||||||
|
|
||||||
struct bark_context *c;
|
|
||||||
|
|
||||||
void bark_print_progress_callback(struct bark_context *bctx, enum bark_encoding_step step, int progress, void *user_data) {
|
|
||||||
if (step == bark_encoding_step::SEMANTIC) {
|
|
||||||
printf("\rGenerating semantic tokens... %d%%", progress);
|
|
||||||
} else if (step == bark_encoding_step::COARSE) {
|
|
||||||
printf("\rGenerating coarse tokens... %d%%", progress);
|
|
||||||
} else if (step == bark_encoding_step::FINE) {
|
|
||||||
printf("\rGenerating fine tokens... %d%%", progress);
|
|
||||||
}
|
|
||||||
fflush(stdout);
|
|
||||||
}
|
|
||||||
|
|
||||||
int load_model(char *model) {
|
|
||||||
// initialize bark context
|
|
||||||
struct bark_context_params ctx_params = bark_context_default_params();
|
|
||||||
bark_params params;
|
|
||||||
|
|
||||||
params.model_path = model;
|
|
||||||
|
|
||||||
// ctx_params.verbosity = verbosity;
|
|
||||||
ctx_params.progress_callback = bark_print_progress_callback;
|
|
||||||
ctx_params.progress_callback_user_data = nullptr;
|
|
||||||
|
|
||||||
struct bark_context *bctx = bark_load_model(params.model_path.c_str(), ctx_params, params.seed);
|
|
||||||
if (!bctx) {
|
|
||||||
fprintf(stderr, "%s: Could not load model\n", __func__);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
c = bctx;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int tts(char *text,int threads, char *dst ) {
|
|
||||||
|
|
||||||
ggml_time_init();
|
|
||||||
const int64_t t_main_start_us = ggml_time_us();
|
|
||||||
|
|
||||||
// generate audio
|
|
||||||
if (!bark_generate_audio(c, text, threads)) {
|
|
||||||
fprintf(stderr, "%s: An error occured. If the problem persists, feel free to open an issue to report it.\n", __func__);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
const float *audio_data = bark_get_audio_data(c);
|
|
||||||
if (audio_data == NULL) {
|
|
||||||
fprintf(stderr, "%s: Could not get audio data\n", __func__);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
const int audio_arr_size = bark_get_audio_data_size(c);
|
|
||||||
|
|
||||||
std::vector<float> audio_arr(audio_data, audio_data + audio_arr_size);
|
|
||||||
|
|
||||||
write_wav_on_disk(audio_arr, dst);
|
|
||||||
|
|
||||||
// report timing
|
|
||||||
{
|
|
||||||
const int64_t t_main_end_us = ggml_time_us();
|
|
||||||
const int64_t t_load_us = bark_get_load_time(c);
|
|
||||||
const int64_t t_eval_us = bark_get_eval_time(c);
|
|
||||||
|
|
||||||
printf("\n\n");
|
|
||||||
printf("%s: load time = %8.2f ms\n", __func__, t_load_us / 1000.0f);
|
|
||||||
printf("%s: eval time = %8.2f ms\n", __func__, t_eval_us / 1000.0f);
|
|
||||||
printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us) / 1000.0f);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int unload() {
|
|
||||||
bark_free(c);
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -1,52 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// #cgo CXXFLAGS: -I${SRCDIR}/../../../sources/bark.cpp/ -I${SRCDIR}/../../../sources/bark.cpp/encodec.cpp -I${SRCDIR}/../../../sources/bark.cpp/examples -I${SRCDIR}/../../../sources/bark.cpp/spm-headers
|
|
||||||
// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../sources/bark.cpp/build/examples -L${SRCDIR}/../../../sources/bark.cpp/build/encodec.cpp/ -lbark -lencodec -lcommon
|
|
||||||
// #include <gobark.h>
|
|
||||||
// #include <stdlib.h>
|
|
||||||
import "C"
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"unsafe"
|
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
)
|
|
||||||
|
|
||||||
type Bark struct {
|
|
||||||
base.SingleThread
|
|
||||||
threads int
|
|
||||||
}
|
|
||||||
|
|
||||||
func (sd *Bark) Load(opts *pb.ModelOptions) error {
|
|
||||||
|
|
||||||
sd.threads = int(opts.Threads)
|
|
||||||
|
|
||||||
modelFile := C.CString(opts.ModelFile)
|
|
||||||
defer C.free(unsafe.Pointer(modelFile))
|
|
||||||
|
|
||||||
ret := C.load_model(modelFile)
|
|
||||||
if ret != 0 {
|
|
||||||
return fmt.Errorf("inference failed")
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (sd *Bark) TTS(opts *pb.TTSRequest) error {
|
|
||||||
t := C.CString(opts.Text)
|
|
||||||
defer C.free(unsafe.Pointer(t))
|
|
||||||
|
|
||||||
dst := C.CString(opts.Dst)
|
|
||||||
defer C.free(unsafe.Pointer(dst))
|
|
||||||
|
|
||||||
threads := C.int(sd.threads)
|
|
||||||
|
|
||||||
ret := C.tts(t, threads, dst)
|
|
||||||
if ret != 0 {
|
|
||||||
return fmt.Errorf("inference failed")
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
int load_model(char *model);
|
|
||||||
int tts(char *text,int threads, char *dst );
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
@@ -1,96 +0,0 @@
|
|||||||
INCLUDE_PATH := $(abspath ./)
|
|
||||||
LIBRARY_PATH := $(abspath ./)
|
|
||||||
|
|
||||||
AR?=ar
|
|
||||||
CMAKE_ARGS?=
|
|
||||||
BUILD_TYPE?=
|
|
||||||
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
|
|
||||||
# keep standard at C11 and C++11
|
|
||||||
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC
|
|
||||||
|
|
||||||
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
|
|
||||||
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
|
|
||||||
|
|
||||||
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
|
|
||||||
ifeq ($(BUILD_TYPE),cublas)
|
|
||||||
CMAKE_ARGS+=-DGGML_CUDA=ON
|
|
||||||
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
|
||||||
# to CMAKE_ARGS automatically
|
|
||||||
else ifeq ($(BUILD_TYPE),openblas)
|
|
||||||
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
|
||||||
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
|
||||||
else ifeq ($(BUILD_TYPE),clblas)
|
|
||||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
|
||||||
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
|
||||||
else ifeq ($(BUILD_TYPE),hipblas)
|
|
||||||
CMAKE_ARGS+=-DGGML_HIP=ON
|
|
||||||
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
|
|
||||||
# But if it's OSX without metal, disable it here
|
|
||||||
else ifeq ($(OS),Darwin)
|
|
||||||
ifneq ($(BUILD_TYPE),metal)
|
|
||||||
CMAKE_ARGS+=-DGGML_METAL=OFF
|
|
||||||
else
|
|
||||||
CMAKE_ARGS+=-DGGML_METAL=ON
|
|
||||||
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
|
|
||||||
TARGET+=--target ggml-metal
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
# ifeq ($(BUILD_TYPE),sycl_f16)
|
|
||||||
# CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON -DSD_SYCL=ON -DGGML_SYCL_F16=ON
|
|
||||||
# endif
|
|
||||||
|
|
||||||
# ifeq ($(BUILD_TYPE),sycl_f32)
|
|
||||||
# CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DSD_SYCL=ON
|
|
||||||
# endif
|
|
||||||
|
|
||||||
# warnings
|
|
||||||
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
|
|
||||||
|
|
||||||
# Find all .a archives in ARCHIVE_DIR
|
|
||||||
# (ggml can have different backends cpu, cuda, etc., each backend generates a .a archive)
|
|
||||||
GGML_ARCHIVE_DIR := build/ggml/src/
|
|
||||||
ALL_ARCHIVES := $(shell find $(GGML_ARCHIVE_DIR) -type f -name '*.a')
|
|
||||||
|
|
||||||
# Name of the single merged library
|
|
||||||
COMBINED_LIB := libggmlall.a
|
|
||||||
|
|
||||||
# Rule to merge all the .a files into one
|
|
||||||
$(COMBINED_LIB): $(ALL_ARCHIVES)
|
|
||||||
@echo "Merging all .a into $(COMBINED_LIB)"
|
|
||||||
rm -f $@
|
|
||||||
mkdir -p merge-tmp
|
|
||||||
for a in $(ALL_ARCHIVES); do \
|
|
||||||
( cd merge-tmp && ar x ../$$a ); \
|
|
||||||
done
|
|
||||||
( cd merge-tmp && ar rcs ../$@ *.o )
|
|
||||||
# Ensure we have a proper index
|
|
||||||
ranlib $@
|
|
||||||
# Clean up
|
|
||||||
rm -rf merge-tmp
|
|
||||||
|
|
||||||
build/libstable-diffusion.a:
|
|
||||||
@echo "Building SD with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
|
|
||||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
|
||||||
+bash -c "source $(ONEAPI_VARS); \
|
|
||||||
mkdir -p build && \
|
|
||||||
cd build && \
|
|
||||||
cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \
|
|
||||||
cmake --build . --config Release"
|
|
||||||
else
|
|
||||||
mkdir -p build && \
|
|
||||||
cd build && \
|
|
||||||
cmake $(CMAKE_ARGS) ../../../../../sources/stablediffusion-ggml.cpp && \
|
|
||||||
cmake --build . --config Release
|
|
||||||
endif
|
|
||||||
$(MAKE) $(COMBINED_LIB)
|
|
||||||
|
|
||||||
gosd.o:
|
|
||||||
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
|
|
||||||
|
|
||||||
libsd.a: gosd.o
|
|
||||||
cp $(INCLUDE_PATH)/build/libstable-diffusion.a ./libsd.a
|
|
||||||
$(AR) rcs libsd.a gosd.o
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -rf gosd.o libsd.a build $(COMBINED_LIB)
|
|
||||||
@@ -1,228 +0,0 @@
|
|||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include <iostream>
|
|
||||||
#include <random>
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
#include "gosd.h"
|
|
||||||
|
|
||||||
// #include "preprocessing.hpp"
|
|
||||||
#include "flux.hpp"
|
|
||||||
#include "stable-diffusion.h"
|
|
||||||
|
|
||||||
#define STB_IMAGE_IMPLEMENTATION
|
|
||||||
#define STB_IMAGE_STATIC
|
|
||||||
#include "stb_image.h"
|
|
||||||
|
|
||||||
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
|
||||||
#define STB_IMAGE_WRITE_STATIC
|
|
||||||
#include "stb_image_write.h"
|
|
||||||
|
|
||||||
#define STB_IMAGE_RESIZE_IMPLEMENTATION
|
|
||||||
#define STB_IMAGE_RESIZE_STATIC
|
|
||||||
#include "stb_image_resize.h"
|
|
||||||
|
|
||||||
// Names of the sampler method, same order as enum sample_method in stable-diffusion.h
|
|
||||||
const char* sample_method_str[] = {
|
|
||||||
"euler_a",
|
|
||||||
"euler",
|
|
||||||
"heun",
|
|
||||||
"dpm2",
|
|
||||||
"dpm++2s_a",
|
|
||||||
"dpm++2m",
|
|
||||||
"dpm++2mv2",
|
|
||||||
"ipndm",
|
|
||||||
"ipndm_v",
|
|
||||||
"lcm",
|
|
||||||
};
|
|
||||||
|
|
||||||
// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
|
|
||||||
const char* schedule_str[] = {
|
|
||||||
"default",
|
|
||||||
"discrete",
|
|
||||||
"karras",
|
|
||||||
"exponential",
|
|
||||||
"ays",
|
|
||||||
"gits",
|
|
||||||
};
|
|
||||||
|
|
||||||
sd_ctx_t* sd_c;
|
|
||||||
|
|
||||||
sample_method_t sample_method;
|
|
||||||
|
|
||||||
int load_model(char *model, char* options[], int threads, int diff) {
|
|
||||||
fprintf (stderr, "Loading model!\n");
|
|
||||||
|
|
||||||
char *stableDiffusionModel = "";
|
|
||||||
if (diff == 1 ) {
|
|
||||||
stableDiffusionModel = model;
|
|
||||||
model = "";
|
|
||||||
}
|
|
||||||
|
|
||||||
// decode options. Options are in form optname:optvale, or if booleans only optname.
|
|
||||||
char *clip_l_path = "";
|
|
||||||
char *clip_g_path = "";
|
|
||||||
char *t5xxl_path = "";
|
|
||||||
char *vae_path = "";
|
|
||||||
char *scheduler = "";
|
|
||||||
char *sampler = "";
|
|
||||||
|
|
||||||
// If options is not NULL, parse options
|
|
||||||
for (int i = 0; options[i] != NULL; i++) {
|
|
||||||
char *optname = strtok(options[i], ":");
|
|
||||||
char *optval = strtok(NULL, ":");
|
|
||||||
if (optval == NULL) {
|
|
||||||
optval = "true";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!strcmp(optname, "clip_l_path")) {
|
|
||||||
clip_l_path = optval;
|
|
||||||
}
|
|
||||||
if (!strcmp(optname, "clip_g_path")) {
|
|
||||||
clip_g_path = optval;
|
|
||||||
}
|
|
||||||
if (!strcmp(optname, "t5xxl_path")) {
|
|
||||||
t5xxl_path = optval;
|
|
||||||
}
|
|
||||||
if (!strcmp(optname, "vae_path")) {
|
|
||||||
vae_path = optval;
|
|
||||||
}
|
|
||||||
if (!strcmp(optname, "scheduler")) {
|
|
||||||
scheduler = optval;
|
|
||||||
}
|
|
||||||
if (!strcmp(optname, "sampler")) {
|
|
||||||
sampler = optval;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int sample_method_found = -1;
|
|
||||||
for (int m = 0; m < N_SAMPLE_METHODS; m++) {
|
|
||||||
if (!strcmp(sampler, sample_method_str[m])) {
|
|
||||||
sample_method_found = m;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (sample_method_found == -1) {
|
|
||||||
fprintf(stderr, "Invalid sample method, default to EULER_A!\n");
|
|
||||||
sample_method_found = EULER_A;
|
|
||||||
}
|
|
||||||
sample_method = (sample_method_t)sample_method_found;
|
|
||||||
|
|
||||||
int schedule_found = -1;
|
|
||||||
for (int d = 0; d < N_SCHEDULES; d++) {
|
|
||||||
if (!strcmp(scheduler, schedule_str[d])) {
|
|
||||||
schedule_found = d;
|
|
||||||
fprintf (stderr, "Found scheduler: %s\n", scheduler);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (schedule_found == -1) {
|
|
||||||
fprintf (stderr, "Invalid scheduler! using DEFAULT\n");
|
|
||||||
schedule_found = DEFAULT;
|
|
||||||
}
|
|
||||||
|
|
||||||
schedule_t schedule = (schedule_t)schedule_found;
|
|
||||||
|
|
||||||
fprintf (stderr, "Creating context\n");
|
|
||||||
sd_ctx_t* sd_ctx = new_sd_ctx(model,
|
|
||||||
clip_l_path,
|
|
||||||
clip_g_path,
|
|
||||||
t5xxl_path,
|
|
||||||
stableDiffusionModel,
|
|
||||||
vae_path,
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
false,
|
|
||||||
false,
|
|
||||||
false,
|
|
||||||
threads,
|
|
||||||
SD_TYPE_COUNT,
|
|
||||||
STD_DEFAULT_RNG,
|
|
||||||
schedule,
|
|
||||||
false,
|
|
||||||
false,
|
|
||||||
false,
|
|
||||||
false);
|
|
||||||
|
|
||||||
if (sd_ctx == NULL) {
|
|
||||||
fprintf (stderr, "failed loading model (generic error)\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
fprintf (stderr, "Created context: OK\n");
|
|
||||||
|
|
||||||
sd_c = sd_ctx;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed , char *dst, float cfg_scale) {
|
|
||||||
|
|
||||||
sd_image_t* results;
|
|
||||||
|
|
||||||
std::vector<int> skip_layers = {7, 8, 9};
|
|
||||||
|
|
||||||
fprintf (stderr, "Generating image\n");
|
|
||||||
|
|
||||||
results = txt2img(sd_c,
|
|
||||||
text,
|
|
||||||
negativeText,
|
|
||||||
-1, //clip_skip
|
|
||||||
cfg_scale, // sfg_scale
|
|
||||||
3.5f,
|
|
||||||
width,
|
|
||||||
height,
|
|
||||||
sample_method,
|
|
||||||
steps,
|
|
||||||
seed,
|
|
||||||
1,
|
|
||||||
NULL,
|
|
||||||
0.9f,
|
|
||||||
20.f,
|
|
||||||
false,
|
|
||||||
"",
|
|
||||||
skip_layers.data(),
|
|
||||||
skip_layers.size(),
|
|
||||||
0,
|
|
||||||
0.01,
|
|
||||||
0.2);
|
|
||||||
|
|
||||||
if (results == NULL) {
|
|
||||||
fprintf (stderr, "NO results\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (results[0].data == NULL) {
|
|
||||||
fprintf (stderr, "Results with no data\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
fprintf (stderr, "Writing PNG\n");
|
|
||||||
|
|
||||||
fprintf (stderr, "DST: %s\n", dst);
|
|
||||||
fprintf (stderr, "Width: %d\n", results[0].width);
|
|
||||||
fprintf (stderr, "Height: %d\n", results[0].height);
|
|
||||||
fprintf (stderr, "Channel: %d\n", results[0].channel);
|
|
||||||
fprintf (stderr, "Data: %p\n", results[0].data);
|
|
||||||
|
|
||||||
stbi_write_png(dst, results[0].width, results[0].height, results[0].channel,
|
|
||||||
results[0].data, 0, NULL);
|
|
||||||
fprintf (stderr, "Saved resulting image to '%s'\n", dst);
|
|
||||||
|
|
||||||
// TODO: free results. Why does it crash?
|
|
||||||
|
|
||||||
free(results[0].data);
|
|
||||||
results[0].data = NULL;
|
|
||||||
free(results);
|
|
||||||
fprintf (stderr, "gen_image is done", dst);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// unload releases the global stable-diffusion context created by load_model.
// It is safe to call when no context is loaded or when called repeatedly.
// Always returns 0.
int unload() {
    if (sd_c != NULL) {
        free_sd_ctx(sd_c);
        // Clear the global so a later call cannot free the context twice.
        sd_c = NULL;
    }
    // The function is declared int: falling off the end without a return
    // value is undefined behaviour in C++.
    return 0;
}
|
|
||||||
|
|
||||||
@@ -1,96 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include
|
|
||||||
// #cgo LDFLAGS: -L${SRCDIR}/ -lsd -lstdc++ -lm -lggmlall -lgomp
|
|
||||||
// #include <gosd.h>
|
|
||||||
// #include <stdlib.h>
|
|
||||||
import "C"
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"strings"
|
|
||||||
"unsafe"
|
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
"github.com/mudler/LocalAI/pkg/utils"
|
|
||||||
)
|
|
||||||
|
|
||||||
// SDGGML is the backend value handed to grpc.StartServer for the
// stable-diffusion ggml bindings.
type SDGGML struct {
|
|
||||||
// Embedded base implementation from pkg/grpc/base.
base.SingleThread
|
|
||||||
// threads is set from ModelOptions.Threads in Load.
threads int
|
|
||||||
// sampleMethod is not read or written by the methods visible in this file.
sampleMethod string
|
|
||||||
// cfgScale is set from ModelOptions.CFGScale in Load and passed to gen_image.
cfgScale float32
|
|
||||||
}
|
|
||||||
|
|
||||||
func (sd *SDGGML) Load(opts *pb.ModelOptions) error {
|
|
||||||
|
|
||||||
sd.threads = int(opts.Threads)
|
|
||||||
|
|
||||||
modelFile := C.CString(opts.ModelFile)
|
|
||||||
defer C.free(unsafe.Pointer(modelFile))
|
|
||||||
|
|
||||||
var options **C.char
|
|
||||||
// prepare the options array to pass to C
|
|
||||||
|
|
||||||
size := C.size_t(unsafe.Sizeof((*C.char)(nil)))
|
|
||||||
length := C.size_t(len(opts.Options))
|
|
||||||
options = (**C.char)(C.malloc(length * size))
|
|
||||||
view := (*[1 << 30]*C.char)(unsafe.Pointer(options))[0:len(opts.Options):len(opts.Options)]
|
|
||||||
|
|
||||||
var diffusionModel int
|
|
||||||
|
|
||||||
var oo []string
|
|
||||||
for _, op := range opts.Options {
|
|
||||||
if op == "diffusion_model" {
|
|
||||||
diffusionModel = 1
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// If it's an option path, we resolve absolute path from the model path
|
|
||||||
if strings.Contains(op, ":") && strings.Contains(op, "path") {
|
|
||||||
data := strings.Split(op, ":")
|
|
||||||
data[1] = filepath.Join(opts.ModelPath, data[1])
|
|
||||||
if err := utils.VerifyPath(data[1], opts.ModelPath); err == nil {
|
|
||||||
oo = append(oo, strings.Join(data, ":"))
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
oo = append(oo, op)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fmt.Fprintf(os.Stderr, "Options: %+v\n", oo)
|
|
||||||
|
|
||||||
for i, x := range oo {
|
|
||||||
view[i] = C.CString(x)
|
|
||||||
}
|
|
||||||
|
|
||||||
sd.cfgScale = opts.CFGScale
|
|
||||||
|
|
||||||
ret := C.load_model(modelFile, options, C.int(opts.Threads), C.int(diffusionModel))
|
|
||||||
if ret != 0 {
|
|
||||||
return fmt.Errorf("could not load model")
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (sd *SDGGML) GenerateImage(opts *pb.GenerateImageRequest) error {
|
|
||||||
t := C.CString(opts.PositivePrompt)
|
|
||||||
defer C.free(unsafe.Pointer(t))
|
|
||||||
|
|
||||||
dst := C.CString(opts.Dst)
|
|
||||||
defer C.free(unsafe.Pointer(dst))
|
|
||||||
|
|
||||||
negative := C.CString(opts.NegativePrompt)
|
|
||||||
defer C.free(unsafe.Pointer(negative))
|
|
||||||
|
|
||||||
ret := C.gen_image(t, negative, C.int(opts.Width), C.int(opts.Height), C.int(opts.Step), C.int(opts.Seed), dst, C.float(sd.cfgScale))
|
|
||||||
if ret != 0 {
|
|
||||||
return fmt.Errorf("inference failed")
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
int load_model(char *model, char* options[], int threads, int diffusionModel);
|
|
||||||
int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed, char *dst, float cfg_scale);
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
@@ -1,20 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
|
||||||
import (
|
|
||||||
"flag"
|
|
||||||
|
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
flag.Parse()
|
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, &SDGGML{}); err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
34
backend/go/llm/bert/bert.go
Normal file
34
backend/go/llm/bert/bert.go
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// This is a wrapper to satisfy the GRPC service interface
|
||||||
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||||
|
import (
|
||||||
|
bert "github.com/go-skynet/go-bert.cpp"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||||
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Embeddings is the backend type wrapping a go-bert.cpp model.
type Embeddings struct {
|
||||||
|
// Embedded base implementation from pkg/grpc/base.
base.SingleThread
|
||||||
|
// bert is the model handle assigned by Load and used by Embeddings.
bert *bert.Bert
|
||||||
|
}
|
||||||
|
|
||||||
|
func (llm *Embeddings) Load(opts *pb.ModelOptions) error {
|
||||||
|
model, err := bert.New(opts.ModelFile)
|
||||||
|
llm.bert = model
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (llm *Embeddings) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
|
||||||
|
|
||||||
|
if len(opts.EmbeddingTokens) > 0 {
|
||||||
|
tokens := []int{}
|
||||||
|
for _, t := range opts.EmbeddingTokens {
|
||||||
|
tokens = append(tokens, int(t))
|
||||||
|
}
|
||||||
|
return llm.bert.TokenEmbeddings(tokens, bert.SetThreads(int(opts.Threads)))
|
||||||
|
}
|
||||||
|
|
||||||
|
return llm.bert.Embeddings(opts.Embeddings, bert.SetThreads(int(opts.Threads)))
|
||||||
|
}
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
// Note: this is started internally by LocalAI and a server is allocated for each model
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"flag"
|
"flag"
|
||||||
|
|
||||||
@@ -14,7 +15,7 @@ var (
|
|||||||
func main() {
|
func main() {
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, &Bark{}); err != nil {
|
if err := grpc.StartServer(*addr, &Embeddings{}); err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -15,7 +15,7 @@ var (
|
|||||||
func main() {
|
func main() {
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, &VAD{}); err != nil {
|
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
95
backend/go/llm/rwkv/rwkv.go
Normal file
95
backend/go/llm/rwkv/rwkv.go
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
// This is a wrapper to satisfy the GRPC service interface
|
||||||
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
"github.com/donomii/go-rwkv.cpp"
|
||||||
|
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||||
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
)
|
||||||
|
|
||||||
|
const tokenizerSuffix = ".tokenizer.json"
|
||||||
|
|
||||||
|
// LLM is the backend type wrapping a go-rwkv.cpp model state.
type LLM struct {
|
||||||
|
// Embedded base implementation from pkg/grpc/base.
base.SingleThread
|
||||||
|
|
||||||
|
// rwkv is the model state assigned by Load and used by the predict and
// tokenize methods.
rwkv *rwkv.RwkvState
|
||||||
|
}
|
||||||
|
|
||||||
|
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||||
|
tokenizerFile := opts.Tokenizer
|
||||||
|
if tokenizerFile == "" {
|
||||||
|
modelFile := filepath.Base(opts.ModelFile)
|
||||||
|
tokenizerFile = modelFile + tokenizerSuffix
|
||||||
|
}
|
||||||
|
modelPath := filepath.Dir(opts.ModelFile)
|
||||||
|
tokenizerPath := filepath.Join(modelPath, tokenizerFile)
|
||||||
|
|
||||||
|
model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads()))
|
||||||
|
|
||||||
|
if model == nil {
|
||||||
|
return fmt.Errorf("rwkv could not load model")
|
||||||
|
}
|
||||||
|
llm.rwkv = model
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
||||||
|
stopWord := "\n"
|
||||||
|
if len(opts.StopPrompts) > 0 {
|
||||||
|
stopWord = opts.StopPrompts[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
response := llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), nil)
|
||||||
|
|
||||||
|
return response, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||||
|
go func() {
|
||||||
|
|
||||||
|
stopWord := "\n"
|
||||||
|
if len(opts.StopPrompts) > 0 {
|
||||||
|
stopWord = opts.StopPrompts[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
|
||||||
|
fmt.Println("Error processing input: ", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), func(s string) bool {
|
||||||
|
results <- s
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
close(results)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (llm *LLM) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationResponse, error) {
|
||||||
|
tokens, err := llm.rwkv.Tokenizer.Encode(opts.Prompt)
|
||||||
|
if err != nil {
|
||||||
|
return pb.TokenizationResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
l := len(tokens)
|
||||||
|
i32Tokens := make([]int32, l)
|
||||||
|
|
||||||
|
for i, t := range tokens {
|
||||||
|
i32Tokens[i] = int32(t.ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
return pb.TokenizationResponse{
|
||||||
|
Length: int32(l),
|
||||||
|
Tokens: i32Tokens,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
@@ -1,54 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// This is a wrapper to satisfy the GRPC service interface
|
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
"github.com/streamer45/silero-vad-go/speech"
|
|
||||||
)
|
|
||||||
|
|
||||||
// VAD is the backend type wrapping a silero-vad-go speech detector.
type VAD struct {
|
|
||||||
// Embedded base implementation from pkg/grpc/base.
base.SingleThread
|
|
||||||
// detector is created by Load and used by the VAD method.
detector *speech.Detector
|
|
||||||
}
|
|
||||||
|
|
||||||
func (vad *VAD) Load(opts *pb.ModelOptions) error {
|
|
||||||
v, err := speech.NewDetector(speech.DetectorConfig{
|
|
||||||
ModelPath: opts.ModelFile,
|
|
||||||
SampleRate: 16000,
|
|
||||||
//WindowSize: 1024,
|
|
||||||
Threshold: 0.5,
|
|
||||||
MinSilenceDurationMs: 100,
|
|
||||||
SpeechPadMs: 30,
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("create silero detector: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
vad.detector = v
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (vad *VAD) VAD(req *pb.VADRequest) (pb.VADResponse, error) {
|
|
||||||
audio := req.Audio
|
|
||||||
|
|
||||||
segments, err := vad.detector.Detect(audio)
|
|
||||||
if err != nil {
|
|
||||||
return pb.VADResponse{}, fmt.Errorf("detect: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
vadSegments := []*pb.VADSegment{}
|
|
||||||
for _, s := range segments {
|
|
||||||
vadSegments = append(vadSegments, &pb.VADSegment{
|
|
||||||
Start: float32(s.SpeechStartAt),
|
|
||||||
End: float32(s.SpeechEndAt),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
return pb.VADResponse{
|
|
||||||
Segments: vadSegments,
|
|
||||||
}, nil
|
|
||||||
}
|
|
||||||
@@ -1,6 +1,5 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||||
intel-extension-for-pytorch==2.3.110+xpu
|
intel-extension-for-pytorch
|
||||||
torch==2.3.1+cxx11.abi
|
torch
|
||||||
oneccl_bind_pt==2.3.100+xpu
|
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
accelerate
|
accelerate
|
||||||
auto-gptq==0.7.1
|
auto-gptq==0.7.1
|
||||||
grpcio==1.69.0
|
grpcio==1.67.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
transformers
|
transformers
|
||||||
@@ -1,9 +1,8 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||||
intel-extension-for-pytorch==2.3.110+xpu
|
intel-extension-for-pytorch
|
||||||
torch==2.3.1+cxx11.abi
|
torch
|
||||||
torchaudio==2.3.1+cxx11.abi
|
torchaudio
|
||||||
oneccl_bind_pt==2.3.100+xpu
|
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
transformers
|
transformers
|
||||||
accelerate
|
accelerate
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
bark==0.1.5
|
bark==0.1.5
|
||||||
grpcio==1.69.0
|
grpcio==1.67.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -17,9 +17,6 @@
|
|||||||
# LIMIT_TARGETS="cublas12"
|
# LIMIT_TARGETS="cublas12"
|
||||||
# source $(dirname $0)/../common/libbackend.sh
|
# source $(dirname $0)/../common/libbackend.sh
|
||||||
#
|
#
|
||||||
|
|
||||||
PYTHON_VERSION="3.10"
|
|
||||||
|
|
||||||
function init() {
|
function init() {
|
||||||
# Name of the backend (directory name)
|
# Name of the backend (directory name)
|
||||||
BACKEND_NAME=${PWD##*/}
|
BACKEND_NAME=${PWD##*/}
|
||||||
@@ -91,7 +88,7 @@ function getBuildProfile() {
|
|||||||
# always result in an activated virtual environment
|
# always result in an activated virtual environment
|
||||||
function ensureVenv() {
|
function ensureVenv() {
|
||||||
if [ ! -d "${EDIR}/venv" ]; then
|
if [ ! -d "${EDIR}/venv" ]; then
|
||||||
uv venv --python ${PYTHON_VERSION} ${EDIR}/venv
|
uv venv ${EDIR}/venv
|
||||||
echo "virtualenv created"
|
echo "virtualenv created"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|||||||
@@ -1,9 +1,8 @@
|
|||||||
.DEFAULT_GOAL := install
|
.DEFAULT_GOAL := install
|
||||||
|
|
||||||
.PHONY: install
|
.PHONY: install
|
||||||
install:
|
install: protogen
|
||||||
bash install.sh
|
bash install.sh
|
||||||
$(MAKE) protogen
|
|
||||||
|
|
||||||
.PHONY: protogen
|
.PHONY: protogen
|
||||||
protogen: backend_pb2_grpc.py backend_pb2.py
|
protogen: backend_pb2_grpc.py backend_pb2.py
|
||||||
@@ -13,7 +12,7 @@ protogen-clean:
|
|||||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
backend_pb2_grpc.py backend_pb2.py:
|
backend_pb2_grpc.py backend_pb2.py:
|
||||||
bash protogen.sh
|
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||||
|
|
||||||
.PHONY: clean
|
.PHONY: clean
|
||||||
clean: protogen-clean
|
clean: protogen-clean
|
||||||
|
|||||||
@@ -1,6 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -e
|
|
||||||
|
|
||||||
source $(dirname $0)/../common/libbackend.sh
|
|
||||||
|
|
||||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
|
||||||
@@ -1,5 +1,4 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||||
intel-extension-for-pytorch==2.3.110+xpu
|
intel-extension-for-pytorch
|
||||||
torch==2.3.1+cxx11.abi
|
torch
|
||||||
oneccl_bind_pt==2.3.100+xpu
|
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
@@ -1,3 +1,2 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.67.0
|
||||||
protobuf
|
protobuf
|
||||||
grpcio-tools
|
|
||||||
@@ -1,10 +1,9 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||||
intel-extension-for-pytorch==2.3.110+xpu
|
intel-extension-for-pytorch
|
||||||
torch==2.3.1+cxx11.abi
|
torch
|
||||||
torchaudio==2.3.1+cxx11.abi
|
torchaudio
|
||||||
oneccl_bind_pt==2.3.100+xpu
|
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
transformers
|
transformers
|
||||||
accelerate
|
accelerate
|
||||||
coqui-tts
|
coqui-tts
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.67.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
packaging==24.1
|
packaging==24.1
|
||||||
@@ -17,7 +17,7 @@ import backend_pb2_grpc
|
|||||||
|
|
||||||
import grpc
|
import grpc
|
||||||
|
|
||||||
from diffusers import SanaPipeline, StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
|
from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
|
||||||
EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
|
EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
|
||||||
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
|
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
|
||||||
from diffusers.pipelines.stable_diffusion import safety_checker
|
from diffusers.pipelines.stable_diffusion import safety_checker
|
||||||
@@ -247,16 +247,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
use_safetensors=True,
|
use_safetensors=True,
|
||||||
variant=variant)
|
variant=variant)
|
||||||
elif request.PipelineType == "FluxPipeline":
|
elif request.PipelineType == "FluxPipeline":
|
||||||
if fromSingleFile:
|
|
||||||
self.pipe = FluxPipeline.from_single_file(modelFile,
|
|
||||||
torch_dtype=torchType,
|
|
||||||
use_safetensors=True)
|
|
||||||
else:
|
|
||||||
self.pipe = FluxPipeline.from_pretrained(
|
self.pipe = FluxPipeline.from_pretrained(
|
||||||
request.Model,
|
request.Model,
|
||||||
torch_dtype=torch.bfloat16)
|
torch_dtype=torch.bfloat16)
|
||||||
if request.LowVRAM:
|
if request.LowVRAM:
|
||||||
self.pipe.enable_model_cpu_offload()
|
self.pipe.enable_model_cpu_offload()
|
||||||
elif request.PipelineType == "FluxTransformer2DModel":
|
elif request.PipelineType == "FluxTransformer2DModel":
|
||||||
dtype = torch.bfloat16
|
dtype = torch.bfloat16
|
||||||
# specify from environment or default to "ChuckMcSneed/FLUX.1-dev"
|
# specify from environment or default to "ChuckMcSneed/FLUX.1-dev"
|
||||||
@@ -275,13 +270,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
|
|
||||||
if request.LowVRAM:
|
if request.LowVRAM:
|
||||||
self.pipe.enable_model_cpu_offload()
|
self.pipe.enable_model_cpu_offload()
|
||||||
elif request.PipelineType == "SanaPipeline":
|
|
||||||
self.pipe = SanaPipeline.from_pretrained(
|
|
||||||
request.Model,
|
|
||||||
variant="bf16",
|
|
||||||
torch_dtype=torch.bfloat16)
|
|
||||||
self.pipe.vae.to(torch.bfloat16)
|
|
||||||
self.pipe.text_encoder.to(torch.bfloat16)
|
|
||||||
|
|
||||||
if CLIPSKIP and request.CLIPSkip != 0:
|
if CLIPSKIP and request.CLIPSkip != 0:
|
||||||
self.clip_skip = request.CLIPSkip
|
self.clip_skip = request.CLIPSkip
|
||||||
@@ -308,34 +296,22 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
self.pipe.controlnet = self.controlnet
|
self.pipe.controlnet = self.controlnet
|
||||||
else:
|
else:
|
||||||
self.controlnet = None
|
self.controlnet = None
|
||||||
|
# Assume directory from request.ModelFile.
|
||||||
if request.LoraAdapter and not os.path.isabs(request.LoraAdapter):
|
# Only if request.LoraAdapter it's not an absolute path
|
||||||
|
if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter:
|
||||||
|
# get base path of modelFile
|
||||||
|
modelFileBase = os.path.dirname(request.ModelFile)
|
||||||
# modify LoraAdapter to be relative to modelFileBase
|
# modify LoraAdapter to be relative to modelFileBase
|
||||||
request.LoraAdapter = os.path.join(request.ModelPath, request.LoraAdapter)
|
request.LoraAdapter = os.path.join(modelFileBase, request.LoraAdapter)
|
||||||
|
|
||||||
device = "cpu" if not request.CUDA else "cuda"
|
device = "cpu" if not request.CUDA else "cuda"
|
||||||
self.device = device
|
self.device = device
|
||||||
if request.LoraAdapter:
|
if request.LoraAdapter:
|
||||||
# Check if its a local file and not a directory ( we load lora differently for a safetensor file )
|
# Check if its a local file and not a directory ( we load lora differently for a safetensor file )
|
||||||
if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter):
|
if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter):
|
||||||
|
# self.load_lora_weights(request.LoraAdapter, 1, device, torchType)
|
||||||
self.pipe.load_lora_weights(request.LoraAdapter)
|
self.pipe.load_lora_weights(request.LoraAdapter)
|
||||||
else:
|
else:
|
||||||
self.pipe.unet.load_attn_procs(request.LoraAdapter)
|
self.pipe.unet.load_attn_procs(request.LoraAdapter)
|
||||||
if len(request.LoraAdapters) > 0:
|
|
||||||
i = 0
|
|
||||||
adapters_name = []
|
|
||||||
adapters_weights = []
|
|
||||||
for adapter in request.LoraAdapters:
|
|
||||||
if not os.path.isabs(adapter):
|
|
||||||
adapter = os.path.join(request.ModelPath, adapter)
|
|
||||||
self.pipe.load_lora_weights(adapter, adapter_name=f"adapter_{i}")
|
|
||||||
adapters_name.append(f"adapter_{i}")
|
|
||||||
i += 1
|
|
||||||
|
|
||||||
for adapters_weight in request.LoraScales:
|
|
||||||
adapters_weights.append(adapters_weight)
|
|
||||||
|
|
||||||
self.pipe.set_adapters(adapters_name, adapter_weights=adapters_weights)
|
|
||||||
|
|
||||||
if request.CUDA:
|
if request.CUDA:
|
||||||
self.pipe.to('cuda')
|
self.pipe.to('cuda')
|
||||||
@@ -416,6 +392,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
# create a dictionary of values for the parameters
|
# create a dictionary of values for the parameters
|
||||||
options = {
|
options = {
|
||||||
"negative_prompt": request.negative_prompt,
|
"negative_prompt": request.negative_prompt,
|
||||||
|
"width": request.width,
|
||||||
|
"height": request.height,
|
||||||
"num_inference_steps": steps,
|
"num_inference_steps": steps,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -433,13 +411,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
keys = options.keys()
|
keys = options.keys()
|
||||||
|
|
||||||
if request.EnableParameters != "":
|
if request.EnableParameters != "":
|
||||||
keys = [key.strip() for key in request.EnableParameters.split(",")]
|
keys = request.EnableParameters.split(",")
|
||||||
|
|
||||||
if request.EnableParameters == "none":
|
if request.EnableParameters == "none":
|
||||||
keys = []
|
keys = []
|
||||||
|
|
||||||
# create a dictionary of parameters by using the keys from EnableParameters and the values from defaults
|
# create a dictionary of parameters by using the keys from EnableParameters and the values from defaults
|
||||||
kwargs = {key: options.get(key) for key in keys if key in options}
|
kwargs = {key: options[key] for key in keys}
|
||||||
|
|
||||||
# Set seed
|
# Set seed
|
||||||
if request.seed > 0:
|
if request.seed > 0:
|
||||||
@@ -450,12 +428,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
if self.PipelineType == "FluxPipeline":
|
if self.PipelineType == "FluxPipeline":
|
||||||
kwargs["max_sequence_length"] = 256
|
kwargs["max_sequence_length"] = 256
|
||||||
|
|
||||||
if request.width:
|
|
||||||
kwargs["width"] = request.width
|
|
||||||
|
|
||||||
if request.height:
|
|
||||||
kwargs["height"] = request.height
|
|
||||||
|
|
||||||
if self.PipelineType == "FluxTransformer2DModel":
|
if self.PipelineType == "FluxTransformer2DModel":
|
||||||
kwargs["output_type"] = "pil"
|
kwargs["output_type"] = "pil"
|
||||||
kwargs["generator"] = torch.Generator("cpu").manual_seed(0)
|
kwargs["generator"] = torch.Generator("cpu").manual_seed(0)
|
||||||
@@ -475,7 +447,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
export_to_video(video_frames, request.dst)
|
export_to_video(video_frames, request.dst)
|
||||||
return backend_pb2.Result(message="Media generated successfully", success=True)
|
return backend_pb2.Result(message="Media generated successfully", success=True)
|
||||||
|
|
||||||
print(f"Generating image with {kwargs=}", file=sys.stderr)
|
|
||||||
image = {}
|
image = {}
|
||||||
if COMPEL:
|
if COMPEL:
|
||||||
conditioning, pooled = self.compel.build_conditioning_tensor(prompt)
|
conditioning, pooled = self.compel.build_conditioning_tensor(prompt)
|
||||||
|
|||||||
@@ -1,10 +1,9 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||||
intel-extension-for-pytorch==2.3.110+xpu
|
intel-extension-for-pytorch
|
||||||
torch==2.3.1+cxx11.abi
|
torch
|
||||||
torchvision==0.18.1+cxx11.abi
|
torchvision
|
||||||
oneccl_bind_pt==2.3.100+xpu
|
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
diffusers
|
diffusers
|
||||||
opencv-python
|
opencv-python
|
||||||
transformers
|
transformers
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
setuptools
|
setuptools
|
||||||
grpcio==1.69.0
|
grpcio==1.67.0
|
||||||
pillow
|
pillow
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.67.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
wheel
|
wheel
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.67.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -1,7 +1,3 @@
|
|||||||
torch==2.4.1
|
torch==2.4.1
|
||||||
git+https://github.com/myshell-ai/MeloTTS.git
|
git+https://github.com/myshell-ai/MeloTTS.git
|
||||||
git+https://github.com/myshell-ai/OpenVoice.git
|
git+https://github.com/myshell-ai/OpenVoice.git
|
||||||
whisper-timestamped
|
|
||||||
pydub==0.25.1
|
|
||||||
wavmark==0.0.3
|
|
||||||
eng_to_ipa==0.0.2
|
|
||||||
@@ -1,8 +1,4 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||||
torch==2.4.1+cu118
|
torch==2.4.1+cu118
|
||||||
git+https://github.com/myshell-ai/MeloTTS.git
|
git+https://github.com/myshell-ai/MeloTTS.git
|
||||||
git+https://github.com/myshell-ai/OpenVoice.git
|
git+https://github.com/myshell-ai/OpenVoice.git
|
||||||
whisper-timestamped
|
|
||||||
pydub==0.25.1
|
|
||||||
wavmark==0.0.3
|
|
||||||
eng_to_ipa==0.0.2
|
|
||||||
@@ -1,7 +1,3 @@
|
|||||||
torch==2.4.1
|
torch==2.4.1
|
||||||
git+https://github.com/myshell-ai/MeloTTS.git
|
git+https://github.com/myshell-ai/MeloTTS.git
|
||||||
git+https://github.com/myshell-ai/OpenVoice.git
|
git+https://github.com/myshell-ai/OpenVoice.git
|
||||||
whisper-timestamped
|
|
||||||
pydub==0.25.1
|
|
||||||
wavmark==0.0.3
|
|
||||||
eng_to_ipa==0.0.2
|
|
||||||
@@ -1,8 +1,4 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||||
torch==2.4.1+rocm6.0
|
torch==2.4.1+rocm6.0
|
||||||
git+https://github.com/myshell-ai/MeloTTS.git
|
git+https://github.com/myshell-ai/MeloTTS.git
|
||||||
git+https://github.com/myshell-ai/OpenVoice.git
|
git+https://github.com/myshell-ai/OpenVoice.git
|
||||||
whisper-timestamped
|
|
||||||
pydub==0.25.1
|
|
||||||
wavmark==0.0.3
|
|
||||||
eng_to_ipa==0.0.2
|
|
||||||
@@ -1,15 +1,14 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||||
intel-extension-for-pytorch==2.3.110+xpu
|
intel-extension-for-pytorch
|
||||||
torch==2.3.1+cxx11.abi
|
torch
|
||||||
torchaudio==2.3.1+cxx11.abi
|
|
||||||
oneccl_bind_pt==2.3.100+xpu
|
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
grpcio==1.69.0
|
grpcio==1.67.0
|
||||||
protobuf
|
protobuf
|
||||||
librosa==0.9.1
|
librosa==0.9.1
|
||||||
faster-whisper==0.9.0
|
faster-whisper==0.9.0
|
||||||
pydub==0.25.1
|
pydub==0.25.1
|
||||||
wavmark==0.0.3
|
wavmark==0.0.3
|
||||||
|
numpy==1.22.0
|
||||||
eng_to_ipa==0.0.2
|
eng_to_ipa==0.0.2
|
||||||
inflect==7.0.0
|
inflect==7.0.0
|
||||||
unidecode==1.3.7
|
unidecode==1.3.7
|
||||||
|
|||||||
@@ -1,17 +1,20 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.67.0
|
||||||
protobuf
|
protobuf
|
||||||
librosa
|
librosa
|
||||||
faster-whisper
|
faster-whisper
|
||||||
|
pydub==0.25.1
|
||||||
|
wavmark==0.0.3
|
||||||
|
numpy==1.22.0
|
||||||
|
eng_to_ipa==0.0.2
|
||||||
inflect
|
inflect
|
||||||
unidecode
|
unidecode
|
||||||
|
whisper-timestamped
|
||||||
openai
|
openai
|
||||||
python-dotenv
|
python-dotenv
|
||||||
pypinyin
|
pypinyin
|
||||||
cn2an==0.5.22
|
cn2an==0.5.22
|
||||||
numpy==1.22.0
|
|
||||||
networkx==2.8.8
|
networkx==2.8.8
|
||||||
jieba==0.42.1
|
jieba==0.42.1
|
||||||
gradio==5.9.1
|
gradio==3.48.0
|
||||||
langid==1.1.6
|
langid==1.1.6
|
||||||
llvmlite==0.43.0
|
llvmlite==0.43.0
|
||||||
setuptools
|
|
||||||
@@ -12,10 +12,9 @@ export SKIP_CONDA=1
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
.PHONY: parler-tts
|
.PHONY: parler-tts
|
||||||
parler-tts:
|
parler-tts: protogen
|
||||||
@echo "Installing $(CONDA_ENV_PATH)..."
|
@echo "Installing $(CONDA_ENV_PATH)..."
|
||||||
bash install.sh $(CONDA_ENV_PATH)
|
bash install.sh $(CONDA_ENV_PATH)
|
||||||
$(MAKE) protogen
|
|
||||||
|
|
||||||
.PHONY: run
|
.PHONY: run
|
||||||
run: protogen
|
run: protogen
|
||||||
@@ -37,7 +36,7 @@ protogen-clean:
|
|||||||
$(RM) backend_pb2_grpc.py backend_pb2.py
|
$(RM) backend_pb2_grpc.py backend_pb2.py
|
||||||
|
|
||||||
backend_pb2_grpc.py backend_pb2.py:
|
backend_pb2_grpc.py backend_pb2.py:
|
||||||
bash protogen.sh
|
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
||||||
|
|
||||||
.PHONY: clean
|
.PHONY: clean
|
||||||
clean: protogen-clean
|
clean: protogen-clean
|
||||||
|
|||||||
@@ -11,18 +11,16 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
|||||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
installRequirements
|
installRequirements
|
||||||
|
|
||||||
|
|
||||||
# https://github.com/descriptinc/audiotools/issues/101
|
# https://github.com/descriptinc/audiotools/issues/101
|
||||||
# incompatible protobuf versions.
|
# incompatible protobuf versions.
|
||||||
PYDIR=python3.10
|
# PYDIR=python3.10
|
||||||
pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"
|
# pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"
|
||||||
|
|
||||||
if [ ! -d ${pyenv} ]; then
|
# if [ ! -d ${pyenv} ]; then
|
||||||
echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
|
# echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
|
||||||
exit 1
|
# exit 1
|
||||||
fi
|
# fi
|
||||||
|
|
||||||
curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py
|
# curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py
|
||||||
|
|||||||
@@ -1,6 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -e
|
|
||||||
|
|
||||||
source $(dirname $0)/../common/libbackend.sh
|
|
||||||
|
|
||||||
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
|
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17
|
git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17
|
||||||
llvmlite==0.43.0
|
llvmlite==0.43.0
|
||||||
numba==0.60.0
|
numba==0.60.0
|
||||||
grpcio-tools==1.42.0
|
git+https://github.com/descriptinc/audiotools
|
||||||
@@ -1,8 +1,8 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||||
intel-extension-for-pytorch==2.3.110+xpu
|
intel-extension-for-pytorch
|
||||||
torch==2.3.1+cxx11.abi
|
torch
|
||||||
torchaudio==2.3.1+cxx11.abi
|
torchaudio
|
||||||
oneccl_bind_pt==2.3.100+xpu
|
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
transformers
|
transformers
|
||||||
accelerate
|
accelerate
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.67.0
|
||||||
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
llvmlite==0.43.0
|
llvmlite==0.43.0
|
||||||
setuptools
|
|
||||||
@@ -1,9 +1,8 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||||
intel-extension-for-pytorch==2.3.110+xpu
|
intel-extension-for-pytorch
|
||||||
transformers
|
transformers
|
||||||
accelerate
|
accelerate
|
||||||
torch==2.3.1+cxx11.abi
|
torch
|
||||||
oneccl_bind_pt==2.3.100+xpu
|
|
||||||
rerankers[transformers]
|
rerankers[transformers]
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
@@ -1,3 +1,3 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.67.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -2,5 +2,5 @@ torch==2.4.1
|
|||||||
accelerate
|
accelerate
|
||||||
transformers
|
transformers
|
||||||
bitsandbytes
|
bitsandbytes
|
||||||
sentence-transformers==3.3.1
|
sentence-transformers==3.2.0
|
||||||
transformers
|
transformers
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||||
torch==2.4.1+cu118
|
torch==2.4.1+cu118
|
||||||
accelerate
|
accelerate
|
||||||
sentence-transformers==3.3.1
|
sentence-transformers==3.2.0
|
||||||
transformers
|
transformers
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
torch==2.4.1
|
torch==2.4.1
|
||||||
accelerate
|
accelerate
|
||||||
sentence-transformers==3.3.1
|
sentence-transformers==3.2.0
|
||||||
transformers
|
transformers
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||||
torch==2.4.1+rocm6.0
|
torch==2.4.1+rocm6.0
|
||||||
accelerate
|
accelerate
|
||||||
sentence-transformers==3.3.1
|
sentence-transformers==3.2.0
|
||||||
transformers
|
transformers
|
||||||
@@ -1,9 +1,8 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||||
intel-extension-for-pytorch==2.3.110+xpu
|
intel-extension-for-pytorch
|
||||||
torch==2.3.1+cxx11.abi
|
torch
|
||||||
oneccl_bind_pt==2.3.100+xpu
|
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools
|
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
accelerate
|
accelerate
|
||||||
sentence-transformers==3.3.1
|
sentence-transformers==3.2.0
|
||||||
transformers
|
transformers
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.67.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
datasets
|
datasets
|
||||||
|
|||||||
@@ -1,8 +1,7 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||||
intel-extension-for-pytorch==2.3.110+xpu
|
intel-extension-for-pytorch
|
||||||
transformers
|
transformers
|
||||||
oneccl_bind_pt==2.3.100+xpu
|
|
||||||
accelerate
|
accelerate
|
||||||
torch==2.3.1+cxx11.abi
|
torch
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.67.0
|
||||||
protobuf
|
protobuf
|
||||||
scipy==1.14.0
|
scipy==1.14.0
|
||||||
certifi
|
certifi
|
||||||
@@ -1,7 +1,6 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||||
intel-extension-for-pytorch==2.3.110+xpu
|
intel-extension-for-pytorch
|
||||||
torch==2.3.1+cxx11.abi
|
torch
|
||||||
oneccl_bind_pt==2.3.100+xpu
|
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
intel-extension-for-transformers
|
intel-extension-for-transformers
|
||||||
bitsandbytes
|
bitsandbytes
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.67.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
setuptools
|
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||||
intel-extension-for-pytorch==2.3.110+xpu
|
intel-extension-for-pytorch
|
||||||
accelerate
|
accelerate
|
||||||
torch==2.3.1+cxx11.abi
|
torch
|
||||||
torchaudio==2.3.1+cxx11.abi
|
torchaudio
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
oneccl_bind_pt==2.3.100+xpu
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
@@ -1,4 +1,3 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.67.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
setuptools
|
|
||||||
@@ -22,7 +22,7 @@ if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE}" == "xtrue" ]; then
|
|||||||
git clone https://github.com/vllm-project/vllm
|
git clone https://github.com/vllm-project/vllm
|
||||||
fi
|
fi
|
||||||
pushd vllm
|
pushd vllm
|
||||||
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.68.1 protobuf bitsandbytes
|
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.67.0 protobuf bitsandbytes
|
||||||
uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
|
uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
|
||||||
VLLM_TARGET_DEVICE=cpu python setup.py install
|
VLLM_TARGET_DEVICE=cpu python setup.py install
|
||||||
popd
|
popd
|
||||||
|
|||||||
@@ -1,9 +1,8 @@
|
|||||||
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
|
||||||
intel-extension-for-pytorch==2.3.110+xpu
|
intel-extension-for-pytorch
|
||||||
accelerate
|
accelerate
|
||||||
torch==2.3.1+cxx11.abi
|
torch
|
||||||
transformers
|
transformers
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
bitsandbytes
|
bitsandbytes
|
||||||
oneccl_bind_pt==2.3.100+xpu
|
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.69.0
|
grpcio==1.67.0
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
setuptools
|
setuptools
|
||||||
38
core/application.go
Normal file
38
core/application.go
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
package core
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
"github.com/mudler/LocalAI/core/services"
|
||||||
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
|
)
|
||||||
|
|
||||||
|
// The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy
|
||||||
|
// Perhaps a proper DI system is worth it in the future, but for now keep things simple.
|
||||||
|
type Application struct {
|
||||||
|
|
||||||
|
// Application-Level Config
|
||||||
|
ApplicationConfig *config.ApplicationConfig
|
||||||
|
// ApplicationState *ApplicationState
|
||||||
|
|
||||||
|
// Core Low-Level Services
|
||||||
|
BackendConfigLoader *config.BackendConfigLoader
|
||||||
|
ModelLoader *model.ModelLoader
|
||||||
|
|
||||||
|
// Backend Services
|
||||||
|
// EmbeddingsBackendService *backend.EmbeddingsBackendService
|
||||||
|
// ImageGenerationBackendService *backend.ImageGenerationBackendService
|
||||||
|
// LLMBackendService *backend.LLMBackendService
|
||||||
|
// TranscriptionBackendService *backend.TranscriptionBackendService
|
||||||
|
// TextToSpeechBackendService *backend.TextToSpeechBackendService
|
||||||
|
|
||||||
|
// LocalAI System Services
|
||||||
|
BackendMonitorService *services.BackendMonitorService
|
||||||
|
GalleryService *services.GalleryService
|
||||||
|
LocalAIMetricsService *services.LocalAIMetricsService
|
||||||
|
// OpenAIService *services.OpenAIService
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO [NEXT PR?]: Break up ApplicationConfig.
|
||||||
|
// Migrate over stuff that is not set via config at all - especially runtime stuff
|
||||||
|
type ApplicationState struct {
|
||||||
|
}
|
||||||
@@ -1,39 +0,0 @@
|
|||||||
package application
|
|
||||||
|
|
||||||
import (
|
|
||||||
"github.com/mudler/LocalAI/core/config"
|
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
|
||||||
"github.com/mudler/LocalAI/pkg/templates"
|
|
||||||
)
|
|
||||||
|
|
||||||
type Application struct {
|
|
||||||
backendLoader *config.BackendConfigLoader
|
|
||||||
modelLoader *model.ModelLoader
|
|
||||||
applicationConfig *config.ApplicationConfig
|
|
||||||
templatesEvaluator *templates.Evaluator
|
|
||||||
}
|
|
||||||
|
|
||||||
func newApplication(appConfig *config.ApplicationConfig) *Application {
|
|
||||||
return &Application{
|
|
||||||
backendLoader: config.NewBackendConfigLoader(appConfig.ModelPath),
|
|
||||||
modelLoader: model.NewModelLoader(appConfig.ModelPath),
|
|
||||||
applicationConfig: appConfig,
|
|
||||||
templatesEvaluator: templates.NewEvaluator(appConfig.ModelPath),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (a *Application) BackendLoader() *config.BackendConfigLoader {
|
|
||||||
return a.backendLoader
|
|
||||||
}
|
|
||||||
|
|
||||||
func (a *Application) ModelLoader() *model.ModelLoader {
|
|
||||||
return a.modelLoader
|
|
||||||
}
|
|
||||||
|
|
||||||
func (a *Application) ApplicationConfig() *config.ApplicationConfig {
|
|
||||||
return a.applicationConfig
|
|
||||||
}
|
|
||||||
|
|
||||||
func (a *Application) TemplatesEvaluator() *templates.Evaluator {
|
|
||||||
return a.templatesEvaluator
|
|
||||||
}
|
|
||||||
@@ -11,9 +11,17 @@ import (
|
|||||||
|
|
||||||
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
|
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
|
||||||
|
|
||||||
opts := ModelOptions(backendConfig, appConfig)
|
var inferenceModel interface{}
|
||||||
|
var err error
|
||||||
|
|
||||||
inferenceModel, err := loader.Load(opts...)
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{})
|
||||||
|
|
||||||
|
if backendConfig.Backend == "" {
|
||||||
|
inferenceModel, err = loader.GreedyLoader(opts...)
|
||||||
|
} else {
|
||||||
|
opts = append(opts, model.WithBackendString(backendConfig.Backend))
|
||||||
|
inferenceModel, err = loader.BackendLoader(opts...)
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,8 +9,9 @@ import (
|
|||||||
|
|
||||||
func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
|
func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
|
||||||
|
|
||||||
opts := ModelOptions(backendConfig, appConfig)
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{})
|
||||||
inferenceModel, err := loader.Load(
|
|
||||||
|
inferenceModel, err := loader.BackendLoader(
|
||||||
opts...,
|
opts...,
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -16,15 +16,15 @@ import (
|
|||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/gallery"
|
"github.com/mudler/LocalAI/core/gallery"
|
||||||
|
"github.com/mudler/LocalAI/pkg/grpc"
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
model "github.com/mudler/LocalAI/pkg/model"
|
model "github.com/mudler/LocalAI/pkg/model"
|
||||||
"github.com/mudler/LocalAI/pkg/utils"
|
"github.com/mudler/LocalAI/pkg/utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
type LLMResponse struct {
|
type LLMResponse struct {
|
||||||
Response string // should this be []byte?
|
Response string // should this be []byte?
|
||||||
Usage TokenUsage
|
Usage TokenUsage
|
||||||
AudioOutput string
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type TokenUsage struct {
|
type TokenUsage struct {
|
||||||
@@ -35,6 +35,15 @@ type TokenUsage struct {
|
|||||||
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||||
modelFile := c.Model
|
modelFile := c.Model
|
||||||
|
|
||||||
|
var inferenceModel grpc.Backend
|
||||||
|
var err error
|
||||||
|
|
||||||
|
opts := ModelOptions(c, o, []model.Option{})
|
||||||
|
|
||||||
|
if c.Backend != "" {
|
||||||
|
opts = append(opts, model.WithBackendString(c.Backend))
|
||||||
|
}
|
||||||
|
|
||||||
// Check if the modelFile exists, if it doesn't try to load it from the gallery
|
// Check if the modelFile exists, if it doesn't try to load it from the gallery
|
||||||
if o.AutoloadGalleries { // experimental
|
if o.AutoloadGalleries { // experimental
|
||||||
if _, err := os.Stat(modelFile); os.IsNotExist(err) {
|
if _, err := os.Stat(modelFile); os.IsNotExist(err) {
|
||||||
@@ -47,8 +56,12 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
opts := ModelOptions(c, o)
|
if c.Backend == "" {
|
||||||
inferenceModel, err := loader.Load(opts...)
|
inferenceModel, err = loader.GreedyLoader(opts...)
|
||||||
|
} else {
|
||||||
|
inferenceModel, err = loader.BackendLoader(opts...)
|
||||||
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -118,12 +131,8 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
|||||||
ss := ""
|
ss := ""
|
||||||
|
|
||||||
var partialRune []byte
|
var partialRune []byte
|
||||||
err := inferenceModel.PredictStream(ctx, opts, func(reply *proto.Reply) {
|
err := inferenceModel.PredictStream(ctx, opts, func(chars []byte) {
|
||||||
msg := reply.Message
|
partialRune = append(partialRune, chars...)
|
||||||
partialRune = append(partialRune, msg...)
|
|
||||||
|
|
||||||
tokenUsage.Prompt = int(reply.PromptTokens)
|
|
||||||
tokenUsage.Completion = int(reply.Tokens)
|
|
||||||
|
|
||||||
for len(partialRune) > 0 {
|
for len(partialRune) > 0 {
|
||||||
r, size := utf8.DecodeRune(partialRune)
|
r, size := utf8.DecodeRune(partialRune)
|
||||||
@@ -137,10 +146,6 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
|||||||
|
|
||||||
partialRune = partialRune[size:]
|
partialRune = partialRune[size:]
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(msg) == 0 {
|
|
||||||
tokenCallback("", tokenUsage)
|
|
||||||
}
|
|
||||||
})
|
})
|
||||||
return LLMResponse{
|
return LLMResponse{
|
||||||
Response: ss,
|
Response: ss,
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ import (
|
|||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts ...model.Option) []model.Option {
|
func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
|
||||||
name := c.Name
|
name := c.Name
|
||||||
if name == "" {
|
if name == "" {
|
||||||
name = c.Model
|
name = c.Model
|
||||||
@@ -122,17 +122,14 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
|||||||
CUDA: c.CUDA || c.Diffusers.CUDA,
|
CUDA: c.CUDA || c.Diffusers.CUDA,
|
||||||
SchedulerType: c.Diffusers.SchedulerType,
|
SchedulerType: c.Diffusers.SchedulerType,
|
||||||
PipelineType: c.Diffusers.PipelineType,
|
PipelineType: c.Diffusers.PipelineType,
|
||||||
CFGScale: c.CFGScale,
|
CFGScale: c.Diffusers.CFGScale,
|
||||||
LoraAdapter: c.LoraAdapter,
|
LoraAdapter: c.LoraAdapter,
|
||||||
LoraScale: c.LoraScale,
|
LoraScale: c.LoraScale,
|
||||||
LoraAdapters: c.LoraAdapters,
|
|
||||||
LoraScales: c.LoraScales,
|
|
||||||
F16Memory: f16,
|
F16Memory: f16,
|
||||||
LoraBase: c.LoraBase,
|
LoraBase: c.LoraBase,
|
||||||
IMG2IMG: c.Diffusers.IMG2IMG,
|
IMG2IMG: c.Diffusers.IMG2IMG,
|
||||||
CLIPModel: c.Diffusers.ClipModel,
|
CLIPModel: c.Diffusers.ClipModel,
|
||||||
CLIPSubfolder: c.Diffusers.ClipSubFolder,
|
CLIPSubfolder: c.Diffusers.ClipSubFolder,
|
||||||
Options: c.Options,
|
|
||||||
CLIPSkip: int32(c.Diffusers.ClipSkip),
|
CLIPSkip: int32(c.Diffusers.ClipSkip),
|
||||||
ControlNet: c.Diffusers.ControlNet,
|
ControlNet: c.Diffusers.ControlNet,
|
||||||
ContextSize: int32(ctxSize),
|
ContextSize: int32(ctxSize),
|
||||||
@@ -151,8 +148,6 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
|||||||
TensorParallelSize: int32(c.TensorParallelSize),
|
TensorParallelSize: int32(c.TensorParallelSize),
|
||||||
MMProj: c.MMProj,
|
MMProj: c.MMProj,
|
||||||
FlashAttention: c.FlashAttention,
|
FlashAttention: c.FlashAttention,
|
||||||
CacheTypeKey: c.CacheTypeK,
|
|
||||||
CacheTypeValue: c.CacheTypeV,
|
|
||||||
NoKVOffload: c.NoKVOffloading,
|
NoKVOffload: c.NoKVOffloading,
|
||||||
YarnExtFactor: c.YarnExtFactor,
|
YarnExtFactor: c.YarnExtFactor,
|
||||||
YarnAttnFactor: c.YarnAttnFactor,
|
YarnAttnFactor: c.YarnAttnFactor,
|
||||||
|
|||||||
@@ -11,8 +11,8 @@ import (
|
|||||||
|
|
||||||
func Rerank(modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
|
func Rerank(modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
|
||||||
|
|
||||||
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{model.WithModel(modelFile)})
|
||||||
rerankModel, err := loader.Load(opts...)
|
rerankModel, err := loader.BackendLoader(opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -25,8 +25,9 @@ func SoundGeneration(
|
|||||||
backendConfig config.BackendConfig,
|
backendConfig config.BackendConfig,
|
||||||
) (string, *proto.Result, error) {
|
) (string, *proto.Result, error) {
|
||||||
|
|
||||||
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{model.WithModel(modelFile)})
|
||||||
soundGenModel, err := loader.Load(opts...)
|
|
||||||
|
soundGenModel, err := loader.BackendLoader(opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", nil, err
|
return "", nil, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,15 +8,16 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string) (grpc.Backend, error) {
|
func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string) (grpc.Backend, error) {
|
||||||
if storeName == "" {
|
if storeName == "" {
|
||||||
storeName = "default"
|
storeName = "default"
|
||||||
}
|
}
|
||||||
|
|
||||||
sc := []model.Option{
|
sc := []model.Option{
|
||||||
model.WithBackendString(model.LocalStoreBackend),
|
model.WithBackendString(model.LocalStoreBackend),
|
||||||
model.WithAssetDir(appConfig.AssetsDestination),
|
model.WithAssetDir(appConfig.AssetsDestination),
|
||||||
model.WithModel(storeName),
|
model.WithModel(storeName),
|
||||||
}
|
}
|
||||||
|
|
||||||
return sl.Load(sc...)
|
return sl.BackendLoader(sc...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -15,8 +15,10 @@ func TokenMetrics(
|
|||||||
appConfig *config.ApplicationConfig,
|
appConfig *config.ApplicationConfig,
|
||||||
backendConfig config.BackendConfig) (*proto.MetricsResponse, error) {
|
backendConfig config.BackendConfig) (*proto.MetricsResponse, error) {
|
||||||
|
|
||||||
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{
|
||||||
model, err := loader.Load(opts...)
|
model.WithModel(modelFile),
|
||||||
|
})
|
||||||
|
model, err := loader.BackendLoader(opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,13 +14,15 @@ func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.Bac
|
|||||||
var inferenceModel grpc.Backend
|
var inferenceModel grpc.Backend
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{
|
||||||
|
model.WithModel(modelFile),
|
||||||
|
})
|
||||||
|
|
||||||
if backendConfig.Backend == "" {
|
if backendConfig.Backend == "" {
|
||||||
inferenceModel, err = loader.Load(opts...)
|
inferenceModel, err = loader.GreedyLoader(opts...)
|
||||||
} else {
|
} else {
|
||||||
opts = append(opts, model.WithBackendString(backendConfig.Backend))
|
opts = append(opts, model.WithBackendString(backendConfig.Backend))
|
||||||
inferenceModel, err = loader.Load(opts...)
|
inferenceModel, err = loader.BackendLoader(opts...)
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return schema.TokenizeResponse{}, err
|
return schema.TokenizeResponse{}, err
|
||||||
|
|||||||
@@ -18,9 +18,9 @@ func ModelTranscription(audio, language string, translate bool, ml *model.ModelL
|
|||||||
backendConfig.Backend = model.WhisperBackend
|
backendConfig.Backend = model.WhisperBackend
|
||||||
}
|
}
|
||||||
|
|
||||||
opts := ModelOptions(backendConfig, appConfig)
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{})
|
||||||
|
|
||||||
transcriptionModel, err := ml.Load(opts...)
|
transcriptionModel, err := ml.BackendLoader(opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -28,8 +28,11 @@ func ModelTTS(
|
|||||||
bb = model.PiperBackend
|
bb = model.PiperBackend
|
||||||
}
|
}
|
||||||
|
|
||||||
opts := ModelOptions(backendConfig, appConfig, model.WithBackendString(bb), model.WithModel(modelFile))
|
opts := ModelOptions(config.BackendConfig{}, appConfig, []model.Option{
|
||||||
ttsModel, err := loader.Load(opts...)
|
model.WithBackendString(bb),
|
||||||
|
model.WithModel(modelFile),
|
||||||
|
})
|
||||||
|
ttsModel, err := loader.BackendLoader(opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", nil, err
|
return "", nil, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,12 +6,12 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/application"
|
|
||||||
cli_api "github.com/mudler/LocalAI/core/cli/api"
|
cli_api "github.com/mudler/LocalAI/core/cli/api"
|
||||||
cliContext "github.com/mudler/LocalAI/core/cli/context"
|
cliContext "github.com/mudler/LocalAI/core/cli/context"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/http"
|
"github.com/mudler/LocalAI/core/http"
|
||||||
"github.com/mudler/LocalAI/core/p2p"
|
"github.com/mudler/LocalAI/core/p2p"
|
||||||
|
"github.com/mudler/LocalAI/core/startup"
|
||||||
"github.com/rs/zerolog"
|
"github.com/rs/zerolog"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
@@ -186,16 +186,16 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if r.PreloadBackendOnly {
|
if r.PreloadBackendOnly {
|
||||||
_, err := application.New(opts...)
|
_, _, _, err := startup.Startup(opts...)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
app, err := application.New(opts...)
|
cl, ml, options, err := startup.Startup(opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed basic startup tasks with error %s", err.Error())
|
return fmt.Errorf("failed basic startup tasks with error %s", err.Error())
|
||||||
}
|
}
|
||||||
|
|
||||||
appHTTP, err := http.API(app)
|
appHTTP, err := http.App(cl, ml, options)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error().Err(err).Msg("error during HTTP App construction")
|
log.Error().Err(err).Msg("error during HTTP App construction")
|
||||||
return err
|
return err
|
||||||
|
|||||||
@@ -76,14 +76,8 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
|
|||||||
"util",
|
"util",
|
||||||
"llama-cpp-rpc-server",
|
"llama-cpp-rpc-server",
|
||||||
)
|
)
|
||||||
var extraArgs []string
|
extraArgs := strings.Split(r.ExtraLLamaCPPArgs, " ")
|
||||||
|
|
||||||
if r.ExtraLLamaCPPArgs != "" {
|
|
||||||
extraArgs = strings.Split(r.ExtraLLamaCPPArgs, " ")
|
|
||||||
}
|
|
||||||
args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, extraArgs...)
|
args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, extraArgs...)
|
||||||
log.Debug().Msgf("Starting llama-cpp-rpc-server on '%s:%d' with args: %+v (%d)", address, port, args, len(args))
|
|
||||||
|
|
||||||
args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
|
args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
|
||||||
|
|
||||||
cmd := exec.Command(
|
cmd := exec.Command(
|
||||||
|
|||||||
@@ -38,7 +38,6 @@ type BackendConfig struct {
|
|||||||
TemplateConfig TemplateConfig `yaml:"template"`
|
TemplateConfig TemplateConfig `yaml:"template"`
|
||||||
KnownUsecaseStrings []string `yaml:"known_usecases"`
|
KnownUsecaseStrings []string `yaml:"known_usecases"`
|
||||||
KnownUsecases *BackendConfigUsecases `yaml:"-"`
|
KnownUsecases *BackendConfigUsecases `yaml:"-"`
|
||||||
Pipeline Pipeline `yaml:"pipeline"`
|
|
||||||
|
|
||||||
PromptStrings, InputStrings []string `yaml:"-"`
|
PromptStrings, InputStrings []string `yaml:"-"`
|
||||||
InputToken [][]int `yaml:"-"`
|
InputToken [][]int `yaml:"-"`
|
||||||
@@ -73,20 +72,6 @@ type BackendConfig struct {
|
|||||||
|
|
||||||
Description string `yaml:"description"`
|
Description string `yaml:"description"`
|
||||||
Usage string `yaml:"usage"`
|
Usage string `yaml:"usage"`
|
||||||
|
|
||||||
Options []string `yaml:"options"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// Pipeline defines other models to use for audio-to-audio
|
|
||||||
type Pipeline struct {
|
|
||||||
TTS string `yaml:"tts"`
|
|
||||||
LLM string `yaml:"llm"`
|
|
||||||
Transcription string `yaml:"transcription"`
|
|
||||||
VAD string `yaml:"vad"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p Pipeline) IsNotConfigured() bool {
|
|
||||||
return p.LLM == "" || p.TTS == "" || p.Transcription == ""
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type File struct {
|
type File struct {
|
||||||
@@ -112,15 +97,16 @@ type GRPC struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type Diffusers struct {
|
type Diffusers struct {
|
||||||
CUDA bool `yaml:"cuda"`
|
CUDA bool `yaml:"cuda"`
|
||||||
PipelineType string `yaml:"pipeline_type"`
|
PipelineType string `yaml:"pipeline_type"`
|
||||||
SchedulerType string `yaml:"scheduler_type"`
|
SchedulerType string `yaml:"scheduler_type"`
|
||||||
EnableParameters string `yaml:"enable_parameters"` // A list of comma separated parameters to specify
|
EnableParameters string `yaml:"enable_parameters"` // A list of comma separated parameters to specify
|
||||||
IMG2IMG bool `yaml:"img2img"` // Image to Image Diffuser
|
CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale
|
||||||
ClipSkip int `yaml:"clip_skip"` // Skip every N frames
|
IMG2IMG bool `yaml:"img2img"` // Image to Image Diffuser
|
||||||
ClipModel string `yaml:"clip_model"` // Clip model to use
|
ClipSkip int `yaml:"clip_skip"` // Skip every N frames
|
||||||
ClipSubFolder string `yaml:"clip_subfolder"` // Subfolder to use for clip model
|
ClipModel string `yaml:"clip_model"` // Clip model to use
|
||||||
ControlNet string `yaml:"control_net"`
|
ClipSubFolder string `yaml:"clip_subfolder"` // Subfolder to use for clip model
|
||||||
|
ControlNet string `yaml:"control_net"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// LLMConfig is a struct that holds the configuration that are
|
// LLMConfig is a struct that holds the configuration that are
|
||||||
@@ -148,30 +134,26 @@ type LLMConfig struct {
|
|||||||
TrimSpace []string `yaml:"trimspace"`
|
TrimSpace []string `yaml:"trimspace"`
|
||||||
TrimSuffix []string `yaml:"trimsuffix"`
|
TrimSuffix []string `yaml:"trimsuffix"`
|
||||||
|
|
||||||
ContextSize *int `yaml:"context_size"`
|
ContextSize *int `yaml:"context_size"`
|
||||||
NUMA bool `yaml:"numa"`
|
NUMA bool `yaml:"numa"`
|
||||||
LoraAdapter string `yaml:"lora_adapter"`
|
LoraAdapter string `yaml:"lora_adapter"`
|
||||||
LoraBase string `yaml:"lora_base"`
|
LoraBase string `yaml:"lora_base"`
|
||||||
LoraAdapters []string `yaml:"lora_adapters"`
|
LoraScale float32 `yaml:"lora_scale"`
|
||||||
LoraScales []float32 `yaml:"lora_scales"`
|
NoMulMatQ bool `yaml:"no_mulmatq"`
|
||||||
LoraScale float32 `yaml:"lora_scale"`
|
DraftModel string `yaml:"draft_model"`
|
||||||
NoMulMatQ bool `yaml:"no_mulmatq"`
|
NDraft int32 `yaml:"n_draft"`
|
||||||
DraftModel string `yaml:"draft_model"`
|
Quantization string `yaml:"quantization"`
|
||||||
NDraft int32 `yaml:"n_draft"`
|
LoadFormat string `yaml:"load_format"`
|
||||||
Quantization string `yaml:"quantization"`
|
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
|
||||||
LoadFormat string `yaml:"load_format"`
|
TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM
|
||||||
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
|
EnforceEager bool `yaml:"enforce_eager"` // vLLM
|
||||||
TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM
|
SwapSpace int `yaml:"swap_space"` // vLLM
|
||||||
EnforceEager bool `yaml:"enforce_eager"` // vLLM
|
MaxModelLen int `yaml:"max_model_len"` // vLLM
|
||||||
SwapSpace int `yaml:"swap_space"` // vLLM
|
TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
|
||||||
MaxModelLen int `yaml:"max_model_len"` // vLLM
|
MMProj string `yaml:"mmproj"`
|
||||||
TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
|
|
||||||
MMProj string `yaml:"mmproj"`
|
|
||||||
|
|
||||||
FlashAttention bool `yaml:"flash_attention"`
|
FlashAttention bool `yaml:"flash_attention"`
|
||||||
NoKVOffloading bool `yaml:"no_kv_offloading"`
|
NoKVOffloading bool `yaml:"no_kv_offloading"`
|
||||||
CacheTypeK string `yaml:"cache_type_k"`
|
|
||||||
CacheTypeV string `yaml:"cache_type_v"`
|
|
||||||
|
|
||||||
RopeScaling string `yaml:"rope_scaling"`
|
RopeScaling string `yaml:"rope_scaling"`
|
||||||
ModelType string `yaml:"type"`
|
ModelType string `yaml:"type"`
|
||||||
@@ -180,8 +162,6 @@ type LLMConfig struct {
|
|||||||
YarnAttnFactor float32 `yaml:"yarn_attn_factor"`
|
YarnAttnFactor float32 `yaml:"yarn_attn_factor"`
|
||||||
YarnBetaFast float32 `yaml:"yarn_beta_fast"`
|
YarnBetaFast float32 `yaml:"yarn_beta_fast"`
|
||||||
YarnBetaSlow float32 `yaml:"yarn_beta_slow"`
|
YarnBetaSlow float32 `yaml:"yarn_beta_slow"`
|
||||||
|
|
||||||
CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// AutoGPTQ is a struct that holds the configuration specific to the AutoGPTQ backend
|
// AutoGPTQ is a struct that holds the configuration specific to the AutoGPTQ backend
|
||||||
@@ -219,8 +199,6 @@ type TemplateConfig struct {
|
|||||||
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`
|
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`
|
||||||
|
|
||||||
Multimodal string `yaml:"multimodal"`
|
Multimodal string `yaml:"multimodal"`
|
||||||
|
|
||||||
JinjaTemplate bool `yaml:"jinja_template"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {
|
func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {
|
||||||
|
|||||||
@@ -140,7 +140,7 @@ func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg.SetDefaults(append(opts, ModelPath(modelPath))...)
|
cfg.SetDefaults(opts...)
|
||||||
|
|
||||||
return cfg, nil
|
return cfg, nil
|
||||||
}
|
}
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user