Compare commits

..

2 Commits

Author SHA1 Message Date
Ettore Di Giacinto
63c5d843b6 chore(gosec): fix CI
downgrade to latest known version of the gosec action

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-13 19:17:27 +02:00
Ettore Di Giacinto
a9b0e264f2 chore(exllama): drop exllama backend
For polishing and cleaning up it makes now sense to drop exllama which
is completely unmaintained, and was only supporting the llamav1
architecture (nowadays it's superseded by llamav1) .

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-13 19:09:43 +02:00
415 changed files with 11156 additions and 8876 deletions

View File

@@ -1,11 +0,0 @@
meta {
name: model delete
type: http
seq: 7
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
body: none
auth: none
}

View File

Binary file not shown.

View File

@@ -1,16 +0,0 @@
meta {
name: transcribe
type: http
seq: 1
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/audio/transcriptions
body: multipartForm
auth: none
}
body:multipart-form {
file: @file(transcription/gb1.ogg)
model: whisper-1
}

View File

@@ -9,7 +9,6 @@
# Param 2: email # Param 2: email
# #
config_user() { config_user() {
echo "Configuring git for $1 <$2>"
local gcn=$(git config --global user.name) local gcn=$(git config --global user.name)
if [ -z "${gcn}" ]; then if [ -z "${gcn}" ]; then
echo "Setting up git user / remote" echo "Setting up git user / remote"
@@ -25,7 +24,6 @@ config_user() {
# Param 2: remote url # Param 2: remote url
# #
config_remote() { config_remote() {
echo "Adding git remote and fetching $2 as $1"
local gr=$(git remote -v | grep $1) local gr=$(git remote -v | grep $1)
if [ -z "${gr}" ]; then if [ -z "${gr}" ]; then
git remote add $1 $2 git remote add $1 $2

1
.gitattributes vendored
View File

@@ -1,2 +1 @@
*.sh text eol=lf *.sh text eol=lf
backend/cpp/llama/*.hpp linguist-vendored

View File

@@ -29,14 +29,9 @@ def calculate_sha256(file_path):
def manual_safety_check_hf(repo_id): def manual_safety_check_hf(repo_id):
scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan") scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
scan = scanResponse.json() scan = scanResponse.json()
# Check if 'hasUnsafeFile' exists in the response if scan['hasUnsafeFile']:
if 'hasUnsafeFile' in scan: return scan
if scan['hasUnsafeFile']: return None
return scan
else:
return None
else:
return None
download_type, repo_id_or_url = parse_uri(uri) download_type, repo_id_or_url = parse_uri(uri)

View File

@@ -6,7 +6,6 @@ import (
"io/ioutil" "io/ioutil"
"os" "os"
"github.com/microcosm-cc/bluemonday"
"gopkg.in/yaml.v3" "gopkg.in/yaml.v3"
) )
@@ -280,12 +279,6 @@ func main() {
return return
} }
// Ensure that all arbitrary text content is sanitized before display
for i, m := range models {
models[i].Name = bluemonday.StrictPolicy().Sanitize(m.Name)
models[i].Description = bluemonday.StrictPolicy().Sanitize(m.Description)
}
// render the template // render the template
data := struct { data := struct {
Models []*GalleryModel Models []*GalleryModel

View File

@@ -9,8 +9,6 @@ updates:
directory: "/" directory: "/"
schedule: schedule:
interval: "weekly" interval: "weekly"
ignore:
- dependency-name: "github.com/mudler/LocalAI/pkg/grpc/proto"
- package-ecosystem: "github-actions" - package-ecosystem: "github-actions"
# Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.) # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
directory: "/" directory: "/"

5
.github/labeler.yml vendored
View File

@@ -1,11 +1,6 @@
enhancements: enhancements:
- head-branch: ['^feature', 'feature'] - head-branch: ['^feature', 'feature']
dependencies:
- any:
- changed-files:
- any-glob-to-any-file: 'Makefile'
kind/documentation: kind/documentation:
- any: - any:
- changed-files: - changed-files:

View File

@@ -12,14 +12,23 @@ jobs:
- repository: "ggerganov/llama.cpp" - repository: "ggerganov/llama.cpp"
variable: "CPPLLAMA_VERSION" variable: "CPPLLAMA_VERSION"
branch: "master" branch: "master"
- repository: "go-skynet/go-ggml-transformers.cpp"
variable: "GOGGMLTRANSFORMERS_VERSION"
branch: "master"
- repository: "donomii/go-rwkv.cpp"
variable: "RWKV_VERSION"
branch: "main"
- repository: "ggerganov/whisper.cpp" - repository: "ggerganov/whisper.cpp"
variable: "WHISPER_CPP_VERSION" variable: "WHISPER_CPP_VERSION"
branch: "master" branch: "master"
- repository: "PABannier/bark.cpp" - repository: "go-skynet/go-bert.cpp"
variable: "BARKCPP_VERSION" variable: "BERT_VERSION"
branch: "master"
- repository: "go-skynet/bloomz.cpp"
variable: "BLOOMZ_VERSION"
branch: "main" branch: "main"
- repository: "leejet/stable-diffusion.cpp" - repository: "mudler/go-ggllm.cpp"
variable: "STABLEDIFFUSION_GGML_VERSION" variable: "GOGGLLM_VERSION"
branch: "master" branch: "master"
- repository: "mudler/go-stable-diffusion" - repository: "mudler/go-stable-diffusion"
variable: "STABLEDIFFUSION_VERSION" variable: "STABLEDIFFUSION_VERSION"

View File

@@ -23,7 +23,7 @@ jobs:
sudo pip install --upgrade pip sudo pip install --upgrade pip
pip install huggingface_hub pip install huggingface_hub
- name: 'Setup yq' - name: 'Setup yq'
uses: dcarbone/install-yq-action@v1.3.1 uses: dcarbone/install-yq-action@v1.1.1
with: with:
version: 'v4.44.2' version: 'v4.44.2'
download-compressed: true download-compressed: true

View File

@@ -33,7 +33,7 @@ jobs:
run: | run: |
CGO_ENABLED=0 make build-api CGO_ENABLED=0 make build-api
- name: rm - name: rm
uses: appleboy/ssh-action@v1.2.0 uses: appleboy/ssh-action@v1.0.3
with: with:
host: ${{ secrets.EXPLORER_SSH_HOST }} host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }} username: ${{ secrets.EXPLORER_SSH_USERNAME }}
@@ -53,7 +53,7 @@ jobs:
rm: true rm: true
target: ./local-ai target: ./local-ai
- name: restarting - name: restarting
uses: appleboy/ssh-action@v1.2.0 uses: appleboy/ssh-action@v1.0.3
with: with:
host: ${{ secrets.EXPLORER_SSH_HOST }} host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }} username: ${{ secrets.EXPLORER_SSH_USERNAME }}

View File

@@ -15,7 +15,7 @@ jobs:
strategy: strategy:
matrix: matrix:
include: include:
- base-image: intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 - base-image: intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
platforms: 'linux/amd64' platforms: 'linux/amd64'
runs-on: ${{matrix.runs-on}} runs-on: ${{matrix.runs-on}}

View File

@@ -13,78 +13,6 @@ concurrency:
cancel-in-progress: true cancel-in-progress: true
jobs: jobs:
hipblas-jobs:
uses: ./.github/workflows/image_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
image-type: ${{ matrix.image-type }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
aio: ${{ matrix.aio }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
# Pushing with all jobs in parallel
# eats the bandwidth of all the nodes
max-parallel: 2
matrix:
include:
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-hipblas-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
aio: "-aio-gpu-hipblas"
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
latest-image: 'latest-gpu-hipblas'
latest-image-aio: 'latest-aio-gpu-hipblas'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
ffmpeg: 'false'
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-core'
ffmpeg: 'false'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
self-hosted-jobs: self-hosted-jobs:
uses: ./.github/workflows/image_build.yml uses: ./.github/workflows/image_build.yml
with: with:
@@ -111,7 +39,7 @@ jobs:
strategy: strategy:
# Pushing with all jobs in parallel # Pushing with all jobs in parallel
# eats the bandwidth of all the nodes # eats the bandwidth of all the nodes
max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }} max-parallel: ${{ github.event_name != 'pull_request' && 6 || 10 }}
matrix: matrix:
include: include:
# Extra images # Extra images
@@ -194,6 +122,29 @@ jobs:
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-hipblas-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
aio: "-aio-gpu-hipblas"
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
latest-image: 'latest-gpu-hipblas'
latest-image-aio: 'latest-aio-gpu-hipblas'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
ffmpeg: 'false'
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16' - build-type: 'sycl_f16'
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
@@ -261,6 +212,26 @@ jobs:
image-type: 'core' image-type: 'core'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-core'
ffmpeg: 'false'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
core-image-build: core-image-build:
uses: ./.github/workflows/image_build.yml uses: ./.github/workflows/image_build.yml

View File

@@ -79,7 +79,7 @@ jobs:
args: ${{ steps.summarize.outputs.message }} args: ${{ steps.summarize.outputs.message }}
- name: Setup tmate session if fails - name: Setup tmate session if fails
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19 uses: mxschmitt/action-tmate@v3.18
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180
@@ -161,7 +161,7 @@ jobs:
TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
- name: Setup tmate session if fails - name: Setup tmate session if fails
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19 uses: mxschmitt/action-tmate@v3.18
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180

View File

@@ -123,7 +123,7 @@ jobs:
release/* release/*
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19 uses: mxschmitt/action-tmate@v3.18
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180
@@ -232,7 +232,7 @@ jobs:
release/* release/*
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19 uses: mxschmitt/action-tmate@v3.18
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180
@@ -308,7 +308,7 @@ jobs:
release/* release/*
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19 uses: mxschmitt/action-tmate@v3.18
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180
@@ -350,7 +350,7 @@ jobs:
release/* release/*
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19 uses: mxschmitt/action-tmate@v3.18
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180

View File

@@ -18,7 +18,7 @@ jobs:
if: ${{ github.actor != 'dependabot[bot]' }} if: ${{ github.actor != 'dependabot[bot]' }}
- name: Run Gosec Security Scanner - name: Run Gosec Security Scanner
if: ${{ github.actor != 'dependabot[bot]' }} if: ${{ github.actor != 'dependabot[bot]' }}
uses: securego/gosec@v2.21.4 uses: securego/gosec@v2.21.0
with: with:
# we let the report trigger content trigger a failure using the GitHub Security features. # we let the report trigger content trigger a failure using the GitHub Security features.
args: '-no-fail -fmt sarif -out results.sarif ./...' args: '-no-fail -fmt sarif -out results.sarif ./...'

View File

@@ -123,13 +123,6 @@ jobs:
run: | run: |
make --jobs=5 --output-sync=target -C backend/python/parler-tts make --jobs=5 --output-sync=target -C backend/python/parler-tts
make --jobs=5 --output-sync=target -C backend/python/parler-tts test make --jobs=5 --output-sync=target -C backend/python/parler-tts test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
tests-openvoice: tests-openvoice:
runs-on: ubuntu-latest runs-on: ubuntu-latest

View File

@@ -133,7 +133,7 @@ jobs:
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19 uses: mxschmitt/action-tmate@v3.18
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180
@@ -178,26 +178,17 @@ jobs:
uses: actions/checkout@v4 uses: actions/checkout@v4
with: with:
submodules: true submodules: true
- name: Dependencies
run: |
# Install protoc
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
rm protoc.zip
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
PATH="$PATH:$HOME/go/bin" make protogen-go
- name: Build images - name: Build images
run: | run: |
docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile . docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
- name: Test - name: Test
run: | run: |
PATH="$PATH:$HOME/go/bin" LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \ LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
make run-e2e-aio make run-e2e-aio
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19 uses: mxschmitt/action-tmate@v3.18
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180
@@ -224,7 +215,7 @@ jobs:
- name: Dependencies - name: Dependencies
run: | run: |
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
pip install --user --no-cache-dir grpcio-tools pip install --user --no-cache-dir grpcio-tools==1.64.1
- name: Test - name: Test
run: | run: |
export C_INCLUDE_PATH=/usr/local/include export C_INCLUDE_PATH=/usr/local/include
@@ -235,7 +226,7 @@ jobs:
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19 uses: mxschmitt/action-tmate@v3.18
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180

2
.gitignore vendored
View File

@@ -2,7 +2,6 @@
/sources/ /sources/
__pycache__/ __pycache__/
*.a *.a
*.o
get-sources get-sources
prepare-sources prepare-sources
/backend/cpp/llama/grpc-server /backend/cpp/llama/grpc-server
@@ -13,6 +12,7 @@ prepare-sources
go-ggml-transformers go-ggml-transformers
go-gpt2 go-gpt2
go-rwkv
whisper.cpp whisper.cpp
/bloomz /bloomz
go-bert go-bert

View File

@@ -15,6 +15,8 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time
- [Documentation](#documentation) - [Documentation](#documentation)
- [Community and Communication](#community-and-communication) - [Community and Communication](#community-and-communication)
## Getting Started ## Getting Started
### Prerequisites ### Prerequisites
@@ -52,7 +54,7 @@ If you find a bug, have a feature request, or encounter any issues, please check
## Coding Guidelines ## Coding Guidelines
- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like [`golangci-lint`](https://golangci-lint.run) can help you here. - No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like []`golangci-lint`](https://golangci-lint.run) can help you here.
## Testing ## Testing
@@ -82,3 +84,5 @@ We are welcome the contribution of the documents, please open new PR or create a
- You can reach out via the Github issue tracker. - You can reach out via the Github issue tracker.
- Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions) - Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
- Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy) - Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
---

View File

@@ -9,8 +9,6 @@ FROM ${BASE_IMAGE} AS requirements-core
USER root USER root
ARG GO_VERSION=1.22.6 ARG GO_VERSION=1.22.6
ARG CMAKE_VERSION=3.26.4
ARG CMAKE_FROM_SOURCE=false
ARG TARGETARCH ARG TARGETARCH
ARG TARGETVARIANT ARG TARGETVARIANT
@@ -23,25 +21,13 @@ RUN apt-get update && \
build-essential \ build-essential \
ccache \ ccache \
ca-certificates \ ca-certificates \
curl libssl-dev \ cmake \
curl \
git \ git \
unzip upx-ucl && \ unzip upx-ucl && \
apt-get clean && \ apt-get clean && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
# Install CMake (the version in 22.04 is too old)
RUN <<EOT bash
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
else
apt-get update && \
apt-get install -y \
cmake && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# Install Go # Install Go
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
@@ -85,8 +71,7 @@ WORKDIR /build
# The requirements-extras target is for any builds with IMAGE_TYPE=extras. It should not be placed in this target unless every IMAGE_TYPE=extras build will use it # The requirements-extras target is for any builds with IMAGE_TYPE=extras. It should not be placed in this target unless every IMAGE_TYPE=extras build will use it
FROM requirements-core AS requirements-extras FROM requirements-core AS requirements-extras
# Install uv as a system package RUN curl -LsSf https://astral.sh/uv/install.sh | sh
RUN curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR=/usr/bin sh
ENV PATH="/root/.cargo/bin:${PATH}" ENV PATH="/root/.cargo/bin:${PATH}"
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
@@ -203,8 +188,6 @@ FROM ${GRPC_BASE_IMAGE} AS grpc
# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI # This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
ARG GRPC_MAKEFLAGS="-j4 -Otarget" ARG GRPC_MAKEFLAGS="-j4 -Otarget"
ARG GRPC_VERSION=v1.65.0 ARG GRPC_VERSION=v1.65.0
ARG CMAKE_FROM_SOURCE=false
ARG CMAKE_VERSION=3.26.4
ENV MAKEFLAGS=${GRPC_MAKEFLAGS} ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
@@ -213,24 +196,12 @@ WORKDIR /build
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y --no-install-recommends \ apt-get install -y --no-install-recommends \
ca-certificates \ ca-certificates \
build-essential curl libssl-dev \ build-essential \
cmake \
git && \ git && \
apt-get clean && \ apt-get clean && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
# Install CMake (the version in 22.04 is too old)
RUN <<EOT bash
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
else
apt-get update && \
apt-get install -y \
cmake && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# We install GRPC to a different prefix here so that we can copy in only the build artifacts later # We install GRPC to a different prefix here so that we can copy in only the build artifacts later
# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree # saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
# and running make install in the target container # and running make install in the target container
@@ -326,10 +297,10 @@ COPY .git .
RUN make prepare RUN make prepare
## Build the binary ## Build the binary
## If it's CUDA or hipblas, we want to skip some of the llama-compat backends to save space ## If it's CUDA, we want to skip some of the llama-compat backends to save space
## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build ## We only leave the most CPU-optimized variant and the fallback for the cublas build
## (both will use CUDA or hipblas for the actual computation) ## (both will use CUDA for the actual computation)
RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \ SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
else \ else \
make build; \ make build; \
@@ -367,8 +338,9 @@ RUN if [ "${FFMPEG}" = "true" ]; then \
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y --no-install-recommends \ apt-get install -y --no-install-recommends \
ssh less wget ssh less && \
# For the devcontainer, leave apt functional in case additional devtools are needed at runtime. apt-get clean && \
rm -rf /var/lib/apt/lists/*
RUN go install github.com/go-delve/delve/cmd/dlv@latest RUN go install github.com/go-delve/delve/cmd/dlv@latest

213
Makefile
View File

@@ -8,15 +8,23 @@ DETECT_LIBS?=true
# llama.cpp versions # llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=cc98896db858df7aa40d0e16a505883ef196a482 CPPLLAMA_VERSION?=e6b7801bd189d102d901d3e72035611a25456ef1
# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version # whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d WHISPER_CPP_VERSION?=a551933542d956ae84634937acd2942eb40efaaf
# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
# go-piper version # go-piper version
PIPER_REPO?=https://github.com/mudler/go-piper PIPER_REPO?=https://github.com/mudler/go-piper
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0 PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759
# stablediffusion version # stablediffusion version
STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
@@ -26,18 +34,6 @@ STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057 TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
# bark.cpp
BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
BARKCPP_VERSION?=v1.0.0
# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=4570715727f35e5a07a76796d823824c8f42206c
ONNX_VERSION?=1.20.0
ONNX_ARCH?=x64
ONNX_OS?=linux
export BUILD_TYPE?= export BUILD_TYPE?=
export STABLE_BUILD_TYPE?=$(BUILD_TYPE) export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
export CMAKE_ARGS?= export CMAKE_ARGS?=
@@ -49,7 +45,6 @@ CGO_LDFLAGS_WHISPER+=-lggml
CUDA_LIBPATH?=/usr/local/cuda/lib64/ CUDA_LIBPATH?=/usr/local/cuda/lib64/
GO_TAGS?= GO_TAGS?=
BUILD_ID?= BUILD_ID?=
NATIVE?=false
TEST_DIR=/tmp/test TEST_DIR=/tmp/test
@@ -88,25 +83,7 @@ ifndef UNAME_S
UNAME_S := $(shell uname -s) UNAME_S := $(shell uname -s)
endif endif
# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
ifeq ($(NATIVE),false)
CMAKE_ARGS+=-DGGML_NATIVE=OFF
endif
# Detect if we are running on arm64
ifneq (,$(findstring aarch64,$(shell uname -m)))
ONNX_ARCH=aarch64
endif
ifeq ($(OS),Darwin) ifeq ($(OS),Darwin)
ONNX_OS=osx
ifneq (,$(findstring aarch64,$(shell uname -m)))
ONNX_ARCH=arm64
else ifneq (,$(findstring arm64,$(shell uname -m)))
ONNX_ARCH=arm64
else
ONNX_ARCH=x86_64
endif
ifeq ($(OSX_SIGNING_IDENTITY),) ifeq ($(OSX_SIGNING_IDENTITY),)
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/') OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
@@ -161,10 +138,10 @@ ifeq ($(BUILD_TYPE),hipblas)
export CC=$(ROCM_HOME)/llvm/bin/clang export CC=$(ROCM_HOME)/llvm/bin/clang
# llama-ggml has no hipblas support, so override it here. # llama-ggml has no hipblas support, so override it here.
export STABLE_BUILD_TYPE= export STABLE_BUILD_TYPE=
export GGML_HIP=1 export GGML_HIPBLAS=1
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101 GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
AMDGPU_TARGETS ?= "$(GPU_TARGETS)" AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)" CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
endif endif
@@ -202,23 +179,16 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
endif endif
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
ifeq ($(ONNX_OS),linux)
ifeq ($(ONNX_ARCH),x64)
ALL_GRPC_BACKENDS+=backend-assets/grpc/bark-cpp
ALL_GRPC_BACKENDS+=backend-assets/grpc/stablediffusion-ggml
endif
endif
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad
ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC) ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
# Use filter-out to remove the specified backends # Use filter-out to remove the specified backends
ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS)) ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
@@ -239,6 +209,19 @@ endif
all: help all: help
## BERT embeddings
sources/go-bert.cpp:
mkdir -p sources/go-bert.cpp
cd sources/go-bert.cpp && \
git init && \
git remote add origin $(BERT_REPO) && \
git fetch origin && \
git checkout $(BERT_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
$(MAKE) -C sources/go-bert.cpp libgobert.a
## go-llama.cpp ## go-llama.cpp
sources/go-llama.cpp: sources/go-llama.cpp:
mkdir -p sources/go-llama.cpp mkdir -p sources/go-llama.cpp
@@ -252,23 +235,6 @@ sources/go-llama.cpp:
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a $(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
## bark.cpp
sources/bark.cpp:
git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
cd sources/bark.cpp && \
git checkout $(BARKCPP_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/bark.cpp/build/libbark.a: sources/bark.cpp
cd sources/bark.cpp && \
mkdir -p build && \
cd build && \
cmake $(CMAKE_ARGS) .. && \
cmake --build . --config Release
backend/go/bark/libbark.a: sources/bark.cpp/build/libbark.a
$(MAKE) -C backend/go/bark libbark.a
## go-piper ## go-piper
sources/go-piper: sources/go-piper:
mkdir -p sources/go-piper mkdir -p sources/go-piper
@@ -282,7 +248,21 @@ sources/go-piper:
sources/go-piper/libpiper_binding.a: sources/go-piper sources/go-piper/libpiper_binding.a: sources/go-piper
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o $(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
## stable diffusion (onnx)
## RWKV
sources/go-rwkv.cpp:
mkdir -p sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && \
git init && \
git remote add origin $(RWKV_REPO) && \
git fetch origin && \
git checkout $(RWKV_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
## stable diffusion
sources/go-stable-diffusion: sources/go-stable-diffusion:
mkdir -p sources/go-stable-diffusion mkdir -p sources/go-stable-diffusion
cd sources/go-stable-diffusion && \ cd sources/go-stable-diffusion && \
@@ -295,44 +275,6 @@ sources/go-stable-diffusion:
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
## stablediffusion (ggml)
sources/stablediffusion-ggml.cpp:
git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
cd sources/stablediffusion-ggml.cpp && \
git checkout $(STABLEDIFFUSION_GGML_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a: sources/stablediffusion-ggml.cpp
cd sources/stablediffusion-ggml.cpp && \
mkdir -p build && \
cd build && \
cmake $(CMAKE_ARGS) .. && \
cmake --build . --config Release
backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a
$(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a
backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ LIBRARY_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion-ggml ./backend/go/image/stablediffusion-ggml/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/stablediffusion-ggml
endif
sources/onnxruntime:
mkdir -p sources/onnxruntime
curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./
backend-assets/lib/libonnxruntime.so.1: backend-assets/lib sources/onnxruntime
cp -rfv sources/onnxruntime/lib/* backend-assets/lib/
ifeq ($(OS),Darwin)
mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
else
mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
endif
## tiny-dream ## tiny-dream
sources/go-tiny-dream: sources/go-tiny-dream:
mkdir -p sources/go-tiny-dream mkdir -p sources/go-tiny-dream
@@ -359,19 +301,23 @@ sources/whisper.cpp:
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp get-sources: sources/go-llama.cpp sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
replace: replace:
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream $(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper $(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion $(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
dropreplace: dropreplace:
$(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp $(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go $(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream $(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper $(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion $(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
@@ -384,8 +330,10 @@ prepare-sources: get-sources replace
rebuild: ## Rebuilds the project rebuild: ## Rebuilds the project
$(GOCMD) clean -cache $(GOCMD) clean -cache
$(MAKE) -C sources/go-llama.cpp clean $(MAKE) -C sources/go-llama.cpp clean
$(MAKE) -C sources/go-rwkv.cpp clean
$(MAKE) -C sources/whisper.cpp clean $(MAKE) -C sources/whisper.cpp clean
$(MAKE) -C sources/go-stable-diffusion clean $(MAKE) -C sources/go-stable-diffusion clean
$(MAKE) -C sources/go-bert.cpp clean
$(MAKE) -C sources/go-piper clean $(MAKE) -C sources/go-piper clean
$(MAKE) -C sources/go-tiny-dream clean $(MAKE) -C sources/go-tiny-dream clean
$(MAKE) build $(MAKE) build
@@ -400,9 +348,7 @@ clean: ## Remove build related file
rm -rf release/ rm -rf release/
rm -rf backend-assets/* rm -rf backend-assets/*
$(MAKE) -C backend/cpp/grpc clean $(MAKE) -C backend/cpp/grpc clean
$(MAKE) -C backend/go/bark clean
$(MAKE) -C backend/cpp/llama clean $(MAKE) -C backend/cpp/llama clean
$(MAKE) -C backend/go/image/stablediffusion-ggml clean
rm -rf backend/cpp/llama-* || true rm -rf backend/cpp/llama-* || true
$(MAKE) dropreplace $(MAKE) dropreplace
$(MAKE) protogen-clean $(MAKE) protogen-clean
@@ -413,9 +359,6 @@ clean-tests:
rm -rf test-dir rm -rf test-dir
rm -rf core/http/backend-assets rm -rf core/http/backend-assets
clean-dc: clean
cp -r /build/backend-assets /workspace/backend-assets
## Build: ## Build:
build: prepare backend-assets grpcs ## Build the project build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I local-ai build info:${RESET}) $(info ${GREEN}I local-ai build info:${RESET})
@@ -493,6 +436,8 @@ test-models/testmodel.ggml:
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
wget -q https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv
wget -q https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
cp tests/models_fixtures/* test-models cp tests/models_fixtures/* test-models
prepare-test: grpcs prepare-test: grpcs
@@ -520,15 +465,15 @@ run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures) ls -liah $(abspath ./tests/e2e-fixtures)
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
run-e2e-aio: protogen-go run-e2e-aio:
@echo 'Running e2e AIO tests' @echo 'Running e2e AIO tests'
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
test-e2e: test-e2e:
@echo 'Running e2e tests' @echo 'Running e2e tests'
BUILD_TYPE=$(BUILD_TYPE) \ BUILD_TYPE=$(BUILD_TYPE) \
LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \ LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e
teardown-e2e: teardown-e2e:
rm -rf $(TEST_DIR) || true rm -rf $(TEST_DIR) || true
@@ -536,24 +481,24 @@ teardown-e2e:
test-llama: prepare-test test-llama: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS)
test-llama-gguf: prepare-test test-llama-gguf: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r $(TEST_PATHS)
test-tts: prepare-test test-tts: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts 1 -v -r $(TEST_PATHS)
test-stablediffusion: prepare-test test-stablediffusion: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts 1 -v -r $(TEST_PATHS)
test-stores: backend-assets/grpc/local-store test-stores: backend-assets/grpc/local-store
mkdir -p tests/integration/backend-assets/grpc mkdir -p tests/integration/backend-assets/grpc
cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/ cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts $(TEST_FLAKES) -v -r tests/integration $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts 1 -v -r tests/integration
test-container: test-container:
docker build --target requirements -t local-ai-test-container . docker build --target requirements -t local-ai-test-container .
@@ -745,6 +690,13 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
backend-assets/grpc: protogen-go replace backend-assets/grpc: protogen-go replace
mkdir -p backend-assets/grpc mkdir -p backend-assets/grpc
backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/bert-embeddings
endif
backend-assets/grpc/huggingface: backend-assets/grpc backend-assets/grpc/huggingface: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
ifneq ($(UPX),) ifneq ($(UPX),)
@@ -804,6 +756,10 @@ backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/ll
$(info ${GREEN}I llama-cpp build info:fallback${RESET}) $(info ${GREEN}I llama-cpp build info:fallback${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
# TODO: every binary should have its own folder instead, so can have different metal implementations
ifeq ($(BUILD_TYPE),metal)
cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
endif
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
cp -rf backend/cpp/llama backend/cpp/llama-cuda cp -rf backend/cpp/llama backend/cpp/llama-cuda
@@ -816,7 +772,7 @@ backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/lla
cp -rf backend/cpp/llama backend/cpp/llama-hipblas cp -rf backend/cpp/llama backend/cpp/llama-hipblas
$(MAKE) -C backend/cpp/llama-hipblas purge $(MAKE) -C backend/cpp/llama-hipblas purge
$(info ${GREEN}I llama-cpp build info:hipblas${RESET}) $(info ${GREEN}I llama-cpp build info:hipblas${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
@@ -851,13 +807,6 @@ ifneq ($(UPX),)
$(UPX) backend-assets/grpc/llama-ggml $(UPX) backend-assets/grpc/llama-ggml
endif endif
backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/bark-cpp
endif
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \ CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
@@ -865,6 +814,13 @@ ifneq ($(UPX),)
$(UPX) backend-assets/grpc/piper $(UPX) backend-assets/grpc/piper
endif endif
backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/rwkv
endif
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \ CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
@@ -872,13 +828,6 @@ ifneq ($(UPX),)
$(UPX) backend-assets/grpc/stablediffusion $(UPX) backend-assets/grpc/stablediffusion
endif endif
backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/silero-vad
endif
backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \ CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
@@ -939,7 +888,7 @@ docker-aio-all:
docker-image-intel: docker-image-intel:
docker build \ docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \ --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \ --build-arg GO_TAGS="none" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
@@ -947,7 +896,7 @@ docker-image-intel:
docker-image-intel-xpu: docker-image-intel-xpu:
docker build \ docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \ --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \ --build-arg GO_TAGS="none" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \

View File

@@ -38,13 +38,9 @@
</a> </a>
</p> </p>
<p align="center">
<a href="https://trendshift.io/repositories/1484" target="_blank"><img src="https://trendshift.io/api/badge/repositories/1484" alt="go-skynet%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
</p>
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/) > :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
> >
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) > [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/go-skynet/LocalAI/tree/master/examples/)
[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai) [![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)
@@ -60,42 +56,22 @@ curl https://localai.io/install.sh | sh
Or run with docker: Or run with docker:
```bash ```bash
# CPU only image:
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-cpu
# Nvidia GPU:
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
# CPU and GPU image (bigger size):
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
# AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/)
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
``` # Alternative images:
# - if you have an Nvidia GPU:
To load models: # docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
# - without preconfigured models
```bash # docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
# From the model gallery (see available models with `local-ai models list`, in the WebUI from the model tab, or visiting https://models.localai.io) # - without preconfigured models for Nvidia GPUs
local-ai run llama-3.2-1b-instruct:q4_k_m # docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
# Start LocalAI with the phi-2 model directly from huggingface
local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
# Install and run a model from the Ollama OCI registry
local-ai run ollama://gemma:2b
# Run a model from a configuration file
local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
# Install and run a model from a standard OCI registry (e.g., Docker Hub)
local-ai run oci://localai/phi-2:latest
``` ```
[💻 Getting started](https://localai.io/basics/getting_started/index.html) [💻 Getting started](https://localai.io/basics/getting_started/index.html)
## 📰 Latest project news ## 🔥🔥 Hot topics / Roadmap
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 )
- Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 )
- Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204
- Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples)
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io) - Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723 - July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io - June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
@@ -107,12 +83,8 @@ local-ai run oci://localai/phi-2:latest
- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222 - May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121 - April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) Hot topics (looking for contributors):
## 🔥🔥 Hot topics (looking for help):
- Multimodal with vLLM and Video understanding: https://github.com/mudler/LocalAI/pull/3729
- Realtime API https://github.com/mudler/LocalAI/issues/3714
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113 - 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156 - WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
- Backends v2: https://github.com/mudler/LocalAI/issues/1126 - Backends v2: https://github.com/mudler/LocalAI/issues/1126
@@ -166,9 +138,6 @@ Other:
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack - Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
- Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot - Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot - Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
- Another Telegram Bot https://github.com/JackBekket/Hellper
- Auto-documentation https://github.com/JackBekket/Reflexia
- Github bot which answer on issues, with code and documentation as context https://github.com/JackBekket/GitHelper
- Github Actions: https://github.com/marketplace/actions/start-localai - Github Actions: https://github.com/marketplace/actions/start-localai
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/ - Examples: https://github.com/mudler/LocalAI/tree/master/examples/
@@ -243,6 +212,7 @@ LocalAI couldn't have been built without the help of great software already avai
- https://github.com/antimatter15/alpaca.cpp - https://github.com/antimatter15/alpaca.cpp
- https://github.com/EdVince/Stable-Diffusion-NCNN - https://github.com/EdVince/Stable-Diffusion-NCNN
- https://github.com/ggerganov/whisper.cpp - https://github.com/ggerganov/whisper.cpp
- https://github.com/saharNooby/rwkv.cpp
- https://github.com/rhasspy/piper - https://github.com/rhasspy/piper
## 🤗 Contributors ## 🤗 Contributors

View File

@@ -1,7 +1,7 @@
name: text-embedding-ada-002 name: text-embedding-ada-002
embeddings: true backend: bert-embeddings
parameters: parameters:
model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
usage: | usage: |
You can test this model with curl like this: You can test this model with curl like this:

View File

@@ -2,7 +2,7 @@ backend: llama-cpp
context_size: 4096 context_size: 4096
f16: true f16: true
mmap: true mmap: true
name: gpt-4o name: gpt-4-vision-preview
roles: roles:
user: "USER:" user: "USER:"

View File

@@ -2,7 +2,7 @@ backend: llama-cpp
context_size: 4096 context_size: 4096
f16: true f16: true
mmap: true mmap: true
name: gpt-4o name: gpt-4-vision-preview
roles: roles:
user: "USER:" user: "USER:"

View File

@@ -2,7 +2,7 @@ backend: llama-cpp
context_size: 4096 context_size: 4096
mmap: false mmap: false
f16: false f16: false
name: gpt-4o name: gpt-4-vision-preview
roles: roles:
user: "USER:" user: "USER:"

View File

@@ -26,21 +26,6 @@ service Backend {
rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {} rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
rpc Rerank(RerankRequest) returns (RerankResult) {} rpc Rerank(RerankRequest) returns (RerankResult) {}
rpc GetMetrics(MetricsRequest) returns (MetricsResponse);
rpc VAD(VADRequest) returns (VADResponse) {}
}
// Define the empty request
message MetricsRequest {}
message MetricsResponse {
int32 slot_id = 1;
string prompt_json_for_slot = 2; // Stores the prompt as a JSON string.
float tokens_per_second = 3;
int32 tokens_generated = 4;
int32 prompt_tokens_processed = 5;
} }
message RerankRequest { message RerankRequest {
@@ -149,9 +134,6 @@ message PredictOptions {
repeated string Images = 42; repeated string Images = 42;
bool UseTokenizerTemplate = 43; bool UseTokenizerTemplate = 43;
repeated Message Messages = 44; repeated Message Messages = 44;
repeated string Videos = 45;
repeated string Audios = 46;
string CorrelationId = 47;
} }
// The response message containing the result // The response message containing the result
@@ -221,7 +203,6 @@ message ModelOptions {
int32 SwapSpace = 53; int32 SwapSpace = 53;
int32 MaxModelLen = 54; int32 MaxModelLen = 54;
int32 TensorParallelSize = 55; int32 TensorParallelSize = 55;
string LoadFormat = 58;
string MMProj = 41; string MMProj = 41;
@@ -235,13 +216,6 @@ message ModelOptions {
bool FlashAttention = 56; bool FlashAttention = 56;
bool NoKVOffload = 57; bool NoKVOffload = 57;
string ModelPath = 59;
repeated string LoraAdapters = 60;
repeated float LoraScales = 61;
repeated string Options = 62;
} }
message Result { message Result {
@@ -297,19 +271,6 @@ message TTSRequest {
optional string language = 5; optional string language = 5;
} }
message VADRequest {
repeated float audio = 1;
}
message VADSegment {
float start = 1;
float end = 2;
}
message VADResponse {
repeated VADSegment segments = 1;
}
message SoundGenerationRequest { message SoundGenerationRequest {
string text = 1; string text = 1;
string model = 2; string model = 2;

View File

@@ -22,7 +22,7 @@ else ifeq ($(BUILD_TYPE),clblas)
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas) else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DGGML_HIP=ON CMAKE_ARGS+=-DGGML_HIPBLAS=ON
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation # If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
# But if it's OSX without metal, disable it here # But if it's OSX without metal, disable it here
else ifeq ($(OS),Darwin) else ifeq ($(OS),Darwin)
@@ -30,7 +30,9 @@ else ifeq ($(OS),Darwin)
CMAKE_ARGS+=-DGGML_METAL=OFF CMAKE_ARGS+=-DGGML_METAL=OFF
else else
CMAKE_ARGS+=-DGGML_METAL=ON CMAKE_ARGS+=-DGGML_METAL=ON
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON # Until this is tested properly, we disable embedded metal file
# as we already embed it as part of the LocalAI assets
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=OFF
TARGET+=--target ggml-metal TARGET+=--target ggml-metal
endif endif
endif endif

View File

@@ -13,7 +13,6 @@
#include <getopt.h> #include <getopt.h>
#include "clip.h" #include "clip.h"
#include "llava.h" #include "llava.h"
#include "log.h"
#include "stb_image.h" #include "stb_image.h"
#include "common.h" #include "common.h"
#include "json.hpp" #include "json.hpp"
@@ -113,7 +112,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
std::string ret; std::string ret;
for (; begin != end; ++begin) for (; begin != end; ++begin)
{ {
ret += common_token_to_piece(ctx, *begin); ret += llama_token_to_piece(ctx, *begin);
} }
return ret; return ret;
} }
@@ -121,7 +120,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
// format incomplete utf-8 multibyte character for output // format incomplete utf-8 multibyte character for output
static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token) static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
{ {
std::string out = token == -1 ? "" : common_token_to_piece(ctx, token); std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
// if the size is 1 and first bit is 1, meaning it's a partial character // if the size is 1 and first bit is 1, meaning it's a partial character
// (size > 1 meaning it's already a known token) // (size > 1 meaning it's already a known token)
if (out.size() == 1 && (out[0] & 0x80) == 0x80) if (out.size() == 1 && (out[0] & 0x80) == 0x80)
@@ -203,8 +202,8 @@ struct llama_client_slot
std::string stopping_word; std::string stopping_word;
// sampling // sampling
struct common_params_sampling sparams; struct gpt_sampler_params sparams;
common_sampler *ctx_sampling = nullptr; gpt_sampler *ctx_sampling = nullptr;
int32_t ga_i = 0; // group-attention state int32_t ga_i = 0; // group-attention state
int32_t ga_n = 1; // group-attention factor int32_t ga_n = 1; // group-attention factor
@@ -257,7 +256,7 @@ struct llama_client_slot
images.clear(); images.clear();
} }
bool has_budget(common_params &global_params) { bool has_budget(gpt_params &global_params) {
if (params.n_predict == -1 && global_params.n_predict == -1) if (params.n_predict == -1 && global_params.n_predict == -1)
{ {
return true; // limitless return true; // limitless
@@ -391,39 +390,6 @@ struct llama_metrics {
} }
}; };
struct llava_embd_batch {
std::vector<llama_pos> pos;
std::vector<int32_t> n_seq_id;
std::vector<llama_seq_id> seq_id_0;
std::vector<llama_seq_id *> seq_ids;
std::vector<int8_t> logits;
llama_batch batch;
llava_embd_batch(float * embd, int32_t n_tokens, llama_pos pos_0, llama_seq_id seq_id) {
pos .resize(n_tokens);
n_seq_id.resize(n_tokens);
seq_ids .resize(n_tokens + 1);
logits .resize(n_tokens);
seq_id_0.resize(1);
seq_id_0[0] = seq_id;
seq_ids [n_tokens] = nullptr;
batch = {
/*n_tokens =*/ n_tokens,
/*tokens =*/ nullptr,
/*embd =*/ embd,
/*pos =*/ pos.data(),
/*n_seq_id =*/ n_seq_id.data(),
/*seq_id =*/ seq_ids.data(),
/*logits =*/ logits.data(),
};
for (int i = 0; i < n_tokens; i++) {
batch.pos [i] = pos_0 + i;
batch.n_seq_id[i] = 1;
batch.seq_id [i] = seq_id_0.data();
batch.logits [i] = false;
}
}
};
struct llama_server_context struct llama_server_context
{ {
llama_model *model = nullptr; llama_model *model = nullptr;
@@ -431,7 +397,7 @@ struct llama_server_context
clip_ctx *clp_ctx = nullptr; clip_ctx *clp_ctx = nullptr;
common_params params; gpt_params params;
llama_batch batch; llama_batch batch;
@@ -474,7 +440,7 @@ struct llama_server_context
} }
} }
bool load_model(const common_params &params_) bool load_model(const gpt_params &params_)
{ {
params = params_; params = params_;
if (!params.mmproj.empty()) { if (!params.mmproj.empty()) {
@@ -482,7 +448,7 @@ struct llama_server_context
LOG_INFO("Multi Modal Mode Enabled", {}); LOG_INFO("Multi Modal Mode Enabled", {});
clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1); clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1);
if(clp_ctx == nullptr) { if(clp_ctx == nullptr) {
LOG_ERR("unable to load clip model: %s", params.mmproj.c_str()); LOG_ERROR("unable to load clip model", {{"model", params.mmproj}});
return false; return false;
} }
@@ -491,12 +457,12 @@ struct llama_server_context
} }
} }
common_init_result common_init = common_init_from_params(params); llama_init_result llama_init = llama_init_from_gpt_params(params);
model = common_init.model; model = llama_init.model;
ctx = common_init.context; ctx = llama_init.context;
if (model == nullptr) if (model == nullptr)
{ {
LOG_ERR("unable to load model: %s", params.model.c_str()); LOG_ERROR("unable to load model", {{"model", params.model}});
return false; return false;
} }
@@ -504,7 +470,7 @@ struct llama_server_context
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx); const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
const int n_embd_llm = llama_n_embd(model); const int n_embd_llm = llama_n_embd(model);
if (n_embd_clip != n_embd_llm) { if (n_embd_clip != n_embd_llm) {
LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm); LOG_TEE("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
llama_free(ctx); llama_free(ctx);
llama_free_model(model); llama_free_model(model);
return false; return false;
@@ -523,21 +489,11 @@ struct llama_server_context
std::vector<char> buf(1); std::vector<char> buf(1);
int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size()); int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
if (res < 0) { if (res < 0) {
LOG_ERR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", __func__); LOG_ERROR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", {});
sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template
} }
} }
llama_client_slot* get_active_slot() {
for (llama_client_slot& slot : slots) {
// Check if the slot is currently processing
if (slot.is_processing()) {
return &slot; // Return the active slot
}
}
return nullptr; // No active slot found
}
void initialize() { void initialize() {
// create slots // create slots
all_slots_are_idle = true; all_slots_are_idle = true;
@@ -611,12 +567,12 @@ struct llama_server_context
std::vector<llama_token> p; std::vector<llama_token> p;
if (first) if (first)
{ {
p = common_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL); p = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
first = false; first = false;
} }
else else
{ {
p = common_tokenize(ctx, s, false, TMP_FORCE_SPECIAL); p = ::llama_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
} }
prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end()); prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end());
} }
@@ -633,7 +589,7 @@ struct llama_server_context
else else
{ {
auto s = json_prompt.template get<std::string>(); auto s = json_prompt.template get<std::string>();
prompt_tokens = common_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL); prompt_tokens = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
} }
return prompt_tokens; return prompt_tokens;
@@ -662,7 +618,7 @@ struct llama_server_context
bool launch_slot_with_data(llama_client_slot* &slot, json data) { bool launch_slot_with_data(llama_client_slot* &slot, json data) {
slot_params default_params; slot_params default_params;
common_params_sampling default_sparams; gpt_sampler_params default_sparams;
slot->params.stream = json_value(data, "stream", false); slot->params.stream = json_value(data, "stream", false);
slot->params.cache_prompt = json_value(data, "cache_prompt", false); slot->params.cache_prompt = json_value(data, "cache_prompt", false);
@@ -670,6 +626,7 @@ struct llama_server_context
slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k); slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p); slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p); slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
slot->sparams.typ_p = json_value(data, "typical_p", default_sparams.typ_p); slot->sparams.typ_p = json_value(data, "typical_p", default_sparams.typ_p);
slot->sparams.temp = json_value(data, "temperature", default_sparams.temp); slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range); slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
@@ -801,7 +758,7 @@ struct llama_server_context
} }
else if (el[0].is_string()) else if (el[0].is_string())
{ {
auto toks = common_tokenize(model, el[0].get<std::string>(), false); auto toks = llama_tokenize(model, el[0].get<std::string>(), false);
for (auto tok : toks) for (auto tok : toks)
{ {
slot->sparams.logit_bias.push_back({tok, bias}); slot->sparams.logit_bias.push_back({tok, bias});
@@ -833,7 +790,7 @@ struct llama_server_context
sampler_names.emplace_back(name); sampler_names.emplace_back(name);
} }
} }
slot->sparams.samplers = common_sampler_types_from_names(sampler_names, false); slot->sparams.samplers = gpt_sampler_types_from_names(sampler_names, false);
} }
else else
{ {
@@ -855,11 +812,10 @@ struct llama_server_context
img_sl.img_data = clip_image_u8_init(); img_sl.img_data = clip_image_u8_init();
if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data)) if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
{ {
LOG_ERR("%s: failed to load image, slot_id: %d, img_sl_id: %d", LOG_ERROR("failed to load image", {
__func__, {"slot_id", slot->id},
slot->id, {"img_sl_id", img_sl.id}
img_sl.id });
);
return false; return false;
} }
LOG_VERBOSE("image loaded", { LOG_VERBOSE("image loaded", {
@@ -897,12 +853,12 @@ struct llama_server_context
} }
} }
if (!found) { if (!found) {
LOG("ERROR: Image with id: %i, not found.\n", img_id); LOG_TEE("ERROR: Image with id: %i, not found.\n", img_id);
slot->images.clear(); slot->images.clear();
return false; return false;
} }
} catch (const std::invalid_argument& e) { } catch (const std::invalid_argument& e) {
LOG("Invalid image number id in prompt\n"); LOG_TEE("Invalid image number id in prompt\n");
slot->images.clear(); slot->images.clear();
return false; return false;
} }
@@ -917,9 +873,9 @@ struct llama_server_context
if (slot->ctx_sampling != nullptr) if (slot->ctx_sampling != nullptr)
{ {
common_sampler_free(slot->ctx_sampling); gpt_sampler_free(slot->ctx_sampling);
} }
slot->ctx_sampling = common_sampler_init(model, slot->sparams); slot->ctx_sampling = gpt_sampler_init(model, slot->sparams);
//llama_set_rng_seed(ctx, slot->params.seed); //llama_set_rng_seed(ctx, slot->params.seed);
slot->command = LOAD_PROMPT; slot->command = LOAD_PROMPT;
@@ -930,7 +886,7 @@ struct llama_server_context
{"task_id", slot->task_id}, {"task_id", slot->task_id},
}); });
// LOG("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str()); // LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
return true; return true;
} }
@@ -946,13 +902,13 @@ struct llama_server_context
system_tokens.clear(); system_tokens.clear();
if (!system_prompt.empty()) { if (!system_prompt.empty()) {
system_tokens = common_tokenize(ctx, system_prompt, add_bos_token); system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token);
common_batch_clear(batch); llama_batch_clear(batch);
for (int i = 0; i < (int)system_tokens.size(); ++i) for (int i = 0; i < (int)system_tokens.size(); ++i)
{ {
common_batch_add(batch, system_tokens[i], i, { 0 }, false); llama_batch_add(batch, system_tokens[i], i, { 0 }, false);
} }
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += params.n_batch) for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += params.n_batch)
@@ -966,10 +922,11 @@ struct llama_server_context
batch.n_seq_id + i, batch.n_seq_id + i,
batch.seq_id + i, batch.seq_id + i,
batch.logits + i, batch.logits + i,
0, 0, 0, // unused
}; };
if (llama_decode(ctx, batch_view) != 0) if (llama_decode(ctx, batch_view) != 0)
{ {
LOG("%s: llama_decode() failed\n", __func__); LOG_TEE("%s: llama_decode() failed\n", __func__);
return; return;
} }
} }
@@ -981,7 +938,7 @@ struct llama_server_context
} }
} }
LOG("system prompt updated\n"); LOG_TEE("system prompt updated\n");
system_need_update = false; system_need_update = false;
} }
@@ -1040,7 +997,7 @@ struct llama_server_context
bool process_token(completion_token_output &result, llama_client_slot &slot) { bool process_token(completion_token_output &result, llama_client_slot &slot) {
// remember which tokens were sampled - used for repetition penalties during sampling // remember which tokens were sampled - used for repetition penalties during sampling
const std::string token_str = common_token_to_piece(ctx, result.tok); const std::string token_str = llama_token_to_piece(ctx, result.tok);
slot.sampled = result.tok; slot.sampled = result.tok;
// search stop word and delete it // search stop word and delete it
@@ -1163,7 +1120,7 @@ struct llama_server_context
} }
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.cpuparams.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) { if (!llava_image_embed_make_with_clip_img(clp_ctx, params.cpuparams.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
LOG("Error processing the given image"); LOG_TEE("Error processing the given image");
return false; return false;
} }
@@ -1175,7 +1132,7 @@ struct llama_server_context
void send_error(task_server& task, const std::string &error) void send_error(task_server& task, const std::string &error)
{ {
LOG("task %i - error: %s\n", task.id, error.c_str()); LOG_TEE("task %i - error: %s\n", task.id, error.c_str());
task_result res; task_result res;
res.id = task.id; res.id = task.id;
res.multitask_id = task.multitask_id; res.multitask_id = task.multitask_id;
@@ -1191,7 +1148,7 @@ struct llama_server_context
samplers.reserve(slot.sparams.samplers.size()); samplers.reserve(slot.sparams.samplers.size());
for (const auto & sampler : slot.sparams.samplers) for (const auto & sampler : slot.sparams.samplers)
{ {
samplers.emplace_back(common_sampler_type_to_str(sampler)); samplers.emplace_back(gpt_sampler_type_to_str(sampler));
} }
return json { return json {
@@ -1205,6 +1162,7 @@ struct llama_server_context
{"top_k", slot.sparams.top_k}, {"top_k", slot.sparams.top_k},
{"top_p", slot.sparams.top_p}, {"top_p", slot.sparams.top_p},
{"min_p", slot.sparams.min_p}, {"min_p", slot.sparams.min_p},
{"tfs_z", slot.sparams.tfs_z},
{"typical_p", slot.sparams.typ_p}, {"typical_p", slot.sparams.typ_p},
{"repeat_last_n", slot.sparams.penalty_last_n}, {"repeat_last_n", slot.sparams.penalty_last_n},
{"repeat_penalty", slot.sparams.penalty_repeat}, {"repeat_penalty", slot.sparams.penalty_repeat},
@@ -1246,7 +1204,7 @@ struct llama_server_context
if (slot.sparams.n_probs > 0) if (slot.sparams.n_probs > 0)
{ {
std::vector<completion_token_output> probs_output = {}; std::vector<completion_token_output> probs_output = {};
const std::vector<llama_token> to_send_toks = common_tokenize(ctx, tkn.text_to_send, false); const std::vector<llama_token> to_send_toks = llama_tokenize(ctx, tkn.text_to_send, false);
size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size()); size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size());
size_t probs_stop_pos = std::min(slot.sent_token_probs_index + to_send_toks.size(), slot.generated_token_probs.size()); size_t probs_stop_pos = std::min(slot.sent_token_probs_index + to_send_toks.size(), slot.generated_token_probs.size());
if (probs_pos < probs_stop_pos) if (probs_pos < probs_stop_pos)
@@ -1298,7 +1256,7 @@ struct llama_server_context
std::vector<completion_token_output> probs = {}; std::vector<completion_token_output> probs = {};
if (!slot.params.stream && slot.stopped_word) if (!slot.params.stream && slot.stopped_word)
{ {
const std::vector<llama_token> stop_word_toks = common_tokenize(ctx, slot.stopping_word, false); const std::vector<llama_token> stop_word_toks = llama_tokenize(ctx, slot.stopping_word, false);
probs = std::vector<completion_token_output>(slot.generated_token_probs.begin(), slot.generated_token_probs.end() - stop_word_toks.size()); probs = std::vector<completion_token_output>(slot.generated_token_probs.begin(), slot.generated_token_probs.end() - stop_word_toks.size());
} }
else else
@@ -1409,10 +1367,11 @@ struct llama_server_context
batch.n_seq_id + i, batch.n_seq_id + i,
batch.seq_id + i, batch.seq_id + i,
batch.logits + i, batch.logits + i,
0, 0, 0, // unused
}; };
if (llama_decode(ctx, batch_view)) if (llama_decode(ctx, batch_view))
{ {
LOG("%s : failed to eval\n", __func__); LOG_TEE("%s : failed to eval\n", __func__);
return false; return false;
} }
} }
@@ -1427,18 +1386,17 @@ struct llama_server_context
} }
const int n_embd = llama_n_embd(model); const int n_embd = llama_n_embd(model);
float * embd = img.image_embedding + i * n_embd; llama_batch batch_img = { n_eval, nullptr, (img.image_embedding + i * n_embd), nullptr, nullptr, nullptr, nullptr, slot.n_past, 1, 0, };
llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0); if (llama_decode(ctx, batch_img))
if (llama_decode(ctx, llava_batch.batch))
{ {
LOG("%s : failed to eval image\n", __func__); LOG_TEE("%s : failed to eval image\n", __func__);
return false; return false;
} }
slot.n_past += n_eval; slot.n_past += n_eval;
} }
image_idx++; image_idx++;
common_batch_clear(batch); llama_batch_clear(batch);
// append prefix of next image // append prefix of next image
const auto json_prompt = (image_idx >= (int) slot.images.size()) ? const auto json_prompt = (image_idx >= (int) slot.images.size()) ?
@@ -1448,7 +1406,7 @@ struct llama_server_context
std::vector<llama_token> append_tokens = tokenize(json_prompt, false); // has next image std::vector<llama_token> append_tokens = tokenize(json_prompt, false); // has next image
for (int i = 0; i < (int) append_tokens.size(); ++i) for (int i = 0; i < (int) append_tokens.size(); ++i)
{ {
common_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true); llama_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true);
slot.n_past += 1; slot.n_past += 1;
} }
} }
@@ -1580,7 +1538,7 @@ struct llama_server_context
update_system_prompt(); update_system_prompt();
} }
common_batch_clear(batch); llama_batch_clear(batch);
if (all_slots_are_idle) if (all_slots_are_idle)
{ {
@@ -1614,7 +1572,7 @@ struct llama_server_context
slot.n_past = 0; slot.n_past = 0;
slot.truncated = false; slot.truncated = false;
slot.has_next_token = true; slot.has_next_token = true;
LOG("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size()); LOG_TEE("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
continue; continue;
// END LOCALAI changes // END LOCALAI changes
@@ -1658,7 +1616,7 @@ struct llama_server_context
// TODO: we always have to take into account the "system_tokens" // TODO: we always have to take into account the "system_tokens"
// this is not great and needs to be improved somehow // this is not great and needs to be improved somehow
common_batch_add(batch, slot.sampled, system_tokens.size() + slot_npast, { slot.id }, true); llama_batch_add(batch, slot.sampled, system_tokens.size() + slot_npast, { slot.id }, true);
slot.n_past += 1; slot.n_past += 1;
} }
@@ -1752,7 +1710,7 @@ struct llama_server_context
if (!slot.params.cache_prompt) if (!slot.params.cache_prompt)
{ {
common_sampler_reset(slot.ctx_sampling); gpt_sampler_reset(slot.ctx_sampling);
slot.n_past = 0; slot.n_past = 0;
slot.n_past_se = 0; slot.n_past_se = 0;
@@ -1764,7 +1722,7 @@ struct llama_server_context
// push the prompt into the sampling context (do not apply grammar) // push the prompt into the sampling context (do not apply grammar)
for (auto &token : prompt_tokens) for (auto &token : prompt_tokens)
{ {
common_sampler_accept(slot.ctx_sampling, token, false); gpt_sampler_accept(slot.ctx_sampling, token, false);
} }
slot.n_past = common_part(slot.cache_tokens, prompt_tokens); slot.n_past = common_part(slot.cache_tokens, prompt_tokens);
@@ -1856,17 +1814,16 @@ struct llama_server_context
ga_i += ga_w/ga_n; ga_i += ga_w/ga_n;
} }
} }
common_batch_add(batch, prefix_tokens[slot.n_past], system_tokens.size() + slot_npast, {slot.id }, false); llama_batch_add(batch, prefix_tokens[slot.n_past], system_tokens.size() + slot_npast, {slot.id }, false);
slot_npast++; slot_npast++;
} }
if (has_images && !ingest_images(slot, n_batch)) if (has_images && !ingest_images(slot, n_batch))
{ {
LOG_ERR("%s: failed processing images Slot id : %d, Task id: %d", LOG_ERROR("failed processing images", {
__func__, "slot_id", slot.id,
slot.id, "task_id", slot.task_id,
slot.task_id });
);
// FIXME @phymbert: to be properly tested // FIXME @phymbert: to be properly tested
// early returning without changing the slot state will block the slot for ever // early returning without changing the slot state will block the slot for ever
// no one at the moment is checking the return value // no one at the moment is checking the return value
@@ -1906,10 +1863,10 @@ struct llama_server_context
const int bd = (slot.ga_w / slot.ga_n) * (slot.ga_n - 1); const int bd = (slot.ga_w / slot.ga_n) * (slot.ga_n - 1);
const int dd = (slot.ga_w / slot.ga_n) - ib * bd - slot.ga_w; const int dd = (slot.ga_w / slot.ga_n) - ib * bd - slot.ga_w;
LOG("\n"); LOG_TEE("\n");
LOG("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd); LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd);
LOG("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n); LOG_TEE("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n);
LOG("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd); LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd);
llama_kv_cache_seq_add(ctx, slot.id, slot.ga_i, slot.n_past_se, ib * bd); llama_kv_cache_seq_add(ctx, slot.id, slot.ga_i, slot.n_past_se, ib * bd);
llama_kv_cache_seq_div(ctx, slot.id, slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w,slot.ga_n); llama_kv_cache_seq_div(ctx, slot.id, slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w,slot.ga_n);
@@ -1919,7 +1876,7 @@ struct llama_server_context
slot.ga_i += slot.ga_w / slot.ga_n; slot.ga_i += slot.ga_w / slot.ga_n;
LOG("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i); LOG_TEE("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i);
} }
slot.n_past_se += n_tokens; slot.n_past_se += n_tokens;
} }
@@ -1934,6 +1891,7 @@ struct llama_server_context
batch.n_seq_id + i, batch.n_seq_id + i,
batch.seq_id + i, batch.seq_id + i,
batch.logits + i, batch.logits + i,
0, 0, 0, // unused
}; };
const int ret = llama_decode(ctx, batch_view); const int ret = llama_decode(ctx, batch_view);
@@ -1943,11 +1901,11 @@ struct llama_server_context
if (n_batch == 1 || ret < 0) if (n_batch == 1 || ret < 0)
{ {
// if you get here, it means the KV cache is full - try increasing it via the context size // if you get here, it means the KV cache is full - try increasing it via the context size
LOG("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret); LOG_TEE("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
return false; return false;
} }
LOG("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2); LOG_TEE("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
// retry with half the batch size to try to find a free slot in the KV cache // retry with half the batch size to try to find a free slot in the KV cache
n_batch /= 2; n_batch /= 2;
@@ -1972,9 +1930,9 @@ struct llama_server_context
} }
completion_token_output result; completion_token_output result;
const llama_token id = common_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i); const llama_token id = gpt_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i);
common_sampler_accept(slot.ctx_sampling, id, true); gpt_sampler_accept(slot.ctx_sampling, id, true);
slot.n_decoded += 1; slot.n_decoded += 1;
if (slot.n_decoded == 1) if (slot.n_decoded == 1)
@@ -1985,7 +1943,7 @@ struct llama_server_context
} }
result.tok = id; result.tok = id;
const auto * cur_p = common_sampler_get_candidates(slot.ctx_sampling); const auto * cur_p = gpt_sampler_get_candidates(slot.ctx_sampling);
for (size_t i = 0; i < (size_t) slot.sparams.n_probs; ++i) { for (size_t i = 0; i < (size_t) slot.sparams.n_probs; ++i) {
result.probs.push_back({ result.probs.push_back({
@@ -2038,7 +1996,7 @@ static json format_partial_response(
struct token_translator struct token_translator
{ {
llama_context * ctx; llama_context * ctx;
std::string operator()(llama_token tok) const { return common_token_to_piece(ctx, tok); } std::string operator()(llama_token tok) const { return llama_token_to_piece(ctx, tok); }
std::string operator()(const completion_token_output &cto) const { return (*this)(cto.tok); } std::string operator()(const completion_token_output &cto) const { return (*this)(cto.tok); }
}; };
@@ -2103,6 +2061,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
// slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict); // slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
// slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k); // slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
// slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p); // slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
// slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
// slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p); // slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p);
// slot->sparams.temp = json_value(data, "temperature", default_sparams.temp); // slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
// slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n); // slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n);
@@ -2126,6 +2085,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
data["n_predict"] = predict->tokens() == 0 ? -1 : predict->tokens(); data["n_predict"] = predict->tokens() == 0 ? -1 : predict->tokens();
data["top_k"] = predict->topk(); data["top_k"] = predict->topk();
data["top_p"] = predict->topp(); data["top_p"] = predict->topp();
data["tfs_z"] = predict->tailfreesamplingz();
data["typical_p"] = predict->typicalp(); data["typical_p"] = predict->typicalp();
data["temperature"] = predict->temperature(); data["temperature"] = predict->temperature();
data["repeat_last_n"] = predict->repeat(); data["repeat_last_n"] = predict->repeat();
@@ -2143,9 +2103,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
data["ignore_eos"] = predict->ignoreeos(); data["ignore_eos"] = predict->ignoreeos();
data["embeddings"] = predict->embeddings(); data["embeddings"] = predict->embeddings();
// Add the correlationid to json data
data["correlation_id"] = predict->correlationid();
// for each image in the request, add the image data // for each image in the request, add the image data
// //
for (int i = 0; i < predict->images_size(); i++) { for (int i = 0; i < predict->images_size(); i++) {
@@ -2172,6 +2129,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
// llama.params.n_predict = predict->tokens() == 0 ? -1 : predict->tokens(); // llama.params.n_predict = predict->tokens() == 0 ? -1 : predict->tokens();
// llama.params.sparams.top_k = predict->topk(); // llama.params.sparams.top_k = predict->topk();
// llama.params.sparams.top_p = predict->topp(); // llama.params.sparams.top_p = predict->topp();
// llama.params.sparams.tfs_z = predict->tailfreesamplingz();
// llama.params.sparams.typical_p = predict->typicalp(); // llama.params.sparams.typical_p = predict->typicalp();
// llama.params.sparams.penalty_last_n = predict->repeat(); // llama.params.sparams.penalty_last_n = predict->repeat();
// llama.params.sparams.temp = predict->temperature(); // llama.params.sparams.temp = predict->temperature();
@@ -2229,7 +2187,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
// } // }
static void params_parse(const backend::ModelOptions* request, static void params_parse(const backend::ModelOptions* request,
common_params & params) { gpt_params & params) {
// this is comparable to: https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L1809 // this is comparable to: https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L1809
@@ -2299,7 +2257,6 @@ static void params_parse(const backend::ModelOptions* request,
params.use_mmap = request->mmap(); params.use_mmap = request->mmap();
params.flash_attn = request->flashattention(); params.flash_attn = request->flashattention();
params.no_kv_offload = request->nokvoffload(); params.no_kv_offload = request->nokvoffload();
params.ctx_shift = false; // We control context-shifting in any case (and we disable it as it could just lead to infinite loops)
params.embedding = request->embeddings(); params.embedding = request->embeddings();
@@ -2338,7 +2295,7 @@ public:
grpc::Status LoadModel(ServerContext* context, const backend::ModelOptions* request, backend::Result* result) { grpc::Status LoadModel(ServerContext* context, const backend::ModelOptions* request, backend::Result* result) {
// Implement LoadModel RPC // Implement LoadModel RPC
common_params params; gpt_params params;
params_parse(request, params); params_parse(request, params);
llama_backend_init(); llama_backend_init();
@@ -2384,11 +2341,6 @@ public:
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0); int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
reply.set_prompt_tokens(tokens_evaluated); reply.set_prompt_tokens(tokens_evaluated);
// Log Request Correlation Id
LOG_VERBOSE("correlation:", {
{ "id", data["correlation_id"] }
});
// Send the reply // Send the reply
writer->Write(reply); writer->Write(reply);
@@ -2412,12 +2364,6 @@ public:
std::string completion_text; std::string completion_text;
task_result result = llama.queue_results.recv(task_id); task_result result = llama.queue_results.recv(task_id);
if (!result.error && result.stop) { if (!result.error && result.stop) {
// Log Request Correlation Id
LOG_VERBOSE("correlation:", {
{ "id", data["correlation_id"] }
});
completion_text = result.result_json.value("content", ""); completion_text = result.result_json.value("content", "");
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0); int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0); int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
@@ -2457,31 +2403,6 @@ public:
return grpc::Status::OK; return grpc::Status::OK;
} }
grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
llama_client_slot* active_slot = llama.get_active_slot();
if (active_slot != nullptr) {
// Calculate the tokens per second using existing logic
double tokens_per_second = 1e3 / active_slot->t_token_generation * active_slot->n_decoded;
// Populate the response with metrics
response->set_slot_id(active_slot->id);
response->set_prompt_json_for_slot(active_slot->prompt.dump());
response->set_tokens_per_second(tokens_per_second);
response->set_tokens_generated(active_slot->n_decoded);
response->set_prompt_tokens_processed(active_slot->num_prompt_tokens_processed);
} else {
// Handle case when no active slot exists
response->set_slot_id(0);
response->set_prompt_json_for_slot("");
response->set_tokens_per_second(0);
response->set_tokens_generated(0);
response->set_prompt_tokens_processed(0);
}
return grpc::Status::OK;
}
}; };
void RunServer(const std::string& server_address) { void RunServer(const std::string& server_address) {

View File

@@ -1,25 +0,0 @@
INCLUDE_PATH := $(abspath ./)
LIBRARY_PATH := $(abspath ./)
AR?=ar
BUILD_TYPE?=
# keep standard at C11 and C++11
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../sources/bark.cpp/examples -I$(INCLUDE_PATH)/../../../sources/bark.cpp/spm-headers -I$(INCLUDE_PATH)/../../../sources/bark.cpp -O3 -DNDEBUG -std=c++17 -fPIC
LDFLAGS = -L$(LIBRARY_PATH) -L$(LIBRARY_PATH)/../../../sources/bark.cpp/build/examples -lbark -lstdc++ -lm
# warnings
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
gobark.o:
$(CXX) $(CXXFLAGS) gobark.cpp -o gobark.o -c $(LDFLAGS)
libbark.a: gobark.o
cp $(INCLUDE_PATH)/../../../sources/bark.cpp/build/libbark.a ./
$(AR) rcs libbark.a gobark.o
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml.c.o
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-alloc.c.o
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-backend.c.o
clean:
rm -f gobark.o libbark.a

View File

@@ -1,85 +0,0 @@
#include <iostream>
#include <tuple>
#include "bark.h"
#include "gobark.h"
#include "common.h"
#include "ggml.h"
struct bark_context *c;
void bark_print_progress_callback(struct bark_context *bctx, enum bark_encoding_step step, int progress, void *user_data) {
if (step == bark_encoding_step::SEMANTIC) {
printf("\rGenerating semantic tokens... %d%%", progress);
} else if (step == bark_encoding_step::COARSE) {
printf("\rGenerating coarse tokens... %d%%", progress);
} else if (step == bark_encoding_step::FINE) {
printf("\rGenerating fine tokens... %d%%", progress);
}
fflush(stdout);
}
int load_model(char *model) {
// initialize bark context
struct bark_context_params ctx_params = bark_context_default_params();
bark_params params;
params.model_path = model;
// ctx_params.verbosity = verbosity;
ctx_params.progress_callback = bark_print_progress_callback;
ctx_params.progress_callback_user_data = nullptr;
struct bark_context *bctx = bark_load_model(params.model_path.c_str(), ctx_params, params.seed);
if (!bctx) {
fprintf(stderr, "%s: Could not load model\n", __func__);
return 1;
}
c = bctx;
return 0;
}
int tts(char *text,int threads, char *dst ) {
ggml_time_init();
const int64_t t_main_start_us = ggml_time_us();
// generate audio
if (!bark_generate_audio(c, text, threads)) {
fprintf(stderr, "%s: An error occured. If the problem persists, feel free to open an issue to report it.\n", __func__);
return 1;
}
const float *audio_data = bark_get_audio_data(c);
if (audio_data == NULL) {
fprintf(stderr, "%s: Could not get audio data\n", __func__);
return 1;
}
const int audio_arr_size = bark_get_audio_data_size(c);
std::vector<float> audio_arr(audio_data, audio_data + audio_arr_size);
write_wav_on_disk(audio_arr, dst);
// report timing
{
const int64_t t_main_end_us = ggml_time_us();
const int64_t t_load_us = bark_get_load_time(c);
const int64_t t_eval_us = bark_get_eval_time(c);
printf("\n\n");
printf("%s: load time = %8.2f ms\n", __func__, t_load_us / 1000.0f);
printf("%s: eval time = %8.2f ms\n", __func__, t_eval_us / 1000.0f);
printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us) / 1000.0f);
}
return 0;
}
int unload() {
bark_free(c);
}

View File

@@ -1,52 +0,0 @@
package main
// #cgo CXXFLAGS: -I${SRCDIR}/../../../sources/bark.cpp/ -I${SRCDIR}/../../../sources/bark.cpp/encodec.cpp -I${SRCDIR}/../../../sources/bark.cpp/examples -I${SRCDIR}/../../../sources/bark.cpp/spm-headers
// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../sources/bark.cpp/build/examples -L${SRCDIR}/../../../sources/bark.cpp/build/encodec.cpp/ -lbark -lencodec -lcommon
// #include <gobark.h>
// #include <stdlib.h>
import "C"
import (
"fmt"
"unsafe"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
type Bark struct {
base.SingleThread
threads int
}
func (sd *Bark) Load(opts *pb.ModelOptions) error {
sd.threads = int(opts.Threads)
modelFile := C.CString(opts.ModelFile)
defer C.free(unsafe.Pointer(modelFile))
ret := C.load_model(modelFile)
if ret != 0 {
return fmt.Errorf("inference failed")
}
return nil
}
func (sd *Bark) TTS(opts *pb.TTSRequest) error {
t := C.CString(opts.Text)
defer C.free(unsafe.Pointer(t))
dst := C.CString(opts.Dst)
defer C.free(unsafe.Pointer(dst))
threads := C.int(sd.threads)
ret := C.tts(t, threads, dst)
if ret != 0 {
return fmt.Errorf("inference failed")
}
return nil
}

View File

@@ -1,8 +0,0 @@
#ifdef __cplusplus
extern "C" {
#endif
int load_model(char *model);
int tts(char *text,int threads, char *dst );
#ifdef __cplusplus
}
#endif

View File

@@ -1,21 +0,0 @@
INCLUDE_PATH := $(abspath ./)
LIBRARY_PATH := $(abspath ./)
AR?=ar
BUILD_TYPE?=
# keep standard at C11 and C++11
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC
# warnings
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
gosd.o:
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
libsd.a: gosd.o
cp $(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a ./libsd.a
$(AR) rcs libsd.a gosd.o
clean:
rm -f gosd.o libsd.a

View File

@@ -1,228 +0,0 @@
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <iostream>
#include <random>
#include <string>
#include <vector>
#include "gosd.h"
// #include "preprocessing.hpp"
#include "flux.hpp"
#include "stable-diffusion.h"
#define STB_IMAGE_IMPLEMENTATION
#define STB_IMAGE_STATIC
#include "stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#define STB_IMAGE_WRITE_STATIC
#include "stb_image_write.h"
#define STB_IMAGE_RESIZE_IMPLEMENTATION
#define STB_IMAGE_RESIZE_STATIC
#include "stb_image_resize.h"
// Names of the sampler method, same order as enum sample_method in stable-diffusion.h
const char* sample_method_str[] = {
"euler_a",
"euler",
"heun",
"dpm2",
"dpm++2s_a",
"dpm++2m",
"dpm++2mv2",
"ipndm",
"ipndm_v",
"lcm",
};
// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
const char* schedule_str[] = {
"default",
"discrete",
"karras",
"exponential",
"ays",
"gits",
};
sd_ctx_t* sd_c;
sample_method_t sample_method;
int load_model(char *model, char* options[], int threads, int diff) {
fprintf (stderr, "Loading model!\n");
char *stableDiffusionModel = "";
if (diff == 1 ) {
stableDiffusionModel = model;
model = "";
}
// decode options. Options are in form optname:optvale, or if booleans only optname.
char *clip_l_path = "";
char *clip_g_path = "";
char *t5xxl_path = "";
char *vae_path = "";
char *scheduler = "";
char *sampler = "";
// If options is not NULL, parse options
for (int i = 0; options[i] != NULL; i++) {
char *optname = strtok(options[i], ":");
char *optval = strtok(NULL, ":");
if (optval == NULL) {
optval = "true";
}
if (!strcmp(optname, "clip_l_path")) {
clip_l_path = optval;
}
if (!strcmp(optname, "clip_g_path")) {
clip_g_path = optval;
}
if (!strcmp(optname, "t5xxl_path")) {
t5xxl_path = optval;
}
if (!strcmp(optname, "vae_path")) {
vae_path = optval;
}
if (!strcmp(optname, "scheduler")) {
scheduler = optval;
}
if (!strcmp(optname, "sampler")) {
sampler = optval;
}
}
int sample_method_found = -1;
for (int m = 0; m < N_SAMPLE_METHODS; m++) {
if (!strcmp(sampler, sample_method_str[m])) {
sample_method_found = m;
}
}
if (sample_method_found == -1) {
fprintf(stderr, "Invalid sample method, default to EULER_A!\n");
sample_method_found = EULER_A;
}
sample_method = (sample_method_t)sample_method_found;
int schedule_found = -1;
for (int d = 0; d < N_SCHEDULES; d++) {
if (!strcmp(scheduler, schedule_str[d])) {
schedule_found = d;
fprintf (stderr, "Found scheduler: %s\n", scheduler);
}
}
if (schedule_found == -1) {
fprintf (stderr, "Invalid scheduler! using DEFAULT\n");
schedule_found = DEFAULT;
}
schedule_t schedule = (schedule_t)schedule_found;
fprintf (stderr, "Creating context\n");
sd_ctx_t* sd_ctx = new_sd_ctx(model,
clip_l_path,
clip_g_path,
t5xxl_path,
stableDiffusionModel,
vae_path,
"",
"",
"",
"",
"",
false,
false,
false,
threads,
SD_TYPE_COUNT,
STD_DEFAULT_RNG,
schedule,
false,
false,
false,
false);
if (sd_ctx == NULL) {
fprintf (stderr, "failed loading model (generic error)\n");
return 1;
}
fprintf (stderr, "Created context: OK\n");
sd_c = sd_ctx;
return 0;
}
int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed , char *dst, float cfg_scale) {
sd_image_t* results;
std::vector<int> skip_layers = {7, 8, 9};
fprintf (stderr, "Generating image\n");
results = txt2img(sd_c,
text,
negativeText,
-1, //clip_skip
cfg_scale, // sfg_scale
3.5f,
width,
height,
sample_method,
steps,
seed,
1,
NULL,
0.9f,
20.f,
false,
"",
skip_layers.data(),
skip_layers.size(),
0,
0.01,
0.2);
if (results == NULL) {
fprintf (stderr, "NO results\n");
return 1;
}
if (results[0].data == NULL) {
fprintf (stderr, "Results with no data\n");
return 1;
}
fprintf (stderr, "Writing PNG\n");
fprintf (stderr, "DST: %s\n", dst);
fprintf (stderr, "Width: %d\n", results[0].width);
fprintf (stderr, "Height: %d\n", results[0].height);
fprintf (stderr, "Channel: %d\n", results[0].channel);
fprintf (stderr, "Data: %p\n", results[0].data);
stbi_write_png(dst, results[0].width, results[0].height, results[0].channel,
results[0].data, 0, NULL);
fprintf (stderr, "Saved resulting image to '%s'\n", dst);
// TODO: free results. Why does it crash?
free(results[0].data);
results[0].data = NULL;
free(results);
fprintf (stderr, "gen_image is done", dst);
return 0;
}
int unload() {
free_sd_ctx(sd_c);
}

View File

@@ -1,96 +0,0 @@
package main
// #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include
// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src/ggml-cpu -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src -lsd -lstdc++ -lm -lggml -lggml-base -lggml-cpu -lgomp
// #include <gosd.h>
// #include <stdlib.h>
import "C"
import (
"fmt"
"os"
"path/filepath"
"strings"
"unsafe"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/utils"
)
type SDGGML struct {
base.SingleThread
threads int
sampleMethod string
cfgScale float32
}
func (sd *SDGGML) Load(opts *pb.ModelOptions) error {
sd.threads = int(opts.Threads)
modelFile := C.CString(opts.ModelFile)
defer C.free(unsafe.Pointer(modelFile))
var options **C.char
// prepare the options array to pass to C
size := C.size_t(unsafe.Sizeof((*C.char)(nil)))
length := C.size_t(len(opts.Options))
options = (**C.char)(C.malloc(length * size))
view := (*[1 << 30]*C.char)(unsafe.Pointer(options))[0:len(opts.Options):len(opts.Options)]
var diffusionModel int
var oo []string
for _, op := range opts.Options {
if op == "diffusion_model" {
diffusionModel = 1
continue
}
// If it's an option path, we resolve absolute path from the model path
if strings.Contains(op, ":") && strings.Contains(op, "path") {
data := strings.Split(op, ":")
data[1] = filepath.Join(opts.ModelPath, data[1])
if err := utils.VerifyPath(data[1], opts.ModelPath); err == nil {
oo = append(oo, strings.Join(data, ":"))
}
} else {
oo = append(oo, op)
}
}
fmt.Fprintf(os.Stderr, "Options: %+v\n", oo)
for i, x := range oo {
view[i] = C.CString(x)
}
sd.cfgScale = opts.CFGScale
ret := C.load_model(modelFile, options, C.int(opts.Threads), C.int(diffusionModel))
if ret != 0 {
return fmt.Errorf("could not load model")
}
return nil
}
func (sd *SDGGML) GenerateImage(opts *pb.GenerateImageRequest) error {
t := C.CString(opts.PositivePrompt)
defer C.free(unsafe.Pointer(t))
dst := C.CString(opts.Dst)
defer C.free(unsafe.Pointer(dst))
negative := C.CString(opts.NegativePrompt)
defer C.free(unsafe.Pointer(negative))
ret := C.gen_image(t, negative, C.int(opts.Width), C.int(opts.Height), C.int(opts.Step), C.int(opts.Seed), dst, C.float(sd.cfgScale))
if ret != 0 {
return fmt.Errorf("inference failed")
}
return nil
}

View File

@@ -1,8 +0,0 @@
#ifdef __cplusplus
extern "C" {
#endif
int load_model(char *model, char* options[], int threads, int diffusionModel);
int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed, char *dst, float cfg_scale);
#ifdef __cplusplus
}
#endif

View File

@@ -1,20 +0,0 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
grpc "github.com/mudler/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &SDGGML{}); err != nil {
panic(err)
}
}

View File

@@ -0,0 +1,34 @@
package main
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
bert "github.com/go-skynet/go-bert.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
type Embeddings struct {
base.SingleThread
bert *bert.Bert
}
func (llm *Embeddings) Load(opts *pb.ModelOptions) error {
model, err := bert.New(opts.ModelFile)
llm.bert = model
return err
}
func (llm *Embeddings) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
if len(opts.EmbeddingTokens) > 0 {
tokens := []int{}
for _, t := range opts.EmbeddingTokens {
tokens = append(tokens, int(t))
}
return llm.bert.TokenEmbeddings(tokens, bert.SetThreads(int(opts.Threads)))
}
return llm.bert.Embeddings(opts.Embeddings, bert.SetThreads(int(opts.Threads)))
}

View File

@@ -1,6 +1,7 @@
package main package main
// Note: this is started internally by LocalAI and a server is allocated for each model // Note: this is started internally by LocalAI and a server is allocated for each model
import ( import (
"flag" "flag"
@@ -14,7 +15,7 @@ var (
func main() { func main() {
flag.Parse() flag.Parse()
if err := grpc.StartServer(*addr, &Bark{}); err != nil { if err := grpc.StartServer(*addr, &Embeddings{}); err != nil {
panic(err) panic(err)
} }
} }

View File

@@ -15,7 +15,7 @@ var (
func main() { func main() {
flag.Parse() flag.Parse()
if err := grpc.StartServer(*addr, &VAD{}); err != nil { if err := grpc.StartServer(*addr, &LLM{}); err != nil {
panic(err) panic(err)
} }
} }

View File

@@ -0,0 +1,95 @@
package main
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"path/filepath"
"github.com/donomii/go-rwkv.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
const tokenizerSuffix = ".tokenizer.json"
type LLM struct {
base.SingleThread
rwkv *rwkv.RwkvState
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
tokenizerFile := opts.Tokenizer
if tokenizerFile == "" {
modelFile := filepath.Base(opts.ModelFile)
tokenizerFile = modelFile + tokenizerSuffix
}
modelPath := filepath.Dir(opts.ModelFile)
tokenizerPath := filepath.Join(modelPath, tokenizerFile)
model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads()))
if model == nil {
return fmt.Errorf("rwkv could not load model")
}
llm.rwkv = model
return nil
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
stopWord := "\n"
if len(opts.StopPrompts) > 0 {
stopWord = opts.StopPrompts[0]
}
if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
return "", err
}
response := llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), nil)
return response, nil
}
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
go func() {
stopWord := "\n"
if len(opts.StopPrompts) > 0 {
stopWord = opts.StopPrompts[0]
}
if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
fmt.Println("Error processing input: ", err)
return
}
llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), func(s string) bool {
results <- s
return true
})
close(results)
}()
return nil
}
func (llm *LLM) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationResponse, error) {
tokens, err := llm.rwkv.Tokenizer.Encode(opts.Prompt)
if err != nil {
return pb.TokenizationResponse{}, err
}
l := len(tokens)
i32Tokens := make([]int32, l)
for i, t := range tokens {
i32Tokens[i] = int32(t.ID)
}
return pb.TokenizationResponse{
Length: int32(l),
Tokens: i32Tokens,
}, nil
}

View File

@@ -1,54 +0,0 @@
package main
// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/streamer45/silero-vad-go/speech"
)
type VAD struct {
base.SingleThread
detector *speech.Detector
}
func (vad *VAD) Load(opts *pb.ModelOptions) error {
v, err := speech.NewDetector(speech.DetectorConfig{
ModelPath: opts.ModelFile,
SampleRate: 16000,
//WindowSize: 1024,
Threshold: 0.5,
MinSilenceDurationMs: 0,
SpeechPadMs: 0,
})
if err != nil {
return fmt.Errorf("create silero detector: %w", err)
}
vad.detector = v
return err
}
func (vad *VAD) VAD(req *pb.VADRequest) (pb.VADResponse, error) {
audio := req.Audio
segments, err := vad.detector.Detect(audio)
if err != nil {
return pb.VADResponse{}, fmt.Errorf("detect: %w", err)
}
vadSegments := []*pb.VADSegment{}
for _, s := range segments {
vadSegments = append(vadSegments, &pb.VADSegment{
Start: float32(s.SpeechStartAt),
End: float32(s.SpeechEndAt),
})
}
return pb.VADResponse{
Segments: vadSegments,
}, nil
}

View File

@@ -1,2 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118 torch

View File

@@ -1 +1 @@
torch==2.4.1 torch

View File

@@ -1,2 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0 --extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0 torch

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch intel-extension-for-pytorch
torch torch
optimum[openvino] optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate accelerate
auto-gptq==0.7.1 auto-gptq==0.7.1
grpcio==1.68.1 grpcio==1.66.1
protobuf protobuf
certifi certifi
transformers transformers

View File

@@ -1,4 +1,4 @@
transformers transformers
accelerate accelerate
torch==2.4.1 torch
torchaudio==2.4.1 torchaudio

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118 torch
torchaudio==2.4.1+cu118 torchaudio
transformers transformers
accelerate accelerate

View File

@@ -1,4 +1,4 @@
torch==2.4.1 torch
torchaudio==2.4.1 torchaudio
transformers transformers
accelerate accelerate

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0 --extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0 torch
torchaudio==2.4.1+rocm6.0 torchaudio
transformers transformers
accelerate accelerate

View File

@@ -3,6 +3,6 @@ intel-extension-for-pytorch
torch torch
torchaudio torchaudio
optimum[openvino] optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
transformers transformers
accelerate accelerate

View File

@@ -1,4 +1,4 @@
bark==0.1.5 bark==0.1.5
grpcio==1.68.1 grpcio==1.66.1
protobuf protobuf
certifi certifi

View File

@@ -1,9 +1,8 @@
.DEFAULT_GOAL := install .DEFAULT_GOAL := install
.PHONY: install .PHONY: install
install: install: protogen
bash install.sh bash install.sh
$(MAKE) protogen
.PHONY: protogen .PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py protogen: backend_pb2_grpc.py backend_pb2.py
@@ -13,7 +12,7 @@ protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py $(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py: backend_pb2_grpc.py backend_pb2.py:
bash protogen.sh python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
.PHONY: clean .PHONY: clean
clean: protogen-clean clean: protogen-clean

View File

@@ -1,6 +0,0 @@
#!/bin/bash
set -e
source $(dirname $0)/../common/libbackend.sh
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto

View File

@@ -1,3 +1,2 @@
grpcio==1.68.1 grpcio==1.66.1
protobuf protobuf
grpcio-tools

View File

@@ -1,4 +1,3 @@
transformers transformers
accelerate accelerate
torch==2.4.1 torch
coqui-tts

View File

@@ -1,6 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118 torch
torchaudio==2.4.1+cu118 torchaudio
transformers transformers
accelerate accelerate
coqui-tts

View File

@@ -1,5 +1,4 @@
torch==2.4.1 torch
torchaudio==2.4.1 torchaudio
transformers transformers
accelerate accelerate
coqui-tts

View File

@@ -1,6 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0 --extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0 torch
torchaudio==2.4.1+rocm6.0 torchaudio
transformers transformers
accelerate accelerate
coqui-tts

View File

@@ -3,7 +3,6 @@ intel-extension-for-pytorch
torch torch
torchaudio torchaudio
optimum[openvino] optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers transformers
accelerate accelerate
coqui-tts

View File

@@ -1,4 +1,4 @@
grpcio==1.68.1 TTS==0.22.0
grpcio==1.66.1
protobuf protobuf
certifi certifi
packaging==24.1

View File

@@ -19,7 +19,7 @@ class TestBackendServicer(unittest.TestCase):
This method sets up the gRPC service by starting the server This method sets up the gRPC service by starting the server
""" """
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
time.sleep(30) time.sleep(10)
def tearDown(self) -> None: def tearDown(self) -> None:
""" """

View File

@@ -247,16 +247,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
use_safetensors=True, use_safetensors=True,
variant=variant) variant=variant)
elif request.PipelineType == "FluxPipeline": elif request.PipelineType == "FluxPipeline":
if fromSingleFile:
self.pipe = FluxPipeline.from_single_file(modelFile,
torch_dtype=torchType,
use_safetensors=True)
else:
self.pipe = FluxPipeline.from_pretrained( self.pipe = FluxPipeline.from_pretrained(
request.Model, request.Model,
torch_dtype=torch.bfloat16) torch_dtype=torch.bfloat16)
if request.LowVRAM: if request.LowVRAM:
self.pipe.enable_model_cpu_offload() self.pipe.enable_model_cpu_offload()
elif request.PipelineType == "FluxTransformer2DModel": elif request.PipelineType == "FluxTransformer2DModel":
dtype = torch.bfloat16 dtype = torch.bfloat16
# specify from environment or default to "ChuckMcSneed/FLUX.1-dev" # specify from environment or default to "ChuckMcSneed/FLUX.1-dev"
@@ -301,34 +296,22 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
self.pipe.controlnet = self.controlnet self.pipe.controlnet = self.controlnet
else: else:
self.controlnet = None self.controlnet = None
# Assume directory from request.ModelFile.
if request.LoraAdapter and not os.path.isabs(request.LoraAdapter): # Only if request.LoraAdapter it's not an absolute path
if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter:
# get base path of modelFile
modelFileBase = os.path.dirname(request.ModelFile)
# modify LoraAdapter to be relative to modelFileBase # modify LoraAdapter to be relative to modelFileBase
request.LoraAdapter = os.path.join(request.ModelPath, request.LoraAdapter) request.LoraAdapter = os.path.join(modelFileBase, request.LoraAdapter)
device = "cpu" if not request.CUDA else "cuda" device = "cpu" if not request.CUDA else "cuda"
self.device = device self.device = device
if request.LoraAdapter: if request.LoraAdapter:
# Check if its a local file and not a directory ( we load lora differently for a safetensor file ) # Check if its a local file and not a directory ( we load lora differently for a safetensor file )
if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter): if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter):
# self.load_lora_weights(request.LoraAdapter, 1, device, torchType)
self.pipe.load_lora_weights(request.LoraAdapter) self.pipe.load_lora_weights(request.LoraAdapter)
else: else:
self.pipe.unet.load_attn_procs(request.LoraAdapter) self.pipe.unet.load_attn_procs(request.LoraAdapter)
if len(request.LoraAdapters) > 0:
i = 0
adapters_name = []
adapters_weights = []
for adapter in request.LoraAdapters:
if not os.path.isabs(adapter):
adapter = os.path.join(request.ModelPath, adapter)
self.pipe.load_lora_weights(adapter, adapter_name=f"adapter_{i}")
adapters_name.append(f"adapter_{i}")
i += 1
for adapters_weight in request.LoraScales:
adapters_weights.append(adapters_weight)
self.pipe.set_adapters(adapters_name, adapter_weights=adapters_weights)
if request.CUDA: if request.CUDA:
self.pipe.to('cuda') self.pipe.to('cuda')
@@ -409,6 +392,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
# create a dictionary of values for the parameters # create a dictionary of values for the parameters
options = { options = {
"negative_prompt": request.negative_prompt, "negative_prompt": request.negative_prompt,
"width": request.width,
"height": request.height,
"num_inference_steps": steps, "num_inference_steps": steps,
} }
@@ -426,13 +411,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
keys = options.keys() keys = options.keys()
if request.EnableParameters != "": if request.EnableParameters != "":
keys = [key.strip() for key in request.EnableParameters.split(",")] keys = request.EnableParameters.split(",")
if request.EnableParameters == "none": if request.EnableParameters == "none":
keys = [] keys = []
# create a dictionary of parameters by using the keys from EnableParameters and the values from defaults # create a dictionary of parameters by using the keys from EnableParameters and the values from defaults
kwargs = {key: options.get(key) for key in keys if key in options} kwargs = {key: options[key] for key in keys}
# Set seed # Set seed
if request.seed > 0: if request.seed > 0:
@@ -443,12 +428,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if self.PipelineType == "FluxPipeline": if self.PipelineType == "FluxPipeline":
kwargs["max_sequence_length"] = 256 kwargs["max_sequence_length"] = 256
if request.width:
kwargs["width"] = request.width
if request.height:
kwargs["height"] = request.height
if self.PipelineType == "FluxTransformer2DModel": if self.PipelineType == "FluxTransformer2DModel":
kwargs["output_type"] = "pil" kwargs["output_type"] = "pil"
kwargs["generator"] = torch.Generator("cpu").manual_seed(0) kwargs["generator"] = torch.Generator("cpu").manual_seed(0)
@@ -468,7 +447,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
export_to_video(video_frames, request.dst) export_to_video(video_frames, request.dst)
return backend_pb2.Result(message="Media generated successfully", success=True) return backend_pb2.Result(message="Media generated successfully", success=True)
print(f"Generating image with {kwargs=}", file=sys.stderr)
image = {} image = {}
if COMPEL: if COMPEL:
conditioning, pooled = self.compel.build_conditioning_tensor(prompt) conditioning, pooled = self.compel.build_conditioning_tensor(prompt)

View File

@@ -5,5 +5,5 @@ accelerate
compel compel
peft peft
sentencepiece sentencepiece
torch==2.4.1 torch
optimum-quanto optimum-quanto

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118 torch
diffusers diffusers
opencv-python opencv-python
transformers transformers

View File

@@ -1,4 +1,4 @@
torch==2.4.1 torch
diffusers diffusers
opencv-python opencv-python
transformers transformers

View File

@@ -3,7 +3,7 @@ intel-extension-for-pytorch
torch torch
torchvision torchvision
optimum[openvino] optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
diffusers diffusers
opencv-python opencv-python
transformers transformers

View File

@@ -1,5 +1,5 @@
setuptools setuptools
grpcio==1.68.1 grpcio==1.66.1
pillow pillow
protobuf protobuf
certifi certifi

View File

@@ -1,3 +1,3 @@
transformers transformers
accelerate accelerate
torch==2.4.1 torch

View File

@@ -1,4 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118 torch
transformers transformers
accelerate accelerate

View File

@@ -1,3 +1,3 @@
torch==2.4.1 torch
transformers transformers
accelerate accelerate

View File

@@ -1,4 +1,4 @@
grpcio==1.68.1 grpcio==1.66.1
protobuf protobuf
certifi certifi
wheel wheel

View File

@@ -1,2 +1,2 @@
torch==2.4.1 torch
transformers transformers

View File

@@ -1,3 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118 torch
transformers transformers

View File

@@ -1,2 +1,2 @@
torch==2.4.1 torch
transformers transformers

View File

@@ -1,3 +1,3 @@
grpcio==1.68.1 grpcio==1.66.1
protobuf protobuf
certifi certifi

View File

@@ -1,3 +1 @@
torch==2.4.1 torch
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git

View File

@@ -1,4 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118 torch
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git

View File

@@ -1,3 +1 @@
torch==2.4.1 torch
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git

View File

@@ -1,4 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0 --extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0 torch
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git

View File

@@ -2,22 +2,22 @@
intel-extension-for-pytorch intel-extension-for-pytorch
torch torch
optimum[openvino] optimum[openvino]
grpcio==1.68.1 grpcio==1.66.1
protobuf protobuf
librosa==0.9.1 librosa==0.9.1
faster-whisper==0.9.0 faster-whisper==1.0.3
pydub==0.25.1 pydub==0.25.1
wavmark==0.0.3 wavmark==0.0.3
numpy==1.22.0 numpy==1.26.4
eng_to_ipa==0.0.2 eng_to_ipa==0.0.2
inflect==7.0.0 inflect==7.0.0
unidecode==1.3.7 unidecode==1.3.7
whisper-timestamped==1.14.2 whisper-timestamped==1.15.4
openai openai
python-dotenv python-dotenv
pypinyin==0.50.0 pypinyin==0.50.0
cn2an==0.5.22 cn2an==0.5.22
jieba==0.42.1 jieba==0.42.1
gradio==4.38.1
langid==1.1.6 langid==1.1.6
git+https://github.com/myshell-ai/MeloTTS.git git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git

View File

@@ -1,10 +1,10 @@
grpcio==1.68.1 grpcio==1.66.1
protobuf protobuf
librosa librosa
faster-whisper faster-whisper
pydub==0.25.1 pydub==0.25.1
wavmark==0.0.3 wavmark==0.0.3
numpy==1.22.0 numpy
eng_to_ipa==0.0.2 eng_to_ipa==0.0.2
inflect inflect
unidecode unidecode
@@ -13,8 +13,8 @@ openai
python-dotenv python-dotenv
pypinyin pypinyin
cn2an==0.5.22 cn2an==0.5.22
networkx==2.8.8
jieba==0.42.1 jieba==0.42.1
gradio==3.48.0 gradio
langid==1.1.6 langid==1.1.6
llvmlite==0.43.0 git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git

View File

@@ -19,7 +19,7 @@ class TestBackendServicer(unittest.TestCase):
This method sets up the gRPC service by starting the server This method sets up the gRPC service by starting the server
""" """
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
time.sleep(30) time.sleep(10)
def tearDown(self) -> None: def tearDown(self) -> None:
""" """

View File

@@ -12,10 +12,9 @@ export SKIP_CONDA=1
endif endif
.PHONY: parler-tts .PHONY: parler-tts
parler-tts: parler-tts: protogen
@echo "Installing $(CONDA_ENV_PATH)..." @echo "Installing $(CONDA_ENV_PATH)..."
bash install.sh $(CONDA_ENV_PATH) bash install.sh $(CONDA_ENV_PATH)
$(MAKE) protogen
.PHONY: run .PHONY: run
run: protogen run: protogen
@@ -37,7 +36,7 @@ protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py $(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py: backend_pb2_grpc.py backend_pb2.py:
bash protogen.sh python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
.PHONY: clean .PHONY: clean
clean: protogen-clean clean: protogen-clean

View File

@@ -11,18 +11,9 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi fi
installRequirements installRequirements
# https://github.com/descriptinc/audiotools/issues/101 # https://github.com/descriptinc/audiotools/issues/101
# incompatible protobuf versions. # incompatible protobuf versions.
PYDIR=python3.10 PYDIR=$(ls ${MY_DIR}/venv/lib)
pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/" curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/builder.py
if [ ! -d ${pyenv} ]; then
echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
exit 1
fi
curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py

View File

@@ -1,6 +0,0 @@
#!/bin/bash
set -e
source $(dirname $0)/../common/libbackend.sh
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto

View File

@@ -1,4 +1,3 @@
git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17 git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17
llvmlite==0.43.0 llvmlite==0.43.0
numba==0.60.0 numba==0.60.0
grpcio-tools==1.42.0

View File

@@ -1,3 +1,3 @@
transformers transformers
accelerate accelerate
torch==2.4.1 torch

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118 torch
torchaudio==2.4.1+cu118 torchaudio
transformers transformers
accelerate accelerate

View File

@@ -1,4 +1,4 @@
torch==2.4.1 torch
torchaudio==2.4.1 torchaudio
transformers transformers
accelerate accelerate

View File

@@ -3,6 +3,6 @@ intel-extension-for-pytorch
torch torch
torchaudio torchaudio
optimum[openvino] optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers transformers
accelerate accelerate

View File

@@ -1,3 +1,4 @@
grpcio==1.68.1 grpcio==1.66.1
protobuf
certifi certifi
llvmlite==0.43.0 llvmlite==0.43.0

View File

@@ -1,4 +1,4 @@
transformers transformers
accelerate accelerate
torch==2.4.1 torch
rerankers[transformers] rerankers[transformers]

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
transformers transformers
accelerate accelerate
torch==2.4.1+cu118 torch
rerankers[transformers] rerankers[transformers]

View File

@@ -1,4 +1,4 @@
transformers transformers
accelerate accelerate
torch==2.4.1 torch
rerankers[transformers] rerankers[transformers]

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0 --extra-index-url https://download.pytorch.org/whl/rocm6.0
transformers transformers
accelerate accelerate
torch==2.4.1+rocm6.0 torch
rerankers[transformers] rerankers[transformers]

Some files were not shown because too many files have changed in this diff Show More