Compare commits

..

1 Commits

Author SHA1 Message Date
Ettore Di Giacinto
83110891fd fix(go-grpc-server): always close resultChan
By not closing the channel, a server that does not implement PredictStream
would hang indefinitely when it receives a client call, as it would wait for
resultChan to be consumed.

Now, when the prediction stream returns, we close the channel and wait
for the goroutine to finish.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-10-05 00:07:58 +02:00
340 changed files with 10241 additions and 5189 deletions

View File

@@ -1,11 +0,0 @@
meta {
name: model delete
type: http
seq: 7
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
body: none
auth: none
}

View File

Binary file not shown.

View File

@@ -1,16 +0,0 @@
meta {
name: transcribe
type: http
seq: 1
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/audio/transcriptions
body: multipartForm
auth: none
}
body:multipart-form {
file: @file(transcription/gb1.ogg)
model: whisper-1
}

1
.gitattributes vendored
View File

@@ -1,2 +1 @@
*.sh text eol=lf *.sh text eol=lf
backend/cpp/llama/*.hpp linguist-vendored

View File

@@ -6,7 +6,6 @@ import (
"io/ioutil" "io/ioutil"
"os" "os"
"github.com/microcosm-cc/bluemonday"
"gopkg.in/yaml.v3" "gopkg.in/yaml.v3"
) )
@@ -280,12 +279,6 @@ func main() {
return return
} }
// Ensure that all arbitrary text content is sanitized before display
for i, m := range models {
models[i].Name = bluemonday.StrictPolicy().Sanitize(m.Name)
models[i].Description = bluemonday.StrictPolicy().Sanitize(m.Description)
}
// render the template // render the template
data := struct { data := struct {
Models []*GalleryModel Models []*GalleryModel

View File

@@ -9,8 +9,6 @@ updates:
directory: "/" directory: "/"
schedule: schedule:
interval: "weekly" interval: "weekly"
ignore:
- dependency-name: "github.com/mudler/LocalAI/pkg/grpc/proto"
- package-ecosystem: "github-actions" - package-ecosystem: "github-actions"
# Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.) # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
directory: "/" directory: "/"

View File

@@ -23,7 +23,7 @@ jobs:
sudo pip install --upgrade pip sudo pip install --upgrade pip
pip install huggingface_hub pip install huggingface_hub
- name: 'Setup yq' - name: 'Setup yq'
uses: dcarbone/install-yq-action@v1.3.1 uses: dcarbone/install-yq-action@v1.1.1
with: with:
version: 'v4.44.2' version: 'v4.44.2'
download-compressed: true download-compressed: true

View File

@@ -33,7 +33,7 @@ jobs:
run: | run: |
CGO_ENABLED=0 make build-api CGO_ENABLED=0 make build-api
- name: rm - name: rm
uses: appleboy/ssh-action@v1.2.0 uses: appleboy/ssh-action@v1.0.3
with: with:
host: ${{ secrets.EXPLORER_SSH_HOST }} host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }} username: ${{ secrets.EXPLORER_SSH_USERNAME }}
@@ -53,7 +53,7 @@ jobs:
rm: true rm: true
target: ./local-ai target: ./local-ai
- name: restarting - name: restarting
uses: appleboy/ssh-action@v1.2.0 uses: appleboy/ssh-action@v1.0.3
with: with:
host: ${{ secrets.EXPLORER_SSH_HOST }} host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }} username: ${{ secrets.EXPLORER_SSH_USERNAME }}

View File

@@ -15,7 +15,7 @@ jobs:
strategy: strategy:
matrix: matrix:
include: include:
- base-image: intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 - base-image: intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04
runs-on: 'ubuntu-latest' runs-on: 'ubuntu-latest'
platforms: 'linux/amd64' platforms: 'linux/amd64'
runs-on: ${{matrix.runs-on}} runs-on: ${{matrix.runs-on}}

View File

@@ -79,7 +79,7 @@ jobs:
args: ${{ steps.summarize.outputs.message }} args: ${{ steps.summarize.outputs.message }}
- name: Setup tmate session if fails - name: Setup tmate session if fails
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19 uses: mxschmitt/action-tmate@v3.18
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180
@@ -161,7 +161,7 @@ jobs:
TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
- name: Setup tmate session if fails - name: Setup tmate session if fails
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19 uses: mxschmitt/action-tmate@v3.18
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180

View File

@@ -123,7 +123,7 @@ jobs:
release/* release/*
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19 uses: mxschmitt/action-tmate@v3.18
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180
@@ -232,7 +232,7 @@ jobs:
release/* release/*
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19 uses: mxschmitt/action-tmate@v3.18
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180
@@ -308,7 +308,7 @@ jobs:
release/* release/*
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19 uses: mxschmitt/action-tmate@v3.18
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180
@@ -350,7 +350,7 @@ jobs:
release/* release/*
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19 uses: mxschmitt/action-tmate@v3.18
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180

View File

@@ -123,13 +123,6 @@ jobs:
run: | run: |
make --jobs=5 --output-sync=target -C backend/python/parler-tts make --jobs=5 --output-sync=target -C backend/python/parler-tts
make --jobs=5 --output-sync=target -C backend/python/parler-tts test make --jobs=5 --output-sync=target -C backend/python/parler-tts test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
tests-openvoice: tests-openvoice:
runs-on: ubuntu-latest runs-on: ubuntu-latest

View File

@@ -133,7 +133,7 @@ jobs:
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19 uses: mxschmitt/action-tmate@v3.18
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180
@@ -197,7 +197,7 @@ jobs:
make run-e2e-aio make run-e2e-aio
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19 uses: mxschmitt/action-tmate@v3.18
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180
@@ -224,7 +224,7 @@ jobs:
- name: Dependencies - name: Dependencies
run: | run: |
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
pip install --user --no-cache-dir grpcio-tools pip install --user --no-cache-dir grpcio-tools==1.64.1
- name: Test - name: Test
run: | run: |
export C_INCLUDE_PATH=/usr/local/include export C_INCLUDE_PATH=/usr/local/include
@@ -235,7 +235,7 @@ jobs:
BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
- name: Setup tmate session if tests fail - name: Setup tmate session if tests fail
if: ${{ failure() }} if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19 uses: mxschmitt/action-tmate@v3.18
with: with:
detached: true detached: true
connect-timeout-seconds: 180 connect-timeout-seconds: 180

1
.gitignore vendored
View File

@@ -12,6 +12,7 @@ prepare-sources
go-ggml-transformers go-ggml-transformers
go-gpt2 go-gpt2
go-rwkv
whisper.cpp whisper.cpp
/bloomz /bloomz
go-bert go-bert

View File

@@ -9,8 +9,6 @@ FROM ${BASE_IMAGE} AS requirements-core
USER root USER root
ARG GO_VERSION=1.22.6 ARG GO_VERSION=1.22.6
ARG CMAKE_VERSION=3.26.4
ARG CMAKE_FROM_SOURCE=false
ARG TARGETARCH ARG TARGETARCH
ARG TARGETVARIANT ARG TARGETVARIANT
@@ -23,25 +21,13 @@ RUN apt-get update && \
build-essential \ build-essential \
ccache \ ccache \
ca-certificates \ ca-certificates \
curl libssl-dev \ cmake \
curl \
git \ git \
unzip upx-ucl && \ unzip upx-ucl && \
apt-get clean && \ apt-get clean && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
# Install CMake (the version in 22.04 is too old)
RUN <<EOT bash
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
else
apt-get update && \
apt-get install -y \
cmake && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# Install Go # Install Go
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
@@ -85,8 +71,7 @@ WORKDIR /build
# The requirements-extras target is for any builds with IMAGE_TYPE=extras. It should not be placed in this target unless every IMAGE_TYPE=extras build will use it # The requirements-extras target is for any builds with IMAGE_TYPE=extras. It should not be placed in this target unless every IMAGE_TYPE=extras build will use it
FROM requirements-core AS requirements-extras FROM requirements-core AS requirements-extras
# Install uv as a system package RUN curl -LsSf https://astral.sh/uv/install.sh | sh
RUN curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR=/usr/bin sh
ENV PATH="/root/.cargo/bin:${PATH}" ENV PATH="/root/.cargo/bin:${PATH}"
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
@@ -203,8 +188,6 @@ FROM ${GRPC_BASE_IMAGE} AS grpc
# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI # This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
ARG GRPC_MAKEFLAGS="-j4 -Otarget" ARG GRPC_MAKEFLAGS="-j4 -Otarget"
ARG GRPC_VERSION=v1.65.0 ARG GRPC_VERSION=v1.65.0
ARG CMAKE_FROM_SOURCE=false
ARG CMAKE_VERSION=3.26.4
ENV MAKEFLAGS=${GRPC_MAKEFLAGS} ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
@@ -213,24 +196,12 @@ WORKDIR /build
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y --no-install-recommends \ apt-get install -y --no-install-recommends \
ca-certificates \ ca-certificates \
build-essential curl libssl-dev \ build-essential \
cmake \
git && \ git && \
apt-get clean && \ apt-get clean && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
# Install CMake (the version in 22.04 is too old)
RUN <<EOT bash
if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
else
apt-get update && \
apt-get install -y \
cmake && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT
# We install GRPC to a different prefix here so that we can copy in only the build artifacts later # We install GRPC to a different prefix here so that we can copy in only the build artifacts later
# saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree # saves several hundred MB on the final docker image size vs copying in the entire GRPC source tree
# and running make install in the target container # and running make install in the target container

110
Makefile
View File

@@ -8,11 +8,15 @@ DETECT_LIBS?=true
# llama.cpp versions # llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=47f931c8f9a26c072d71224bc8013cc66ea9e445 CPPLLAMA_VERSION?=d5ed2b929d85bbd7dbeecb690880f07d9d7a6077
# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version # whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d WHISPER_CPP_VERSION?=ccc2547210e09e3a1785817383ab770389bb442b
# bert.cpp version # bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -20,7 +24,7 @@ BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
# go-piper version # go-piper version
PIPER_REPO?=https://github.com/mudler/go-piper PIPER_REPO?=https://github.com/mudler/go-piper
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0 PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759
# stablediffusion version # stablediffusion version
STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
@@ -30,10 +34,6 @@ STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057 TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
ONNX_VERSION?=1.20.0
ONNX_ARCH?=x64
ONNX_OS?=linux
export BUILD_TYPE?= export BUILD_TYPE?=
export STABLE_BUILD_TYPE?=$(BUILD_TYPE) export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
export CMAKE_ARGS?= export CMAKE_ARGS?=
@@ -45,7 +45,6 @@ CGO_LDFLAGS_WHISPER+=-lggml
CUDA_LIBPATH?=/usr/local/cuda/lib64/ CUDA_LIBPATH?=/usr/local/cuda/lib64/
GO_TAGS?= GO_TAGS?=
BUILD_ID?= BUILD_ID?=
NATIVE?=false
TEST_DIR=/tmp/test TEST_DIR=/tmp/test
@@ -84,25 +83,7 @@ ifndef UNAME_S
UNAME_S := $(shell uname -s) UNAME_S := $(shell uname -s)
endif endif
# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
ifeq ($(NATIVE),false)
CMAKE_ARGS+=-DGGML_NATIVE=OFF
endif
# Detect if we are running on arm64
ifneq (,$(findstring aarch64,$(shell uname -m)))
ONNX_ARCH=aarch64
endif
ifeq ($(OS),Darwin) ifeq ($(OS),Darwin)
ONNX_OS=osx
ifneq (,$(findstring aarch64,$(shell uname -m)))
ONNX_ARCH=arm64
else ifneq (,$(findstring arm64,$(shell uname -m)))
ONNX_ARCH=arm64
else
ONNX_ARCH=x86_64
endif
ifeq ($(OSX_SIGNING_IDENTITY),) ifeq ($(OSX_SIGNING_IDENTITY),)
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/') OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
@@ -157,10 +138,10 @@ ifeq ($(BUILD_TYPE),hipblas)
export CC=$(ROCM_HOME)/llvm/bin/clang export CC=$(ROCM_HOME)/llvm/bin/clang
# llama-ggml has no hipblas support, so override it here. # llama-ggml has no hipblas support, so override it here.
export STABLE_BUILD_TYPE= export STABLE_BUILD_TYPE=
export GGML_HIP=1 export GGML_HIPBLAS=1
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101 GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
AMDGPU_TARGETS ?= "$(GPU_TARGETS)" AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)" CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
endif endif
@@ -205,9 +186,9 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad
ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC) ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
# Use filter-out to remove the specified backends # Use filter-out to remove the specified backends
ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS)) ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
@@ -267,6 +248,20 @@ sources/go-piper:
sources/go-piper/libpiper_binding.a: sources/go-piper sources/go-piper/libpiper_binding.a: sources/go-piper
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o $(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
## RWKV
sources/go-rwkv.cpp:
mkdir -p sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && \
git init && \
git remote add origin $(RWKV_REPO) && \
git fetch origin && \
git checkout $(RWKV_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
## stable diffusion ## stable diffusion
sources/go-stable-diffusion: sources/go-stable-diffusion:
mkdir -p sources/go-stable-diffusion mkdir -p sources/go-stable-diffusion
@@ -280,20 +275,6 @@ sources/go-stable-diffusion:
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
sources/onnxruntime:
mkdir -p sources/onnxruntime
curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./
backend-assets/lib/libonnxruntime.so.1: backend-assets/lib sources/onnxruntime
cp -rfv sources/onnxruntime/lib/* backend-assets/lib/
ifeq ($(OS),Darwin)
mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
else
mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
endif
## tiny-dream ## tiny-dream
sources/go-tiny-dream: sources/go-tiny-dream:
mkdir -p sources/go-tiny-dream mkdir -p sources/go-tiny-dream
@@ -320,9 +301,10 @@ sources/whisper.cpp:
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
get-sources: sources/go-llama.cpp sources/go-piper sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp get-sources: sources/go-llama.cpp sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
replace: replace:
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp $(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
@@ -332,6 +314,7 @@ replace:
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
dropreplace: dropreplace:
$(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp $(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go $(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp $(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
@@ -347,6 +330,7 @@ prepare-sources: get-sources replace
rebuild: ## Rebuilds the project rebuild: ## Rebuilds the project
$(GOCMD) clean -cache $(GOCMD) clean -cache
$(MAKE) -C sources/go-llama.cpp clean $(MAKE) -C sources/go-llama.cpp clean
$(MAKE) -C sources/go-rwkv.cpp clean
$(MAKE) -C sources/whisper.cpp clean $(MAKE) -C sources/whisper.cpp clean
$(MAKE) -C sources/go-stable-diffusion clean $(MAKE) -C sources/go-stable-diffusion clean
$(MAKE) -C sources/go-bert.cpp clean $(MAKE) -C sources/go-bert.cpp clean
@@ -455,6 +439,8 @@ test-models/testmodel.ggml:
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
wget -q https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv
wget -q https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
cp tests/models_fixtures/* test-models cp tests/models_fixtures/* test-models
prepare-test: grpcs prepare-test: grpcs
@@ -484,13 +470,13 @@ run-e2e-image:
run-e2e-aio: protogen-go run-e2e-aio: protogen-go
@echo 'Running e2e AIO tests' @echo 'Running e2e AIO tests'
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e-aio $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
test-e2e: test-e2e:
@echo 'Running e2e tests' @echo 'Running e2e tests'
BUILD_TYPE=$(BUILD_TYPE) \ BUILD_TYPE=$(BUILD_TYPE) \
LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \ LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) -v -r ./tests/e2e $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e
teardown-e2e: teardown-e2e:
rm -rf $(TEST_DIR) || true rm -rf $(TEST_DIR) || true
@@ -498,24 +484,24 @@ teardown-e2e:
test-llama: prepare-test test-llama: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS)
test-llama-gguf: prepare-test test-llama-gguf: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts 5 -v -r $(TEST_PATHS)
test-tts: prepare-test test-tts: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts 1 -v -r $(TEST_PATHS)
test-stablediffusion: prepare-test test-stablediffusion: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts 1 -v -r $(TEST_PATHS)
test-stores: backend-assets/grpc/local-store test-stores: backend-assets/grpc/local-store
mkdir -p tests/integration/backend-assets/grpc mkdir -p tests/integration/backend-assets/grpc
cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/ cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts $(TEST_FLAKES) -v -r tests/integration $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts 1 -v -r tests/integration
test-container: test-container:
docker build --target requirements -t local-ai-test-container . docker build --target requirements -t local-ai-test-container .
@@ -775,7 +761,7 @@ backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/ll
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
# TODO: every binary should have its own folder instead, so can have different metal implementations # TODO: every binary should have its own folder instead, so can have different metal implementations
ifeq ($(BUILD_TYPE),metal) ifeq ($(BUILD_TYPE),metal)
cp backend/cpp/llama-fallback/llama.cpp/build/bin/ggml-metal.metal backend-assets/grpc/ cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
endif endif
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
@@ -789,7 +775,7 @@ backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/lla
cp -rf backend/cpp/llama backend/cpp/llama-hipblas cp -rf backend/cpp/llama backend/cpp/llama-hipblas
$(MAKE) -C backend/cpp/llama-hipblas purge $(MAKE) -C backend/cpp/llama-hipblas purge
$(info ${GREEN}I llama-cpp build info:hipblas${RESET}) $(info ${GREEN}I llama-cpp build info:hipblas${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
@@ -831,6 +817,13 @@ ifneq ($(UPX),)
$(UPX) backend-assets/grpc/piper $(UPX) backend-assets/grpc/piper
endif endif
backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/rwkv
endif
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \ CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
@@ -838,13 +831,6 @@ ifneq ($(UPX),)
$(UPX) backend-assets/grpc/stablediffusion $(UPX) backend-assets/grpc/stablediffusion
endif endif
backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/silero-vad
endif
backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \ CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
@@ -905,7 +891,7 @@ docker-aio-all:
docker-image-intel: docker-image-intel:
docker build \ docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \ --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \ --build-arg GO_TAGS="none" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
@@ -913,7 +899,7 @@ docker-image-intel:
docker-image-intel-xpu: docker-image-intel-xpu:
docker build \ docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \ --build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \ --build-arg GO_TAGS="none" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \

View File

@@ -38,13 +38,9 @@
</a> </a>
</p> </p>
<p align="center">
<a href="https://trendshift.io/repositories/1484" target="_blank"><img src="https://trendshift.io/api/badge/repositories/1484" alt="go-skynet%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
</p>
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/) > :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
> >
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) > [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/go-skynet/LocalAI/tree/master/examples/)
[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai) [![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)
@@ -60,40 +56,20 @@ curl https://localai.io/install.sh | sh
Or run with docker: Or run with docker:
```bash ```bash
# CPU only image:
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-cpu
# Nvidia GPU:
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
# CPU and GPU image (bigger size):
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
# AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/)
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
``` # Alternative images:
# - if you have an Nvidia GPU:
To load models: # docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
# - without preconfigured models
```bash # docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
# From the model gallery (see available models with `local-ai models list`, in the WebUI from the model tab, or visiting https://models.localai.io) # - without preconfigured models for Nvidia GPUs
local-ai run llama-3.2-1b-instruct:q4_k_m # docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
# Start LocalAI with the phi-2 model directly from huggingface
local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
# Install and run a model from the Ollama OCI registry
local-ai run ollama://gemma:2b
# Run a model from a configuration file
local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
# Install and run a model from a standard OCI registry (e.g., Docker Hub)
local-ai run oci://localai/phi-2:latest
``` ```
[💻 Getting started](https://localai.io/basics/getting_started/index.html) [💻 Getting started](https://localai.io/basics/getting_started/index.html)
## 📰 Latest project news ## 📰 Latest project news
- Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204
- Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples)
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io) - Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723 - July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io - June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
@@ -164,9 +140,6 @@ Other:
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack - Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
- Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot - Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot - Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
- Another Telegram Bot https://github.com/JackBekket/Hellper
- Auto-documentation https://github.com/JackBekket/Reflexia
- GitHub bot which answers issues, using code and documentation as context https://github.com/JackBekket/GitHelper
- Github Actions: https://github.com/marketplace/actions/start-localai - Github Actions: https://github.com/marketplace/actions/start-localai
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/ - Examples: https://github.com/mudler/LocalAI/tree/master/examples/
@@ -241,6 +214,7 @@ LocalAI couldn't have been built without the help of great software already avai
- https://github.com/antimatter15/alpaca.cpp - https://github.com/antimatter15/alpaca.cpp
- https://github.com/EdVince/Stable-Diffusion-NCNN - https://github.com/EdVince/Stable-Diffusion-NCNN
- https://github.com/ggerganov/whisper.cpp - https://github.com/ggerganov/whisper.cpp
- https://github.com/saharNooby/rwkv.cpp
- https://github.com/rhasspy/piper - https://github.com/rhasspy/piper
## 🤗 Contributors ## 🤗 Contributors

View File

@@ -28,8 +28,6 @@ service Backend {
rpc Rerank(RerankRequest) returns (RerankResult) {} rpc Rerank(RerankRequest) returns (RerankResult) {}
rpc GetMetrics(MetricsRequest) returns (MetricsResponse); rpc GetMetrics(MetricsRequest) returns (MetricsResponse);
rpc VAD(VADRequest) returns (VADResponse) {}
} }
// Define the empty request // Define the empty request
@@ -221,7 +219,6 @@ message ModelOptions {
int32 SwapSpace = 53; int32 SwapSpace = 53;
int32 MaxModelLen = 54; int32 MaxModelLen = 54;
int32 TensorParallelSize = 55; int32 TensorParallelSize = 55;
string LoadFormat = 58;
string MMProj = 41; string MMProj = 41;
@@ -235,11 +232,6 @@ message ModelOptions {
bool FlashAttention = 56; bool FlashAttention = 56;
bool NoKVOffload = 57; bool NoKVOffload = 57;
string ModelPath = 59;
repeated string LoraAdapters = 60;
repeated float LoraScales = 61;
} }
message Result { message Result {
@@ -295,19 +287,6 @@ message TTSRequest {
optional string language = 5; optional string language = 5;
} }
// VADRequest is the input message of the Backend.VAD rpc.
message VADRequest {
// Audio data as a flat list of floats.
// NOTE(review): presumably raw PCM samples; sample rate and channel layout are not specified here, so confirm with the backend.
repeated float audio = 1;
}
// VADSegment describes one detected span by its start and end.
// NOTE(review): the unit (seconds vs. samples) is not stated in this file, so confirm with the backend.
message VADSegment {
float start = 1;
float end = 2;
}
// VADResponse is the output message of the Backend.VAD rpc.
message VADResponse {
// Zero or more segments reported for the request.
repeated VADSegment segments = 1;
}
message SoundGenerationRequest { message SoundGenerationRequest {
string text = 1; string text = 1;
string model = 2; string model = 2;

View File

@@ -22,7 +22,7 @@ else ifeq ($(BUILD_TYPE),clblas)
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas) else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DGGML_HIP=ON CMAKE_ARGS+=-DGGML_HIPBLAS=ON
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation # If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
# But if it's OSX without metal, disable it here # But if it's OSX without metal, disable it here
else ifeq ($(OS),Darwin) else ifeq ($(OS),Darwin)

View File

@@ -113,7 +113,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
std::string ret; std::string ret;
for (; begin != end; ++begin) for (; begin != end; ++begin)
{ {
ret += common_token_to_piece(ctx, *begin); ret += llama_token_to_piece(ctx, *begin);
} }
return ret; return ret;
} }
@@ -121,7 +121,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
// format incomplete utf-8 multibyte character for output // format incomplete utf-8 multibyte character for output
static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token) static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
{ {
std::string out = token == -1 ? "" : common_token_to_piece(ctx, token); std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
// if the size is 1 and first bit is 1, meaning it's a partial character // if the size is 1 and first bit is 1, meaning it's a partial character
// (size > 1 meaning it's already a known token) // (size > 1 meaning it's already a known token)
if (out.size() == 1 && (out[0] & 0x80) == 0x80) if (out.size() == 1 && (out[0] & 0x80) == 0x80)
@@ -203,8 +203,8 @@ struct llama_client_slot
std::string stopping_word; std::string stopping_word;
// sampling // sampling
struct common_params_sampling sparams; struct gpt_sampler_params sparams;
common_sampler *ctx_sampling = nullptr; gpt_sampler *ctx_sampling = nullptr;
int32_t ga_i = 0; // group-attention state int32_t ga_i = 0; // group-attention state
int32_t ga_n = 1; // group-attention factor int32_t ga_n = 1; // group-attention factor
@@ -257,7 +257,7 @@ struct llama_client_slot
images.clear(); images.clear();
} }
bool has_budget(common_params &global_params) { bool has_budget(gpt_params &global_params) {
if (params.n_predict == -1 && global_params.n_predict == -1) if (params.n_predict == -1 && global_params.n_predict == -1)
{ {
return true; // limitless return true; // limitless
@@ -391,39 +391,6 @@ struct llama_metrics {
} }
}; };
// Helper that builds a llama_batch carrying embeddings instead of tokens.
// The struct owns the per-token arrays (pos, n_seq_id, seq_ids, logits) and
// `batch` only stores raw pointers into them, so `batch` is valid only while
// this object is alive and its vectors are not resized.
struct llava_embd_batch {
std::vector<llama_pos> pos;
std::vector<int32_t> n_seq_id;
std::vector<llama_seq_id> seq_id_0;
std::vector<llama_seq_id *> seq_ids;
std::vector<int8_t> logits;
llama_batch batch;
// embd     - embedding data stored in the batch as-is (not copied)
// n_tokens - number of entries in the batch
// pos_0    - position assigned to the first entry; entry i gets pos_0 + i
// seq_id   - the single sequence id shared by every entry
llava_embd_batch(float * embd, int32_t n_tokens, llama_pos pos_0, llama_seq_id seq_id) {
pos .resize(n_tokens);
n_seq_id.resize(n_tokens);
// one extra slot, set to nullptr below as a terminator
seq_ids .resize(n_tokens + 1);
logits .resize(n_tokens);
seq_id_0.resize(1);
seq_id_0[0] = seq_id;
seq_ids [n_tokens] = nullptr;
// tokens is nullptr: this batch is driven by `embd` only
batch = {
/*n_tokens =*/ n_tokens,
/*tokens =*/ nullptr,
/*embd =*/ embd,
/*pos =*/ pos.data(),
/*n_seq_id =*/ n_seq_id.data(),
/*seq_id =*/ seq_ids.data(),
/*logits =*/ logits.data(),
};
// Fill the per-entry metadata through the batch pointers (which alias the vectors above).
for (int i = 0; i < n_tokens; i++) {
batch.pos [i] = pos_0 + i;
batch.n_seq_id[i] = 1;
// every entry points at the same one-element sequence-id array
batch.seq_id [i] = seq_id_0.data();
// logits flag is cleared for every entry
batch.logits [i] = false;
}
}
};
struct llama_server_context struct llama_server_context
{ {
llama_model *model = nullptr; llama_model *model = nullptr;
@@ -431,7 +398,7 @@ struct llama_server_context
clip_ctx *clp_ctx = nullptr; clip_ctx *clp_ctx = nullptr;
common_params params; gpt_params params;
llama_batch batch; llama_batch batch;
@@ -474,7 +441,7 @@ struct llama_server_context
} }
} }
bool load_model(const common_params &params_) bool load_model(const gpt_params &params_)
{ {
params = params_; params = params_;
if (!params.mmproj.empty()) { if (!params.mmproj.empty()) {
@@ -491,9 +458,9 @@ struct llama_server_context
} }
} }
common_init_result common_init = common_init_from_params(params); llama_init_result llama_init = llama_init_from_gpt_params(params);
model = common_init.model; model = llama_init.model;
ctx = common_init.context; ctx = llama_init.context;
if (model == nullptr) if (model == nullptr)
{ {
LOG_ERR("unable to load model: %s", params.model.c_str()); LOG_ERR("unable to load model: %s", params.model.c_str());
@@ -611,12 +578,12 @@ struct llama_server_context
std::vector<llama_token> p; std::vector<llama_token> p;
if (first) if (first)
{ {
p = common_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL); p = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
first = false; first = false;
} }
else else
{ {
p = common_tokenize(ctx, s, false, TMP_FORCE_SPECIAL); p = ::llama_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
} }
prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end()); prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end());
} }
@@ -633,7 +600,7 @@ struct llama_server_context
else else
{ {
auto s = json_prompt.template get<std::string>(); auto s = json_prompt.template get<std::string>();
prompt_tokens = common_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL); prompt_tokens = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
} }
return prompt_tokens; return prompt_tokens;
@@ -662,7 +629,7 @@ struct llama_server_context
bool launch_slot_with_data(llama_client_slot* &slot, json data) { bool launch_slot_with_data(llama_client_slot* &slot, json data) {
slot_params default_params; slot_params default_params;
common_params_sampling default_sparams; gpt_sampler_params default_sparams;
slot->params.stream = json_value(data, "stream", false); slot->params.stream = json_value(data, "stream", false);
slot->params.cache_prompt = json_value(data, "cache_prompt", false); slot->params.cache_prompt = json_value(data, "cache_prompt", false);
@@ -670,6 +637,7 @@ struct llama_server_context
slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k); slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p); slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p); slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
slot->sparams.typ_p = json_value(data, "typical_p", default_sparams.typ_p); slot->sparams.typ_p = json_value(data, "typical_p", default_sparams.typ_p);
slot->sparams.temp = json_value(data, "temperature", default_sparams.temp); slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range); slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
@@ -801,7 +769,7 @@ struct llama_server_context
} }
else if (el[0].is_string()) else if (el[0].is_string())
{ {
auto toks = common_tokenize(model, el[0].get<std::string>(), false); auto toks = llama_tokenize(model, el[0].get<std::string>(), false);
for (auto tok : toks) for (auto tok : toks)
{ {
slot->sparams.logit_bias.push_back({tok, bias}); slot->sparams.logit_bias.push_back({tok, bias});
@@ -833,7 +801,7 @@ struct llama_server_context
sampler_names.emplace_back(name); sampler_names.emplace_back(name);
} }
} }
slot->sparams.samplers = common_sampler_types_from_names(sampler_names, false); slot->sparams.samplers = gpt_sampler_types_from_names(sampler_names, false);
} }
else else
{ {
@@ -917,9 +885,9 @@ struct llama_server_context
if (slot->ctx_sampling != nullptr) if (slot->ctx_sampling != nullptr)
{ {
common_sampler_free(slot->ctx_sampling); gpt_sampler_free(slot->ctx_sampling);
} }
slot->ctx_sampling = common_sampler_init(model, slot->sparams); slot->ctx_sampling = gpt_sampler_init(model, slot->sparams);
//llama_set_rng_seed(ctx, slot->params.seed); //llama_set_rng_seed(ctx, slot->params.seed);
slot->command = LOAD_PROMPT; slot->command = LOAD_PROMPT;
@@ -946,13 +914,13 @@ struct llama_server_context
system_tokens.clear(); system_tokens.clear();
if (!system_prompt.empty()) { if (!system_prompt.empty()) {
system_tokens = common_tokenize(ctx, system_prompt, add_bos_token); system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token);
common_batch_clear(batch); llama_batch_clear(batch);
for (int i = 0; i < (int)system_tokens.size(); ++i) for (int i = 0; i < (int)system_tokens.size(); ++i)
{ {
common_batch_add(batch, system_tokens[i], i, { 0 }, false); llama_batch_add(batch, system_tokens[i], i, { 0 }, false);
} }
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += params.n_batch) for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += params.n_batch)
@@ -966,6 +934,7 @@ struct llama_server_context
batch.n_seq_id + i, batch.n_seq_id + i,
batch.seq_id + i, batch.seq_id + i,
batch.logits + i, batch.logits + i,
0, 0, 0, // unused
}; };
if (llama_decode(ctx, batch_view) != 0) if (llama_decode(ctx, batch_view) != 0)
{ {
@@ -1040,7 +1009,7 @@ struct llama_server_context
bool process_token(completion_token_output &result, llama_client_slot &slot) { bool process_token(completion_token_output &result, llama_client_slot &slot) {
// remember which tokens were sampled - used for repetition penalties during sampling // remember which tokens were sampled - used for repetition penalties during sampling
const std::string token_str = common_token_to_piece(ctx, result.tok); const std::string token_str = llama_token_to_piece(ctx, result.tok);
slot.sampled = result.tok; slot.sampled = result.tok;
// search stop word and delete it // search stop word and delete it
@@ -1191,7 +1160,7 @@ struct llama_server_context
samplers.reserve(slot.sparams.samplers.size()); samplers.reserve(slot.sparams.samplers.size());
for (const auto & sampler : slot.sparams.samplers) for (const auto & sampler : slot.sparams.samplers)
{ {
samplers.emplace_back(common_sampler_type_to_str(sampler)); samplers.emplace_back(gpt_sampler_type_to_str(sampler));
} }
return json { return json {
@@ -1205,6 +1174,7 @@ struct llama_server_context
{"top_k", slot.sparams.top_k}, {"top_k", slot.sparams.top_k},
{"top_p", slot.sparams.top_p}, {"top_p", slot.sparams.top_p},
{"min_p", slot.sparams.min_p}, {"min_p", slot.sparams.min_p},
{"tfs_z", slot.sparams.tfs_z},
{"typical_p", slot.sparams.typ_p}, {"typical_p", slot.sparams.typ_p},
{"repeat_last_n", slot.sparams.penalty_last_n}, {"repeat_last_n", slot.sparams.penalty_last_n},
{"repeat_penalty", slot.sparams.penalty_repeat}, {"repeat_penalty", slot.sparams.penalty_repeat},
@@ -1246,7 +1216,7 @@ struct llama_server_context
if (slot.sparams.n_probs > 0) if (slot.sparams.n_probs > 0)
{ {
std::vector<completion_token_output> probs_output = {}; std::vector<completion_token_output> probs_output = {};
const std::vector<llama_token> to_send_toks = common_tokenize(ctx, tkn.text_to_send, false); const std::vector<llama_token> to_send_toks = llama_tokenize(ctx, tkn.text_to_send, false);
size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size()); size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size());
size_t probs_stop_pos = std::min(slot.sent_token_probs_index + to_send_toks.size(), slot.generated_token_probs.size()); size_t probs_stop_pos = std::min(slot.sent_token_probs_index + to_send_toks.size(), slot.generated_token_probs.size());
if (probs_pos < probs_stop_pos) if (probs_pos < probs_stop_pos)
@@ -1298,7 +1268,7 @@ struct llama_server_context
std::vector<completion_token_output> probs = {}; std::vector<completion_token_output> probs = {};
if (!slot.params.stream && slot.stopped_word) if (!slot.params.stream && slot.stopped_word)
{ {
const std::vector<llama_token> stop_word_toks = common_tokenize(ctx, slot.stopping_word, false); const std::vector<llama_token> stop_word_toks = llama_tokenize(ctx, slot.stopping_word, false);
probs = std::vector<completion_token_output>(slot.generated_token_probs.begin(), slot.generated_token_probs.end() - stop_word_toks.size()); probs = std::vector<completion_token_output>(slot.generated_token_probs.begin(), slot.generated_token_probs.end() - stop_word_toks.size());
} }
else else
@@ -1409,6 +1379,7 @@ struct llama_server_context
batch.n_seq_id + i, batch.n_seq_id + i,
batch.seq_id + i, batch.seq_id + i,
batch.logits + i, batch.logits + i,
0, 0, 0, // unused
}; };
if (llama_decode(ctx, batch_view)) if (llama_decode(ctx, batch_view))
{ {
@@ -1427,9 +1398,8 @@ struct llama_server_context
} }
const int n_embd = llama_n_embd(model); const int n_embd = llama_n_embd(model);
float * embd = img.image_embedding + i * n_embd; llama_batch batch_img = { n_eval, nullptr, (img.image_embedding + i * n_embd), nullptr, nullptr, nullptr, nullptr, slot.n_past, 1, 0, };
llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, slot.n_past, 0); if (llama_decode(ctx, batch_img))
if (llama_decode(ctx, llava_batch.batch))
{ {
LOG("%s : failed to eval image\n", __func__); LOG("%s : failed to eval image\n", __func__);
return false; return false;
@@ -1438,7 +1408,7 @@ struct llama_server_context
} }
image_idx++; image_idx++;
common_batch_clear(batch); llama_batch_clear(batch);
// append prefix of next image // append prefix of next image
const auto json_prompt = (image_idx >= (int) slot.images.size()) ? const auto json_prompt = (image_idx >= (int) slot.images.size()) ?
@@ -1448,7 +1418,7 @@ struct llama_server_context
std::vector<llama_token> append_tokens = tokenize(json_prompt, false); // has next image std::vector<llama_token> append_tokens = tokenize(json_prompt, false); // has next image
for (int i = 0; i < (int) append_tokens.size(); ++i) for (int i = 0; i < (int) append_tokens.size(); ++i)
{ {
common_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true); llama_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true);
slot.n_past += 1; slot.n_past += 1;
} }
} }
@@ -1580,7 +1550,7 @@ struct llama_server_context
update_system_prompt(); update_system_prompt();
} }
common_batch_clear(batch); llama_batch_clear(batch);
if (all_slots_are_idle) if (all_slots_are_idle)
{ {
@@ -1658,7 +1628,7 @@ struct llama_server_context
// TODO: we always have to take into account the "system_tokens" // TODO: we always have to take into account the "system_tokens"
// this is not great and needs to be improved somehow // this is not great and needs to be improved somehow
common_batch_add(batch, slot.sampled, system_tokens.size() + slot_npast, { slot.id }, true); llama_batch_add(batch, slot.sampled, system_tokens.size() + slot_npast, { slot.id }, true);
slot.n_past += 1; slot.n_past += 1;
} }
@@ -1752,7 +1722,7 @@ struct llama_server_context
if (!slot.params.cache_prompt) if (!slot.params.cache_prompt)
{ {
common_sampler_reset(slot.ctx_sampling); gpt_sampler_reset(slot.ctx_sampling);
slot.n_past = 0; slot.n_past = 0;
slot.n_past_se = 0; slot.n_past_se = 0;
@@ -1764,7 +1734,7 @@ struct llama_server_context
// push the prompt into the sampling context (do not apply grammar) // push the prompt into the sampling context (do not apply grammar)
for (auto &token : prompt_tokens) for (auto &token : prompt_tokens)
{ {
common_sampler_accept(slot.ctx_sampling, token, false); gpt_sampler_accept(slot.ctx_sampling, token, false);
} }
slot.n_past = common_part(slot.cache_tokens, prompt_tokens); slot.n_past = common_part(slot.cache_tokens, prompt_tokens);
@@ -1856,7 +1826,7 @@ struct llama_server_context
ga_i += ga_w/ga_n; ga_i += ga_w/ga_n;
} }
} }
common_batch_add(batch, prefix_tokens[slot.n_past], system_tokens.size() + slot_npast, {slot.id }, false); llama_batch_add(batch, prefix_tokens[slot.n_past], system_tokens.size() + slot_npast, {slot.id }, false);
slot_npast++; slot_npast++;
} }
@@ -1934,6 +1904,7 @@ struct llama_server_context
batch.n_seq_id + i, batch.n_seq_id + i,
batch.seq_id + i, batch.seq_id + i,
batch.logits + i, batch.logits + i,
0, 0, 0, // unused
}; };
const int ret = llama_decode(ctx, batch_view); const int ret = llama_decode(ctx, batch_view);
@@ -1972,9 +1943,9 @@ struct llama_server_context
} }
completion_token_output result; completion_token_output result;
const llama_token id = common_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i); const llama_token id = gpt_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i);
common_sampler_accept(slot.ctx_sampling, id, true); gpt_sampler_accept(slot.ctx_sampling, id, true);
slot.n_decoded += 1; slot.n_decoded += 1;
if (slot.n_decoded == 1) if (slot.n_decoded == 1)
@@ -1985,7 +1956,7 @@ struct llama_server_context
} }
result.tok = id; result.tok = id;
const auto * cur_p = common_sampler_get_candidates(slot.ctx_sampling); const auto * cur_p = gpt_sampler_get_candidates(slot.ctx_sampling);
for (size_t i = 0; i < (size_t) slot.sparams.n_probs; ++i) { for (size_t i = 0; i < (size_t) slot.sparams.n_probs; ++i) {
result.probs.push_back({ result.probs.push_back({
@@ -2038,7 +2009,7 @@ static json format_partial_response(
struct token_translator struct token_translator
{ {
llama_context * ctx; llama_context * ctx;
std::string operator()(llama_token tok) const { return common_token_to_piece(ctx, tok); } std::string operator()(llama_token tok) const { return llama_token_to_piece(ctx, tok); }
std::string operator()(const completion_token_output &cto) const { return (*this)(cto.tok); } std::string operator()(const completion_token_output &cto) const { return (*this)(cto.tok); }
}; };
@@ -2103,6 +2074,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
// slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict); // slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
// slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k); // slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
// slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p); // slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
// slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
// slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p); // slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p);
// slot->sparams.temp = json_value(data, "temperature", default_sparams.temp); // slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
// slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n); // slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n);
@@ -2126,6 +2098,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
data["n_predict"] = predict->tokens() == 0 ? -1 : predict->tokens(); data["n_predict"] = predict->tokens() == 0 ? -1 : predict->tokens();
data["top_k"] = predict->topk(); data["top_k"] = predict->topk();
data["top_p"] = predict->topp(); data["top_p"] = predict->topp();
data["tfs_z"] = predict->tailfreesamplingz();
data["typical_p"] = predict->typicalp(); data["typical_p"] = predict->typicalp();
data["temperature"] = predict->temperature(); data["temperature"] = predict->temperature();
data["repeat_last_n"] = predict->repeat(); data["repeat_last_n"] = predict->repeat();
@@ -2172,6 +2145,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
// llama.params.n_predict = predict->tokens() == 0 ? -1 : predict->tokens(); // llama.params.n_predict = predict->tokens() == 0 ? -1 : predict->tokens();
// llama.params.sparams.top_k = predict->topk(); // llama.params.sparams.top_k = predict->topk();
// llama.params.sparams.top_p = predict->topp(); // llama.params.sparams.top_p = predict->topp();
// llama.params.sparams.tfs_z = predict->tailfreesamplingz();
// llama.params.sparams.typical_p = predict->typicalp(); // llama.params.sparams.typical_p = predict->typicalp();
// llama.params.sparams.penalty_last_n = predict->repeat(); // llama.params.sparams.penalty_last_n = predict->repeat();
// llama.params.sparams.temp = predict->temperature(); // llama.params.sparams.temp = predict->temperature();
@@ -2229,7 +2203,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
// } // }
static void params_parse(const backend::ModelOptions* request, static void params_parse(const backend::ModelOptions* request,
common_params & params) { gpt_params & params) {
// this is comparable to: https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L1809 // this is comparable to: https://github.com/ggerganov/llama.cpp/blob/d9b33fe95bd257b36c84ee5769cc048230067d6f/examples/server/server.cpp#L1809
@@ -2299,7 +2273,6 @@ static void params_parse(const backend::ModelOptions* request,
params.use_mmap = request->mmap(); params.use_mmap = request->mmap();
params.flash_attn = request->flashattention(); params.flash_attn = request->flashattention();
params.no_kv_offload = request->nokvoffload(); params.no_kv_offload = request->nokvoffload();
params.ctx_shift = false; // We control context-shifting in any case (and we disable it as it could just lead to infinite loops)
params.embedding = request->embeddings(); params.embedding = request->embeddings();
@@ -2338,7 +2311,7 @@ public:
grpc::Status LoadModel(ServerContext* context, const backend::ModelOptions* request, backend::Result* result) { grpc::Status LoadModel(ServerContext* context, const backend::ModelOptions* request, backend::Result* result) {
// Implement LoadModel RPC // Implement LoadModel RPC
common_params params; gpt_params params;
params_parse(request, params); params_parse(request, params);
llama_backend_init(); llama_backend_init();

View File

@@ -15,7 +15,7 @@ var (
func main() { func main() {
flag.Parse() flag.Parse()
if err := grpc.StartServer(*addr, &VAD{}); err != nil { if err := grpc.StartServer(*addr, &LLM{}); err != nil {
panic(err) panic(err)
} }
} }

View File

@@ -0,0 +1,95 @@
package main
// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"path/filepath"
"github.com/donomii/go-rwkv.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
// tokenizerSuffix is appended to the model file's base name to derive the
// tokenizer file name when the load options do not name a tokenizer.
const tokenizerSuffix = ".tokenizer.json"
// LLM is the rwkv backend handed to the gRPC server. It embeds
// base.SingleThread (presumably serialising requests — confirm in pkg/grpc/base)
// and keeps the loaded rwkv state between calls.
type LLM struct {
base.SingleThread
// rwkv is the model state; it is set by Load and used by the prediction
// and tokenization methods.
rwkv *rwkv.RwkvState
}
// Load reads the model named by opts.ModelFile together with its tokenizer and
// stores the resulting rwkv state on the receiver.
//
// The tokenizer file name is opts.Tokenizer when set; otherwise it is the model
// file's base name followed by tokenizerSuffix. In both cases the name is
// resolved relative to the directory that contains the model file.
// An error is returned when rwkv.LoadFiles yields nil.
func (llm *LLM) Load(opts *pb.ModelOptions) error {
	tokenizerName := opts.Tokenizer
	if tokenizerName == "" {
		tokenizerName = filepath.Base(opts.ModelFile) + tokenizerSuffix
	}
	tokenizerPath := filepath.Join(filepath.Dir(opts.ModelFile), tokenizerName)

	state := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads()))
	if state == nil {
		return fmt.Errorf("rwkv could not load model")
	}

	llm.rwkv = state
	return nil
}
// Predict feeds opts.Prompt to the model and returns the generated text.
//
// Generation stops at the first entry of opts.StopPrompts, or at a newline when
// no stop prompt is supplied. An error from processing the prompt is returned
// unchanged together with an empty string.
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
	stop := "\n"
	if stops := opts.StopPrompts; len(stops) != 0 {
		stop = stops[0]
	}

	err := llm.rwkv.ProcessInput(opts.Prompt)
	if err != nil {
		return "", err
	}

	// A nil callback is passed: the full response is collected and returned at once.
	return llm.rwkv.GenerateResponse(int(opts.Tokens), stop, float32(opts.Temperature), float32(opts.TopP), nil), nil
}
// PredictStream generates a response for opts.Prompt in a background goroutine
// and sends every fragment reported by the generator to results.
//
// Generation stops at the first entry of opts.StopPrompts, or at a newline when
// no stop prompt is supplied. The method itself always returns nil right after
// starting the goroutine; a failure to process the prompt is only printed.
//
// results is closed when the goroutine exits, on the success path and on the
// error path alike, so a consumer ranging over the channel always terminates.
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
	go func() {
		// Closing in a defer also covers the early return below; without it a
		// receiver would block forever when the prompt cannot be processed.
		defer close(results)

		stopWord := "\n"
		if len(opts.StopPrompts) > 0 {
			stopWord = opts.StopPrompts[0]
		}

		if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
			fmt.Println("Error processing input: ", err)
			return
		}

		llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), func(s string) bool {
			results <- s
			// NOTE(review): true presumably tells the generator to keep going —
			// confirm against the go-rwkv.cpp callback contract.
			return true
		})
	}()

	return nil
}
// TokenizeString encodes opts.Prompt with the loaded model's tokenizer and
// returns the token IDs as int32 values together with their count.
// An encoding error is returned unchanged with an empty response.
func (llm *LLM) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationResponse, error) {
	encoded, err := llm.rwkv.Tokenizer.Encode(opts.Prompt)
	if err != nil {
		return pb.TokenizationResponse{}, err
	}

	ids := make([]int32, 0, len(encoded))
	for _, tok := range encoded {
		ids = append(ids, int32(tok.ID))
	}

	return pb.TokenizationResponse{
		Length: int32(len(encoded)),
		Tokens: ids,
	}, nil
}

View File

@@ -1,54 +0,0 @@
package main
// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/streamer45/silero-vad-go/speech"
)
// VAD is the voice-activity-detection backend handed to the gRPC server. It
// embeds base.SingleThread (presumably serialising requests — confirm in
// pkg/grpc/base) and keeps the silero detector between calls.
type VAD struct {
base.SingleThread
// detector is created by Load and used by VAD to find speech segments.
detector *speech.Detector
}
// Load builds a silero speech detector for the model at opts.ModelFile and
// stores it on the receiver.
//
// The detector is configured for a 16000 sample rate with a 0.5 threshold and
// no minimum-silence or padding duration. A construction failure is returned
// wrapped with context.
func (vad *VAD) Load(opts *pb.ModelOptions) error {
	cfg := speech.DetectorConfig{
		ModelPath:  opts.ModelFile,
		SampleRate: 16000,
		//WindowSize:        1024,
		Threshold:            0.5,
		MinSilenceDurationMs: 0,
		SpeechPadMs:          0,
	}

	detector, err := speech.NewDetector(cfg)
	if err != nil {
		return fmt.Errorf("create silero detector: %w", err)
	}

	vad.detector = detector
	return nil
}
// VAD runs the loaded detector over req.Audio and returns one segment per
// detected stretch of speech, carrying its start and end as float32 values.
// A detection failure is returned wrapped with context and an empty response.
func (vad *VAD) VAD(req *pb.VADRequest) (pb.VADResponse, error) {
	detected, err := vad.detector.Detect(req.Audio)
	if err != nil {
		return pb.VADResponse{}, fmt.Errorf("detect: %w", err)
	}

	// Sized up front; the slice stays non-nil even when nothing was detected.
	out := make([]*pb.VADSegment, len(detected))
	for i := range detected {
		out[i] = &pb.VADSegment{
			Start: float32(detected[i].SpeechStartAt),
			End:   float32(detected[i].SpeechEndAt),
		}
	}

	return pb.VADResponse{Segments: out}, nil
}

View File

@@ -1,2 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118 torch

View File

@@ -1 +1 @@
torch==2.4.1 torch

View File

@@ -1,2 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0 --extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0 torch

View File

@@ -1,6 +1,6 @@
accelerate accelerate
auto-gptq==0.7.1 auto-gptq==0.7.1
grpcio==1.68.0 grpcio==1.66.2
protobuf protobuf
certifi certifi
transformers transformers

View File

@@ -1,4 +1,4 @@
transformers transformers
accelerate accelerate
torch==2.4.1 torch
torchaudio==2.4.1 torchaudio

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118 torch
torchaudio==2.4.1+cu118 torchaudio
transformers transformers
accelerate accelerate

View File

@@ -1,4 +1,4 @@
torch==2.4.1 torch
torchaudio==2.4.1 torchaudio
transformers transformers
accelerate accelerate

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0 --extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0 torch
torchaudio==2.4.1+rocm6.0 torchaudio
transformers transformers
accelerate accelerate

View File

@@ -1,4 +1,4 @@
bark==0.1.5 bark==0.1.5
grpcio==1.68.0 grpcio==1.66.2
protobuf protobuf
certifi certifi

View File

@@ -1,9 +1,8 @@
.DEFAULT_GOAL := install .DEFAULT_GOAL := install
.PHONY: install .PHONY: install
install: install: protogen
bash install.sh bash install.sh
$(MAKE) protogen
.PHONY: protogen .PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py protogen: backend_pb2_grpc.py backend_pb2.py
@@ -13,7 +12,7 @@ protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py $(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py: backend_pb2_grpc.py backend_pb2.py:
bash protogen.sh python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
.PHONY: clean .PHONY: clean
clean: protogen-clean clean: protogen-clean

View File

@@ -1,6 +0,0 @@
#!/bin/bash
set -e
source $(dirname $0)/../common/libbackend.sh
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto

View File

@@ -1,3 +1,2 @@
grpcio==1.68.0 grpcio==1.66.2
protobuf protobuf
grpcio-tools

View File

@@ -1,4 +1,3 @@
transformers transformers
accelerate accelerate
torch==2.4.1 torch
coqui-tts

View File

@@ -1,6 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118 torch
torchaudio==2.4.1+cu118 torchaudio
transformers transformers
accelerate accelerate
coqui-tts

View File

@@ -1,5 +1,4 @@
torch==2.4.1 torch
torchaudio==2.4.1 torchaudio
transformers transformers
accelerate accelerate
coqui-tts

View File

@@ -1,6 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0 --extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0 torch
torchaudio==2.4.1+rocm6.0 torchaudio
transformers transformers
accelerate accelerate
coqui-tts

View File

@@ -5,5 +5,4 @@ torchaudio
optimum[openvino] optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers transformers
accelerate accelerate
coqui-tts

View File

@@ -1,4 +1,4 @@
grpcio==1.68.0 coqui-tts
grpcio==1.66.2
protobuf protobuf
certifi certifi
packaging==24.1

View File

@@ -19,7 +19,7 @@ class TestBackendServicer(unittest.TestCase):
This method sets up the gRPC service by starting the server This method sets up the gRPC service by starting the server
""" """
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
time.sleep(30) time.sleep(10)
def tearDown(self) -> None: def tearDown(self) -> None:
""" """

View File

@@ -247,16 +247,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
use_safetensors=True, use_safetensors=True,
variant=variant) variant=variant)
elif request.PipelineType == "FluxPipeline": elif request.PipelineType == "FluxPipeline":
if fromSingleFile:
self.pipe = FluxPipeline.from_single_file(modelFile,
torch_dtype=torchType,
use_safetensors=True)
else:
self.pipe = FluxPipeline.from_pretrained( self.pipe = FluxPipeline.from_pretrained(
request.Model, request.Model,
torch_dtype=torch.bfloat16) torch_dtype=torch.bfloat16)
if request.LowVRAM: if request.LowVRAM:
self.pipe.enable_model_cpu_offload() self.pipe.enable_model_cpu_offload()
elif request.PipelineType == "FluxTransformer2DModel": elif request.PipelineType == "FluxTransformer2DModel":
dtype = torch.bfloat16 dtype = torch.bfloat16
# specify from environment or default to "ChuckMcSneed/FLUX.1-dev" # specify from environment or default to "ChuckMcSneed/FLUX.1-dev"
@@ -301,34 +296,22 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
self.pipe.controlnet = self.controlnet self.pipe.controlnet = self.controlnet
else: else:
self.controlnet = None self.controlnet = None
# Assume directory from request.ModelFile.
if request.LoraAdapter and not os.path.isabs(request.LoraAdapter): # Only if request.LoraAdapter it's not an absolute path
if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter:
# get base path of modelFile
modelFileBase = os.path.dirname(request.ModelFile)
# modify LoraAdapter to be relative to modelFileBase # modify LoraAdapter to be relative to modelFileBase
request.LoraAdapter = os.path.join(request.ModelPath, request.LoraAdapter) request.LoraAdapter = os.path.join(modelFileBase, request.LoraAdapter)
device = "cpu" if not request.CUDA else "cuda" device = "cpu" if not request.CUDA else "cuda"
self.device = device self.device = device
if request.LoraAdapter: if request.LoraAdapter:
# Check if its a local file and not a directory ( we load lora differently for a safetensor file ) # Check if its a local file and not a directory ( we load lora differently for a safetensor file )
if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter): if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter):
# self.load_lora_weights(request.LoraAdapter, 1, device, torchType)
self.pipe.load_lora_weights(request.LoraAdapter) self.pipe.load_lora_weights(request.LoraAdapter)
else: else:
self.pipe.unet.load_attn_procs(request.LoraAdapter) self.pipe.unet.load_attn_procs(request.LoraAdapter)
if len(request.LoraAdapters) > 0:
i = 0
adapters_name = []
adapters_weights = []
for adapter in request.LoraAdapters:
if not os.path.isabs(adapter):
adapter = os.path.join(request.ModelPath, adapter)
self.pipe.load_lora_weights(adapter, adapter_name=f"adapter_{i}")
adapters_name.append(f"adapter_{i}")
i += 1
for adapters_weight in request.LoraScales:
adapters_weights.append(adapters_weight)
self.pipe.set_adapters(adapters_name, adapter_weights=adapters_weights)
if request.CUDA: if request.CUDA:
self.pipe.to('cuda') self.pipe.to('cuda')
@@ -409,6 +392,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
# create a dictionary of values for the parameters # create a dictionary of values for the parameters
options = { options = {
"negative_prompt": request.negative_prompt, "negative_prompt": request.negative_prompt,
"width": request.width,
"height": request.height,
"num_inference_steps": steps, "num_inference_steps": steps,
} }
@@ -426,13 +411,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
keys = options.keys() keys = options.keys()
if request.EnableParameters != "": if request.EnableParameters != "":
keys = [key.strip() for key in request.EnableParameters.split(",")] keys = request.EnableParameters.split(",")
if request.EnableParameters == "none": if request.EnableParameters == "none":
keys = [] keys = []
# create a dictionary of parameters by using the keys from EnableParameters and the values from defaults # create a dictionary of parameters by using the keys from EnableParameters and the values from defaults
kwargs = {key: options.get(key) for key in keys if key in options} kwargs = {key: options[key] for key in keys}
# Set seed # Set seed
if request.seed > 0: if request.seed > 0:
@@ -443,12 +428,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if self.PipelineType == "FluxPipeline": if self.PipelineType == "FluxPipeline":
kwargs["max_sequence_length"] = 256 kwargs["max_sequence_length"] = 256
if request.width:
kwargs["width"] = request.width
if request.height:
kwargs["height"] = request.height
if self.PipelineType == "FluxTransformer2DModel": if self.PipelineType == "FluxTransformer2DModel":
kwargs["output_type"] = "pil" kwargs["output_type"] = "pil"
kwargs["generator"] = torch.Generator("cpu").manual_seed(0) kwargs["generator"] = torch.Generator("cpu").manual_seed(0)
@@ -468,7 +447,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
export_to_video(video_frames, request.dst) export_to_video(video_frames, request.dst)
return backend_pb2.Result(message="Media generated successfully", success=True) return backend_pb2.Result(message="Media generated successfully", success=True)
print(f"Generating image with {kwargs=}", file=sys.stderr)
image = {} image = {}
if COMPEL: if COMPEL:
conditioning, pooled = self.compel.build_conditioning_tensor(prompt) conditioning, pooled = self.compel.build_conditioning_tensor(prompt)

View File

@@ -5,5 +5,5 @@ accelerate
compel compel
peft peft
sentencepiece sentencepiece
torch==2.4.1 torch
optimum-quanto optimum-quanto

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118 torch
diffusers diffusers
opencv-python opencv-python
transformers transformers

View File

@@ -1,4 +1,4 @@
torch==2.4.1 torch
diffusers diffusers
opencv-python opencv-python
transformers transformers

View File

@@ -1,5 +1,5 @@
setuptools setuptools
grpcio==1.68.0 grpcio==1.66.2
pillow pillow
protobuf protobuf
certifi certifi

View File

@@ -1,3 +1,3 @@
transformers transformers
accelerate accelerate
torch==2.4.1 torch

View File

@@ -1,4 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118 torch
transformers transformers
accelerate accelerate

View File

@@ -1,3 +1,3 @@
torch==2.4.1 torch
transformers transformers
accelerate accelerate

View File

@@ -1,4 +1,4 @@
grpcio==1.68.0 grpcio==1.66.2
protobuf protobuf
certifi certifi
wheel wheel

View File

@@ -1,2 +1,2 @@
torch==2.4.1 torch
transformers transformers

View File

@@ -1,3 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118 torch
transformers transformers

View File

@@ -1,2 +1,2 @@
torch==2.4.1 torch
transformers transformers

View File

@@ -1,3 +1,3 @@
grpcio==1.68.0 grpcio==1.66.2
protobuf protobuf
certifi certifi

View File

@@ -1,3 +1 @@
torch==2.4.1 torch
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git

View File

@@ -1,4 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118 torch
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git

View File

@@ -1,3 +1 @@
torch==2.4.1 torch
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git

View File

@@ -1,4 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0 --extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0 torch
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git

View File

@@ -2,22 +2,22 @@
intel-extension-for-pytorch intel-extension-for-pytorch
torch torch
optimum[openvino] optimum[openvino]
grpcio==1.68.0 grpcio==1.66.2
protobuf protobuf
librosa==0.9.1 librosa==0.9.1
faster-whisper==0.9.0 faster-whisper==1.0.3
pydub==0.25.1 pydub==0.25.1
wavmark==0.0.3 wavmark==0.0.3
numpy==1.22.0 numpy==1.26.4
eng_to_ipa==0.0.2 eng_to_ipa==0.0.2
inflect==7.0.0 inflect==7.0.0
unidecode==1.3.7 unidecode==1.3.7
whisper-timestamped==1.14.2 whisper-timestamped==1.15.4
openai openai
python-dotenv python-dotenv
pypinyin==0.50.0 pypinyin==0.50.0
cn2an==0.5.22 cn2an==0.5.22
jieba==0.42.1 jieba==0.42.1
gradio==4.44.1
langid==1.1.6 langid==1.1.6
git+https://github.com/myshell-ai/MeloTTS.git git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git

View File

@@ -1,10 +1,10 @@
grpcio==1.68.0 grpcio==1.66.2
protobuf protobuf
librosa librosa
faster-whisper faster-whisper
pydub==0.25.1 pydub==0.25.1
wavmark==0.0.3 wavmark==0.0.3
numpy==1.22.0 numpy
eng_to_ipa==0.0.2 eng_to_ipa==0.0.2
inflect inflect
unidecode unidecode
@@ -13,8 +13,8 @@ openai
python-dotenv python-dotenv
pypinyin pypinyin
cn2an==0.5.22 cn2an==0.5.22
networkx==2.8.8
jieba==0.42.1 jieba==0.42.1
gradio==3.48.0 gradio
langid==1.1.6 langid==1.1.6
llvmlite==0.43.0 git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git

View File

@@ -19,7 +19,7 @@ class TestBackendServicer(unittest.TestCase):
This method sets up the gRPC service by starting the server This method sets up the gRPC service by starting the server
""" """
self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"]) self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
time.sleep(30) time.sleep(10)
def tearDown(self) -> None: def tearDown(self) -> None:
""" """

View File

@@ -12,10 +12,9 @@ export SKIP_CONDA=1
endif endif
.PHONY: parler-tts .PHONY: parler-tts
parler-tts: parler-tts: protogen
@echo "Installing $(CONDA_ENV_PATH)..." @echo "Installing $(CONDA_ENV_PATH)..."
bash install.sh $(CONDA_ENV_PATH) bash install.sh $(CONDA_ENV_PATH)
$(MAKE) protogen
.PHONY: run .PHONY: run
run: protogen run: protogen
@@ -37,7 +36,7 @@ protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py $(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py: backend_pb2_grpc.py backend_pb2.py:
bash protogen.sh python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
.PHONY: clean .PHONY: clean
clean: protogen-clean clean: protogen-clean

View File

@@ -11,10 +11,8 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi fi
installRequirements installRequirements
# https://github.com/descriptinc/audiotools/issues/101 # https://github.com/descriptinc/audiotools/issues/101
# incompatible protobuf versions. # incompatible protobuf versions.
PYDIR=python3.10 PYDIR=python3.10

View File

@@ -1,6 +0,0 @@
#!/bin/bash
set -e
source $(dirname $0)/../common/libbackend.sh
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto

View File

@@ -1,4 +1,3 @@
git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17 git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17
llvmlite==0.43.0 llvmlite==0.43.0
numba==0.60.0 numba==0.60.0
grpcio-tools==1.42.0

View File

@@ -1,3 +1,3 @@
transformers transformers
accelerate accelerate
torch==2.4.1 torch

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118 torch
torchaudio==2.4.1+cu118 torchaudio
transformers transformers
accelerate accelerate

View File

@@ -1,4 +1,4 @@
torch==2.4.1 torch
torchaudio==2.4.1 torchaudio
transformers transformers
accelerate accelerate

View File

@@ -1,3 +1,4 @@
grpcio==1.68.0 grpcio==1.66.2
protobuf
certifi certifi
llvmlite==0.43.0 llvmlite==0.43.0

View File

@@ -1,4 +1,4 @@
transformers transformers
accelerate accelerate
torch==2.4.1 torch
rerankers[transformers] rerankers[transformers]

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
transformers transformers
accelerate accelerate
torch==2.4.1+cu118 torch
rerankers[transformers] rerankers[transformers]

View File

@@ -1,4 +1,4 @@
transformers transformers
accelerate accelerate
torch==2.4.1 torch
rerankers[transformers] rerankers[transformers]

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0 --extra-index-url https://download.pytorch.org/whl/rocm6.0
transformers transformers
accelerate accelerate
torch==2.4.1+rocm6.0 torch
rerankers[transformers] rerankers[transformers]

View File

@@ -1,3 +1,3 @@
grpcio==1.68.0 grpcio==1.66.2
protobuf protobuf
certifi certifi

View File

@@ -1,6 +1,6 @@
torch==2.4.1 torch
accelerate accelerate
transformers transformers
bitsandbytes bitsandbytes
sentence-transformers==3.3.1 sentence-transformers==3.1.1
transformers transformers

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118 torch
accelerate accelerate
sentence-transformers==3.3.1 sentence-transformers==3.1.1
transformers transformers

View File

@@ -1,4 +1,4 @@
torch==2.4.1 torch
accelerate accelerate
sentence-transformers==3.3.1 sentence-transformers==3.1.1
transformers transformers

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0 --extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0 torch
accelerate accelerate
sentence-transformers==3.3.1 sentence-transformers==3.1.1
transformers transformers

View File

@@ -4,5 +4,5 @@ torch
optimum[openvino] optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
accelerate accelerate
sentence-transformers==3.3.1 sentence-transformers==3.1.1
transformers transformers

View File

@@ -1,4 +1,4 @@
grpcio==1.68.0 grpcio==1.66.2
protobuf protobuf
certifi certifi
datasets datasets

View File

@@ -1,3 +1,3 @@
transformers transformers
accelerate accelerate
torch==2.4.1 torch

View File

@@ -1,4 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
transformers transformers
accelerate accelerate
torch==2.4.1+cu118 torch

View File

@@ -1,3 +1,3 @@
transformers transformers
accelerate accelerate
torch==2.4.1 torch

View File

@@ -1,4 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0 --extra-index-url https://download.pytorch.org/whl/rocm6.0
transformers transformers
accelerate accelerate
torch==2.4.1+rocm6.0 torch

View File

@@ -1,4 +1,4 @@
grpcio==1.68.0 grpcio==1.66.2
protobuf protobuf
scipy==1.14.0 scipy==1.14.0
certifi certifi

View File

@@ -72,12 +72,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
Returns: Returns:
A Result object that contains the result of the LoadModel operation. A Result object that contains the result of the LoadModel operation.
""" """
model_name = request.Model model_name = request.Model
# Check to see if the Model exists in the filesystem already.
if os.path.exists(request.ModelFile):
model_name = request.ModelFile
compute = torch.float16 compute = torch.float16
if request.F16Memory == True: if request.F16Memory == True:

View File

@@ -1,4 +1,4 @@
torch==2.4.1 torch
accelerate accelerate
transformers transformers
bitsandbytes bitsandbytes

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118 torch
accelerate accelerate
transformers transformers
bitsandbytes bitsandbytes

View File

@@ -1,4 +1,4 @@
torch==2.4.1 torch
accelerate accelerate
transformers transformers
bitsandbytes bitsandbytes

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0 --extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0 torch
accelerate accelerate
transformers transformers
bitsandbytes bitsandbytes

View File

@@ -1,4 +1,4 @@
grpcio==1.68.0 grpcio==1.66.2
protobuf protobuf
certifi certifi
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,3 +1,3 @@
accelerate accelerate
torch==2.4.1 torch
torchaudio==2.4.1 torchaudio

View File

@@ -1,4 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
accelerate accelerate
torch==2.4.1+cu118 torch
torchaudio==2.4.1+cu118 torchaudio

View File

@@ -1,3 +1,3 @@
accelerate accelerate
torch==2.4.1 torch
torchaudio==2.4.1 torchaudio

View File

@@ -1,3 +1,3 @@
grpcio==1.68.0 grpcio==1.66.2
protobuf protobuf
certifi certifi

View File

@@ -19,8 +19,6 @@ from vllm.utils import random_uuid
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.multimodal.utils import fetch_image from vllm.multimodal.utils import fetch_image
from vllm.assets.video import VideoAsset from vllm.assets.video import VideoAsset
import base64
import io
_ONE_DAY_IN_SECONDS = 60 * 60 * 24 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
@@ -95,8 +93,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.Quantization != "": if request.Quantization != "":
engine_args.quantization = request.Quantization engine_args.quantization = request.Quantization
if request.LoadFormat != "":
engine_args.load_format = request.LoadFormat
if request.GPUMemoryUtilization != 0: if request.GPUMemoryUtilization != 0:
engine_args.gpu_memory_utilization = request.GPUMemoryUtilization engine_args.gpu_memory_utilization = request.GPUMemoryUtilization
if request.TrustRemoteCode: if request.TrustRemoteCode:
@@ -221,15 +217,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
# Generate text using the LLM engine # Generate text using the LLM engine
request_id = random_uuid() request_id = random_uuid()
print(f"Generating text with request_id: {request_id}", file=sys.stderr) print(f"Generating text with request_id: {request_id}", file=sys.stderr)
multi_modal_data = {}
if image_data:
multi_modal_data["image"] = image_data
if video_data:
multi_modal_data["video"] = video_data
outputs = self.llm.generate( outputs = self.llm.generate(
{ {
"prompt": prompt, "prompt": prompt,
"multi_modal_data": multi_modal_data if multi_modal_data else None, "multi_modal_data": {
"image": image_data if image_data else None,
"video": video_data if video_data else None,
} if image_data or video_data else None,
}, },
sampling_params=sampling_params, sampling_params=sampling_params,
request_id=request_id, request_id=request_id,
@@ -268,22 +262,19 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
def load_image(self, image_path: str): def load_image(self, image_path: str):
""" """
Load an image from the given file path or base64 encoded data. Load an image from the given file path.
Args: Args:
image_path (str): The path to the image file or base64 encoded data. image_path (str): The path to the image file.
Returns: Returns:
Image: The loaded image. Image: The loaded image.
""" """
try: try:
return Image.open(image_path)
image_data = base64.b64decode(image_path)
image = Image.open(io.BytesIO(image_data))
return image
except Exception as e: except Exception as e:
print(f"Error loading image {image_path}: {e}", file=sys.stderr) print(f"Error loading image {image_path}: {e}", file=sys.stderr)
return None return self.load_video(image_path)
def load_video(self, video_path: str): def load_video(self, video_path: str):
""" """
@@ -296,15 +287,10 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
Video: The loaded video. Video: The loaded video.
""" """
try: try:
timestamp = str(int(time.time() * 1000)) # Generate timestamp video = VideoAsset(name=video_path).np_ndarrays
p = f"/tmp/vl-{timestamp}.data" # Use timestamp in filename
with open(p, "wb") as f:
f.write(base64.b64decode(video_path))
video = VideoAsset(name=p).np_ndarrays
os.remove(p)
return video return video
except Exception as e: except Exception as e:
print(f"Error loading video {video_path}: {e}", file=sys.stderr) print(f"Error loading video {image_path}: {e}", file=sys.stderr)
return None return None
async def serve(address): async def serve(address):

View File

@@ -13,16 +13,14 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi fi
# We don't embed this into the images as it is a large dependency and not always needed. if [ "x${BUILD_TYPE}" == "x" ]; then
# Besides, the speed inference are not actually usable in the current state for production use-cases.
if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE}" == "xtrue" ]; then
ensureVenv ensureVenv
# https://docs.vllm.ai/en/v0.6.1/getting_started/cpu-installation.html # https://docs.vllm.ai/en/v0.6.1/getting_started/cpu-installation.html
if [ ! -d vllm ]; then if [ ! -d vllm ]; then
git clone https://github.com/vllm-project/vllm git clone https://github.com/vllm-project/vllm
fi fi
pushd vllm pushd vllm
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.68.0 protobuf bitsandbytes uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.66.2 protobuf bitsandbytes
uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
VLLM_TARGET_DEVICE=cpu python setup.py install VLLM_TARGET_DEVICE=cpu python setup.py install
popd popd

View File

@@ -1,3 +1,3 @@
accelerate accelerate
torch==2.4.1 torch
transformers transformers

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
accelerate accelerate
torch==2.4.1+cu118 torch
transformers transformers
bitsandbytes bitsandbytes

Some files were not shown because too many files have changed in this diff Show More