Compare commits


1 Commit

Author: Ettore Di Giacinto
SHA1: cc11323d1c
Message: fix(ci): install latest git
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Date: 2024-10-24 14:55:24 +02:00
311 changed files with 11069 additions and 6238 deletions


@@ -1,11 +0,0 @@
meta {
name: model delete
type: http
seq: 7
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
body: none
auth: none
}


Binary file not shown.


@@ -1,16 +0,0 @@
meta {
name: transcribe
type: http
seq: 1
}
post {
url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/audio/transcriptions
body: multipartForm
auth: none
}
body:multipart-form {
file: @file(transcription/gb1.ogg)
model: whisper-1
}
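
For reference, the request above can be reproduced with curl; this is a minimal sketch assuming LocalAI's default port (8080) and the same sample file referenced in the Bruno collection:

```bash
# Minimal sketch of the multipart transcription request described above.
# Host and port are assumptions (LocalAI defaults to 8080); adjust as needed.
curl http://localhost:8080/v1/audio/transcriptions \
  -H "Content-Type: multipart/form-data" \
  -F file="@transcription/gb1.ogg" \
  -F model="whisper-1"
```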

.gitattributes (1 changed line)

@@ -1,2 +1 @@
*.sh text eol=lf
backend/cpp/llama/*.hpp linguist-vendored

.github/labeler.yml (5 changed lines)

@@ -1,11 +1,6 @@
enhancements:
- head-branch: ['^feature', 'feature']
dependencies:
- any:
- changed-files:
- any-glob-to-any-file: 'Makefile'
kind/documentation:
- any:
- changed-files:


@@ -12,14 +12,23 @@ jobs:
- repository: "ggerganov/llama.cpp"
variable: "CPPLLAMA_VERSION"
branch: "master"
- repository: "go-skynet/go-ggml-transformers.cpp"
variable: "GOGGMLTRANSFORMERS_VERSION"
branch: "master"
- repository: "donomii/go-rwkv.cpp"
variable: "RWKV_VERSION"
branch: "main"
- repository: "ggerganov/whisper.cpp"
variable: "WHISPER_CPP_VERSION"
branch: "master"
- repository: "PABannier/bark.cpp"
variable: "BARKCPP_VERSION"
- repository: "go-skynet/go-bert.cpp"
variable: "BERT_VERSION"
branch: "master"
- repository: "go-skynet/bloomz.cpp"
variable: "BLOOMZ_VERSION"
branch: "main"
- repository: "leejet/stable-diffusion.cpp"
variable: "STABLEDIFFUSION_GGML_VERSION"
- repository: "mudler/go-ggllm.cpp"
variable: "GOGGLLM_VERSION"
branch: "master"
- repository: "mudler/go-stable-diffusion"
variable: "STABLEDIFFUSION_VERSION"


@@ -23,7 +23,7 @@ jobs:
sudo pip install --upgrade pip
pip install huggingface_hub
- name: 'Setup yq'
uses: dcarbone/install-yq-action@v1.3.1
uses: dcarbone/install-yq-action@v1.1.1
with:
version: 'v4.44.2'
download-compressed: true


@@ -33,7 +33,7 @@ jobs:
run: |
CGO_ENABLED=0 make build-api
- name: rm
uses: appleboy/ssh-action@v1.2.0
uses: appleboy/ssh-action@v1.1.0
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
@@ -53,7 +53,7 @@ jobs:
rm: true
target: ./local-ai
- name: restarting
uses: appleboy/ssh-action@v1.2.0
uses: appleboy/ssh-action@v1.1.0
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}


@@ -15,7 +15,7 @@ jobs:
strategy:
matrix:
include:
- base-image: intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04
- base-image: intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04
runs-on: 'ubuntu-latest'
platforms: 'linux/amd64'
runs-on: ${{matrix.runs-on}}


@@ -105,6 +105,14 @@ jobs:
tests-parler-tts:
runs-on: ubuntu-latest
steps:
- name: Force Install GIT latest
run: |
sudo apt-get update \
&& sudo apt-get install -y software-properties-common \
&& sudo apt-get update \
&& sudo add-apt-repository -y ppa:git-core/ppa \
&& sudo apt-get update \
&& sudo apt-get install -y git
- name: Clone
uses: actions/checkout@v4
with:
@@ -123,13 +131,6 @@ jobs:
run: |
make --jobs=5 --output-sync=target -C backend/python/parler-tts
make --jobs=5 --output-sync=target -C backend/python/parler-tts test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
tests-openvoice:
runs-on: ubuntu-latest


@@ -224,7 +224,7 @@ jobs:
- name: Dependencies
run: |
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
pip install --user --no-cache-dir grpcio-tools
pip install --user --no-cache-dir grpcio-tools==1.64.1
- name: Test
run: |
export C_INCLUDE_PATH=/usr/local/include

.gitignore (2 changed lines)

@@ -2,7 +2,6 @@
/sources/
__pycache__/
*.a
*.o
get-sources
prepare-sources
/backend/cpp/llama/grpc-server
@@ -13,6 +12,7 @@ prepare-sources
go-ggml-transformers
go-gpt2
go-rwkv
whisper.cpp
/bloomz
go-bert


@@ -85,8 +85,7 @@ WORKDIR /build
# The requirements-extras target is for any builds with IMAGE_TYPE=extras. It should not be placed in this target unless every IMAGE_TYPE=extras build will use it
FROM requirements-core AS requirements-extras
# Install uv as a system package
RUN curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR=/usr/bin sh
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
ENV PATH="/root/.cargo/bin:${PATH}"
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y

Makefile (237 changed lines)

@@ -8,15 +8,23 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=dafae66cc242eb766797194d3c85c5e502625623
CPPLLAMA_VERSION?=0a1c750c80147687df267114c81956757cc14382
# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
WHISPER_CPP_VERSION?=0fbaac9c891055796456df7b9122a70c220f9ca1
# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
# go-piper version
PIPER_REPO?=https://github.com/mudler/go-piper
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759
# stablediffusion version
STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
@@ -26,21 +34,8 @@ STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
# bark.cpp
BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
BARKCPP_VERSION?=v1.0.0
# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=9578fdcc4632dc3de5565f28e2fb16b7c18f8d48
ONNX_VERSION?=1.20.0
ONNX_ARCH?=x64
ONNX_OS?=linux
export BUILD_TYPE?=
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
export GGML_CMAKE_ARGS?=
export CMAKE_ARGS?=
export BACKEND_LIBS?=
@@ -50,7 +45,6 @@ CGO_LDFLAGS_WHISPER+=-lggml
CUDA_LIBPATH?=/usr/local/cuda/lib64/
GO_TAGS?=
BUILD_ID?=
NATIVE?=false
TEST_DIR=/tmp/test
@@ -89,61 +83,7 @@ ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif
# IF native is false, we add -DGGML_NATIVE=OFF to GGML_CMAKE_ARGS
ifeq ($(NATIVE),false)
GGML_CMAKE_ARGS+=-DGGML_NATIVE=OFF
endif
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
GGML_CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
# If build type is cublas, then we set -DGGML_CUDA=ON to GGML_CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
GGML_CMAKE_ARGS+=-DGGML_CUDA=ON
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# to GGML_CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas)
GGML_CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),clblas)
GGML_CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas)
GGML_CMAKE_ARGS+=-DGGML_HIP=ON
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
# But if it's OSX without metal, disable it here
else ifeq ($(OS),Darwin)
ifneq ($(BUILD_TYPE),metal)
GGML_CMAKE_ARGS+=-DGGML_METAL=OFF
else
GGML_CMAKE_ARGS+=-DGGML_METAL=ON
GGML_CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
TARGET+=--target ggml-metal
endif
endif
ifeq ($(BUILD_TYPE),sycl_f16)
GGML_CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
endif
ifeq ($(BUILD_TYPE),sycl_f32)
GGML_CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
endif
# Detect if we are running on arm64
ifneq (,$(findstring aarch64,$(shell uname -m)))
ONNX_ARCH=aarch64
endif
ifeq ($(OS),Darwin)
ONNX_OS=osx
ifneq (,$(findstring aarch64,$(shell uname -m)))
ONNX_ARCH=arm64
else ifneq (,$(findstring arm64,$(shell uname -m)))
ONNX_ARCH=arm64
else
ONNX_ARCH=x86_64
endif
ifeq ($(OSX_SIGNING_IDENTITY),)
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
@@ -154,7 +94,7 @@ ifeq ($(OS),Darwin)
BUILD_TYPE=metal
# disable metal if on Darwin and any other value is explicitly passed.
else ifneq ($(BUILD_TYPE),metal)
GGML_CMAKE_ARGS+=-DGGML_METAL=OFF
CMAKE_ARGS+=-DGGML_METAL=OFF
export GGML_NO_ACCELERATE=1
export GGML_NO_METAL=1
endif
@@ -179,7 +119,7 @@ ifeq ($(BUILD_TYPE),cublas)
endif
ifeq ($(BUILD_TYPE),vulkan)
GGML_CMAKE_ARGS+=-DGGML_VULKAN=1
CMAKE_ARGS+=-DGGML_VULKAN=1
endif
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
@@ -198,10 +138,10 @@ ifeq ($(BUILD_TYPE),hipblas)
export CC=$(ROCM_HOME)/llvm/bin/clang
# llama-ggml has no hipblas support, so override it here.
export STABLE_BUILD_TYPE=
export GGML_HIP=1
export GGML_HIPBLAS=1
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
GGML_CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
endif
@@ -239,23 +179,16 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
endif
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
ifeq ($(ONNX_OS),linux)
ifeq ($(ONNX_ARCH),x64)
ALL_GRPC_BACKENDS+=backend-assets/grpc/bark-cpp
ALL_GRPC_BACKENDS+=backend-assets/grpc/stablediffusion-ggml
endif
endif
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad
ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
# Use filter-out to remove the specified backends
ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
@@ -272,12 +205,23 @@ ifeq ($(BUILD_API_ONLY),true)
GRPC_BACKENDS=
endif
export CMAKE_ARGS?=$(GGML_CMAKE_ARGS)
.PHONY: all test build vendor get-sources prepare-sources prepare
all: help
## BERT embeddings
sources/go-bert.cpp:
mkdir -p sources/go-bert.cpp
cd sources/go-bert.cpp && \
git init && \
git remote add origin $(BERT_REPO) && \
git fetch origin && \
git checkout $(BERT_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
$(MAKE) -C sources/go-bert.cpp libgobert.a
## go-llama.cpp
sources/go-llama.cpp:
mkdir -p sources/go-llama.cpp
@@ -291,23 +235,6 @@ sources/go-llama.cpp:
sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
## bark.cpp
sources/bark.cpp:
git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
cd sources/bark.cpp && \
git checkout $(BARKCPP_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/bark.cpp/build/libbark.a: sources/bark.cpp
cd sources/bark.cpp && \
mkdir -p build && \
cd build && \
cmake $(CMAKE_ARGS) .. && \
cmake --build . --config Release
backend/go/bark/libbark.a: sources/bark.cpp/build/libbark.a
$(MAKE) -C backend/go/bark libbark.a
## go-piper
sources/go-piper:
mkdir -p sources/go-piper
@@ -321,7 +248,21 @@ sources/go-piper:
sources/go-piper/libpiper_binding.a: sources/go-piper
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
## stable diffusion (onnx)
## RWKV
sources/go-rwkv.cpp:
mkdir -p sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && \
git init && \
git remote add origin $(RWKV_REPO) && \
git fetch origin && \
git checkout $(RWKV_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
## stable diffusion
sources/go-stable-diffusion:
mkdir -p sources/go-stable-diffusion
cd sources/go-stable-diffusion && \
@@ -334,44 +275,6 @@ sources/go-stable-diffusion:
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
## stablediffusion (ggml)
sources/stablediffusion-ggml.cpp:
git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
cd sources/stablediffusion-ggml.cpp && \
git checkout $(STABLEDIFFUSION_GGML_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a: sources/stablediffusion-ggml.cpp
cd sources/stablediffusion-ggml.cpp && \
mkdir -p build && \
cd build && \
cmake $(CMAKE_ARGS) .. && \
cmake --build . --config Release
backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a
$(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a
backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ LIBRARY_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion-ggml ./backend/go/image/stablediffusion-ggml/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/stablediffusion-ggml
endif
sources/onnxruntime:
mkdir -p sources/onnxruntime
curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./
backend-assets/lib/libonnxruntime.so.1: backend-assets/lib sources/onnxruntime
cp -rfv sources/onnxruntime/lib/* backend-assets/lib/
ifeq ($(OS),Darwin)
mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
else
mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
endif
## tiny-dream
sources/go-tiny-dream:
mkdir -p sources/go-tiny-dream
@@ -398,19 +301,23 @@ sources/whisper.cpp:
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
get-sources: sources/go-llama.cpp sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
replace:
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
dropreplace:
$(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
@@ -423,8 +330,10 @@ prepare-sources: get-sources replace
rebuild: ## Rebuilds the project
$(GOCMD) clean -cache
$(MAKE) -C sources/go-llama.cpp clean
$(MAKE) -C sources/go-rwkv.cpp clean
$(MAKE) -C sources/whisper.cpp clean
$(MAKE) -C sources/go-stable-diffusion clean
$(MAKE) -C sources/go-bert.cpp clean
$(MAKE) -C sources/go-piper clean
$(MAKE) -C sources/go-tiny-dream clean
$(MAKE) build
@@ -439,9 +348,7 @@ clean: ## Remove build related file
rm -rf release/
rm -rf backend-assets/*
$(MAKE) -C backend/cpp/grpc clean
$(MAKE) -C backend/go/bark clean
$(MAKE) -C backend/cpp/llama clean
$(MAKE) -C backend/go/image/stablediffusion-ggml clean
rm -rf backend/cpp/llama-* || true
$(MAKE) dropreplace
$(MAKE) protogen-clean
@@ -532,6 +439,8 @@ test-models/testmodel.ggml:
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
wget -q https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv
wget -q https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
cp tests/models_fixtures/* test-models
prepare-test: grpcs
@@ -784,6 +693,13 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
backend-assets/grpc: protogen-go replace
mkdir -p backend-assets/grpc
backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/bert-embeddings
endif
backend-assets/grpc/huggingface: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
ifneq ($(UPX),)
@@ -843,6 +759,10 @@ backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/ll
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
# TODO: every binary should have its own folder instead, so can have different metal implementations
ifeq ($(BUILD_TYPE),metal)
cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
endif
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
cp -rf backend/cpp/llama backend/cpp/llama-cuda
@@ -855,7 +775,7 @@ backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/lla
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
$(MAKE) -C backend/cpp/llama-hipblas purge
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
@@ -890,13 +810,6 @@ ifneq ($(UPX),)
$(UPX) backend-assets/grpc/llama-ggml
endif
backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/bark-cpp
endif
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
@@ -904,6 +817,13 @@ ifneq ($(UPX),)
$(UPX) backend-assets/grpc/piper
endif
backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/rwkv
endif
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
@@ -911,13 +831,6 @@ ifneq ($(UPX),)
$(UPX) backend-assets/grpc/stablediffusion
endif
backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/silero-vad
endif
backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
@@ -978,7 +891,7 @@ docker-aio-all:
docker-image-intel:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
@@ -986,7 +899,7 @@ docker-image-intel:
docker-image-intel-xpu:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="none" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \


@@ -38,13 +38,9 @@
</a>
</p>
<p align="center">
<a href="https://trendshift.io/repositories/1484" target="_blank"><img src="https://trendshift.io/api/badge/repositories/1484" alt="go-skynet%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
</p>
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
>
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples)
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/go-skynet/LocalAI/tree/master/examples/)
[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)
@@ -60,17 +56,14 @@ curl https://localai.io/install.sh | sh
Or run with docker:
```bash
# CPU only image:
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-cpu
# Nvidia GPU:
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
# CPU and GPU image (bigger size):
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
# AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/)
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
# Alternative images:
# - if you have an Nvidia GPU:
# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
# - without preconfigured models
# docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
# - without preconfigured models for Nvidia GPUs
# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
```
To load models:
@@ -92,10 +85,6 @@ local-ai run oci://localai/phi-2:latest
## 📰 Latest project news
- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 )
- Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 )
- Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204
- Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples)
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
@@ -166,9 +155,6 @@ Other:
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
- Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
- Another Telegram Bot https://github.com/JackBekket/Hellper
- Auto-documentation https://github.com/JackBekket/Reflexia
- Github bot which answer on issues, with code and documentation as context https://github.com/JackBekket/GitHelper
- Github Actions: https://github.com/marketplace/actions/start-localai
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/
@@ -243,6 +229,7 @@ LocalAI couldn't have been built without the help of great software already avai
- https://github.com/antimatter15/alpaca.cpp
- https://github.com/EdVince/Stable-Diffusion-NCNN
- https://github.com/ggerganov/whisper.cpp
- https://github.com/saharNooby/rwkv.cpp
- https://github.com/rhasspy/piper
## 🤗 Contributors


@@ -1,7 +1,7 @@
name: text-embedding-ada-002
embeddings: true
backend: bert-embeddings
parameters:
model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf
model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
usage: |
You can test this model with curl like this:
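The rest of the usage note is cut off by the hunk context above; purely as an illustrative sketch (not the file's original text), a request against this embeddings model would look roughly like the following, assuming LocalAI's OpenAI-compatible /v1/embeddings route and default port:

```bash
# Hedged sketch: query the bert-embeddings model named above.
# Endpoint path and port are assumptions, not taken from this diff.
curl http://localhost:8080/v1/embeddings \
  -H "Content-Type: application/json" \
  -d '{
        "model": "text-embedding-ada-002",
        "input": "A sample sentence to embed"
      }'
```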


@@ -28,8 +28,6 @@ service Backend {
rpc Rerank(RerankRequest) returns (RerankResult) {}
rpc GetMetrics(MetricsRequest) returns (MetricsResponse);
rpc VAD(VADRequest) returns (VADResponse) {}
}
// Define the empty request
@@ -235,16 +233,6 @@ message ModelOptions {
bool FlashAttention = 56;
bool NoKVOffload = 57;
string ModelPath = 59;
repeated string LoraAdapters = 60;
repeated float LoraScales = 61;
repeated string Options = 62;
string CacheTypeKey = 63;
string CacheTypeValue = 64;
}
message Result {
@@ -300,19 +288,6 @@ message TTSRequest {
optional string language = 5;
}
message VADRequest {
repeated float audio = 1;
}
message VADSegment {
float start = 1;
float end = 2;
}
message VADResponse {
repeated VADSegment segments = 1;
}
message SoundGenerationRequest {
string text = 1;
string model = 2;


@@ -7,6 +7,44 @@ BUILD_TYPE?=
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
TARGET?=--target grpc-server
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF
# If build type is cublas, then we set -DGGML_CUDA=ON to CMAKE_ARGS automatically
ifeq ($(BUILD_TYPE),cublas)
CMAKE_ARGS+=-DGGML_CUDA=ON
# If build type is openblas then we set -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# to CMAKE_ARGS automatically
else ifeq ($(BUILD_TYPE),openblas)
CMAKE_ARGS+=-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
# If build type is clblas (openCL) we set -DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
else ifeq ($(BUILD_TYPE),clblas)
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
else ifeq ($(BUILD_TYPE),hipblas)
CMAKE_ARGS+=-DGGML_HIPBLAS=ON
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
# But if it's OSX without metal, disable it here
else ifeq ($(OS),Darwin)
ifneq ($(BUILD_TYPE),metal)
CMAKE_ARGS+=-DGGML_METAL=OFF
else
CMAKE_ARGS+=-DGGML_METAL=ON
# Until this is tested properly, we disable embedded metal file
# as we already embed it as part of the LocalAI assets
CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=OFF
TARGET+=--target ggml-metal
endif
endif
ifeq ($(BUILD_TYPE),sycl_f16)
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
endif
ifeq ($(BUILD_TYPE),sycl_f32)
CMAKE_ARGS+=-DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
endif
llama.cpp:
mkdir -p llama.cpp
cd llama.cpp && \
@@ -34,7 +72,7 @@ clean: purge
rm -rf llama.cpp
grpc-server: llama.cpp llama.cpp/examples/grpc-server
@echo "Building grpc-server for llama.cpp with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+bash -c "source $(ONEAPI_VARS); \
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release $(TARGET)"
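
To show how the BUILD_TYPE switch above is typically exercised, here is a minimal invocation sketch; the directory is an assumption, since this Makefile's path is not shown in the diff (the root Makefile refers to backend/cpp/llama):

```bash
# Hedged sketch: build the llama.cpp gRPC server with CUDA enabled.
# BUILD_TYPE=cublas maps to -DGGML_CUDA=ON via the conditional block above.
cd backend/cpp/llama
BUILD_TYPE=cublas make grpc-server
```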


@@ -203,7 +203,7 @@ struct llama_client_slot
std::string stopping_word;
// sampling
struct common_params_sampling sparams;
struct common_sampler_params sparams;
common_sampler *ctx_sampling = nullptr;
int32_t ga_i = 0; // group-attention state
@@ -662,7 +662,7 @@ struct llama_server_context
bool launch_slot_with_data(llama_client_slot* &slot, json data) {
slot_params default_params;
common_params_sampling default_sparams;
common_sampler_params default_sparams;
slot->params.stream = json_value(data, "stream", false);
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
@@ -670,6 +670,7 @@ struct llama_server_context
slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
slot->sparams.typ_p = json_value(data, "typical_p", default_sparams.typ_p);
slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
@@ -1205,6 +1206,7 @@ struct llama_server_context
{"top_k", slot.sparams.top_k},
{"top_p", slot.sparams.top_p},
{"min_p", slot.sparams.min_p},
{"tfs_z", slot.sparams.tfs_z},
{"typical_p", slot.sparams.typ_p},
{"repeat_last_n", slot.sparams.penalty_last_n},
{"repeat_penalty", slot.sparams.penalty_repeat},
@@ -2103,6 +2105,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
// slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
// slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
// slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
// slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
// slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p);
// slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
// slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n);
@@ -2126,6 +2129,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
data["n_predict"] = predict->tokens() == 0 ? -1 : predict->tokens();
data["top_k"] = predict->topk();
data["top_p"] = predict->topp();
data["tfs_z"] = predict->tailfreesamplingz();
data["typical_p"] = predict->typicalp();
data["temperature"] = predict->temperature();
data["repeat_last_n"] = predict->repeat();
@@ -2172,6 +2176,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
// llama.params.n_predict = predict->tokens() == 0 ? -1 : predict->tokens();
// llama.params.sparams.top_k = predict->topk();
// llama.params.sparams.top_p = predict->topp();
// llama.params.sparams.tfs_z = predict->tailfreesamplingz();
// llama.params.sparams.typical_p = predict->typicalp();
// llama.params.sparams.penalty_last_n = predict->repeat();
// llama.params.sparams.temp = predict->temperature();
@@ -2241,12 +2246,6 @@ static void params_parse(const backend::ModelOptions* request,
}
// params.model_alias ??
params.model_alias = request->modelfile();
if (!request->cachetypekey().empty()) {
params.cache_type_k = request->cachetypekey();
}
if (!request->cachetypevalue().empty()) {
params.cache_type_v = request->cachetypevalue();
}
params.n_ctx = request->contextsize();
//params.memory_f16 = request->f16memory();
params.cpuparams.n_threads = request->threads();
@@ -2305,7 +2304,6 @@ static void params_parse(const backend::ModelOptions* request,
params.use_mmap = request->mmap();
params.flash_attn = request->flashattention();
params.no_kv_offload = request->nokvoffload();
params.ctx_shift = false; // We control context-shifting in any case (and we disable it as it could just lead to infinite loops)
params.embedding = request->embeddings();


@@ -1,25 +0,0 @@
INCLUDE_PATH := $(abspath ./)
LIBRARY_PATH := $(abspath ./)
AR?=ar
BUILD_TYPE?=
# keep standard at C11 and C++11
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../sources/bark.cpp/examples -I$(INCLUDE_PATH)/../../../sources/bark.cpp/spm-headers -I$(INCLUDE_PATH)/../../../sources/bark.cpp -O3 -DNDEBUG -std=c++17 -fPIC
LDFLAGS = -L$(LIBRARY_PATH) -L$(LIBRARY_PATH)/../../../sources/bark.cpp/build/examples -lbark -lstdc++ -lm
# warnings
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
gobark.o:
$(CXX) $(CXXFLAGS) gobark.cpp -o gobark.o -c $(LDFLAGS)
libbark.a: gobark.o
cp $(INCLUDE_PATH)/../../../sources/bark.cpp/build/libbark.a ./
$(AR) rcs libbark.a gobark.o
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml.c.o
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-alloc.c.o
$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-backend.c.o
clean:
rm -f gobark.o libbark.a


@@ -1,85 +0,0 @@
#include <iostream>
#include <tuple>
#include "bark.h"
#include "gobark.h"
#include "common.h"
#include "ggml.h"
struct bark_context *c;
void bark_print_progress_callback(struct bark_context *bctx, enum bark_encoding_step step, int progress, void *user_data) {
if (step == bark_encoding_step::SEMANTIC) {
printf("\rGenerating semantic tokens... %d%%", progress);
} else if (step == bark_encoding_step::COARSE) {
printf("\rGenerating coarse tokens... %d%%", progress);
} else if (step == bark_encoding_step::FINE) {
printf("\rGenerating fine tokens... %d%%", progress);
}
fflush(stdout);
}
int load_model(char *model) {
// initialize bark context
struct bark_context_params ctx_params = bark_context_default_params();
bark_params params;
params.model_path = model;
// ctx_params.verbosity = verbosity;
ctx_params.progress_callback = bark_print_progress_callback;
ctx_params.progress_callback_user_data = nullptr;
struct bark_context *bctx = bark_load_model(params.model_path.c_str(), ctx_params, params.seed);
if (!bctx) {
fprintf(stderr, "%s: Could not load model\n", __func__);
return 1;
}
c = bctx;
return 0;
}
int tts(char *text,int threads, char *dst ) {
ggml_time_init();
const int64_t t_main_start_us = ggml_time_us();
// generate audio
if (!bark_generate_audio(c, text, threads)) {
fprintf(stderr, "%s: An error occured. If the problem persists, feel free to open an issue to report it.\n", __func__);
return 1;
}
const float *audio_data = bark_get_audio_data(c);
if (audio_data == NULL) {
fprintf(stderr, "%s: Could not get audio data\n", __func__);
return 1;
}
const int audio_arr_size = bark_get_audio_data_size(c);
std::vector<float> audio_arr(audio_data, audio_data + audio_arr_size);
write_wav_on_disk(audio_arr, dst);
// report timing
{
const int64_t t_main_end_us = ggml_time_us();
const int64_t t_load_us = bark_get_load_time(c);
const int64_t t_eval_us = bark_get_eval_time(c);
printf("\n\n");
printf("%s: load time = %8.2f ms\n", __func__, t_load_us / 1000.0f);
printf("%s: eval time = %8.2f ms\n", __func__, t_eval_us / 1000.0f);
printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us) / 1000.0f);
}
return 0;
}
int unload() {
bark_free(c);
}


@@ -1,52 +0,0 @@
package main
// #cgo CXXFLAGS: -I${SRCDIR}/../../../sources/bark.cpp/ -I${SRCDIR}/../../../sources/bark.cpp/encodec.cpp -I${SRCDIR}/../../../sources/bark.cpp/examples -I${SRCDIR}/../../../sources/bark.cpp/spm-headers
// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../sources/bark.cpp/build/examples -L${SRCDIR}/../../../sources/bark.cpp/build/encodec.cpp/ -lbark -lencodec -lcommon
// #include <gobark.h>
// #include <stdlib.h>
import "C"
import (
"fmt"
"unsafe"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
type Bark struct {
base.SingleThread
threads int
}
func (sd *Bark) Load(opts *pb.ModelOptions) error {
sd.threads = int(opts.Threads)
modelFile := C.CString(opts.ModelFile)
defer C.free(unsafe.Pointer(modelFile))
ret := C.load_model(modelFile)
if ret != 0 {
return fmt.Errorf("inference failed")
}
return nil
}
func (sd *Bark) TTS(opts *pb.TTSRequest) error {
t := C.CString(opts.Text)
defer C.free(unsafe.Pointer(t))
dst := C.CString(opts.Dst)
defer C.free(unsafe.Pointer(dst))
threads := C.int(sd.threads)
ret := C.tts(t, threads, dst)
if ret != 0 {
return fmt.Errorf("inference failed")
}
return nil
}


@@ -1,8 +0,0 @@
#ifdef __cplusplus
extern "C" {
#endif
int load_model(char *model);
int tts(char *text,int threads, char *dst );
#ifdef __cplusplus
}
#endif


@@ -1,21 +0,0 @@
INCLUDE_PATH := $(abspath ./)
LIBRARY_PATH := $(abspath ./)
AR?=ar
BUILD_TYPE?=
# keep standard at C11 and C++11
CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC
# warnings
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
gosd.o:
$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
libsd.a: gosd.o
cp $(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a ./libsd.a
$(AR) rcs libsd.a gosd.o
clean:
rm -f gosd.o libsd.a


@@ -1,228 +0,0 @@
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <iostream>
#include <random>
#include <string>
#include <vector>
#include "gosd.h"
// #include "preprocessing.hpp"
#include "flux.hpp"
#include "stable-diffusion.h"
#define STB_IMAGE_IMPLEMENTATION
#define STB_IMAGE_STATIC
#include "stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#define STB_IMAGE_WRITE_STATIC
#include "stb_image_write.h"
#define STB_IMAGE_RESIZE_IMPLEMENTATION
#define STB_IMAGE_RESIZE_STATIC
#include "stb_image_resize.h"
// Names of the sampler method, same order as enum sample_method in stable-diffusion.h
const char* sample_method_str[] = {
"euler_a",
"euler",
"heun",
"dpm2",
"dpm++2s_a",
"dpm++2m",
"dpm++2mv2",
"ipndm",
"ipndm_v",
"lcm",
};
// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
const char* schedule_str[] = {
"default",
"discrete",
"karras",
"exponential",
"ays",
"gits",
};
sd_ctx_t* sd_c;
sample_method_t sample_method;
int load_model(char *model, char* options[], int threads, int diff) {
fprintf (stderr, "Loading model!\n");
char *stableDiffusionModel = "";
if (diff == 1 ) {
stableDiffusionModel = model;
model = "";
}
// decode options. Options are in the form optname:optval, or just optname for booleans.
char *clip_l_path = "";
char *clip_g_path = "";
char *t5xxl_path = "";
char *vae_path = "";
char *scheduler = "";
char *sampler = "";
// If options is not NULL, parse options
for (int i = 0; options[i] != NULL; i++) {
char *optname = strtok(options[i], ":");
char *optval = strtok(NULL, ":");
if (optval == NULL) {
optval = "true";
}
if (!strcmp(optname, "clip_l_path")) {
clip_l_path = optval;
}
if (!strcmp(optname, "clip_g_path")) {
clip_g_path = optval;
}
if (!strcmp(optname, "t5xxl_path")) {
t5xxl_path = optval;
}
if (!strcmp(optname, "vae_path")) {
vae_path = optval;
}
if (!strcmp(optname, "scheduler")) {
scheduler = optval;
}
if (!strcmp(optname, "sampler")) {
sampler = optval;
}
}
int sample_method_found = -1;
for (int m = 0; m < N_SAMPLE_METHODS; m++) {
if (!strcmp(sampler, sample_method_str[m])) {
sample_method_found = m;
}
}
if (sample_method_found == -1) {
fprintf(stderr, "Invalid sample method, default to EULER_A!\n");
sample_method_found = EULER_A;
}
sample_method = (sample_method_t)sample_method_found;
int schedule_found = -1;
for (int d = 0; d < N_SCHEDULES; d++) {
if (!strcmp(scheduler, schedule_str[d])) {
schedule_found = d;
fprintf (stderr, "Found scheduler: %s\n", scheduler);
}
}
if (schedule_found == -1) {
fprintf (stderr, "Invalid scheduler! using DEFAULT\n");
schedule_found = DEFAULT;
}
schedule_t schedule = (schedule_t)schedule_found;
fprintf (stderr, "Creating context\n");
sd_ctx_t* sd_ctx = new_sd_ctx(model,
clip_l_path,
clip_g_path,
t5xxl_path,
stableDiffusionModel,
vae_path,
"",
"",
"",
"",
"",
false,
false,
false,
threads,
SD_TYPE_COUNT,
STD_DEFAULT_RNG,
schedule,
false,
false,
false,
false);
if (sd_ctx == NULL) {
fprintf (stderr, "failed loading model (generic error)\n");
return 1;
}
fprintf (stderr, "Created context: OK\n");
sd_c = sd_ctx;
return 0;
}
int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed , char *dst, float cfg_scale) {
sd_image_t* results;
std::vector<int> skip_layers = {7, 8, 9};
fprintf (stderr, "Generating image\n");
results = txt2img(sd_c,
text,
negativeText,
-1, //clip_skip
cfg_scale, // sfg_scale
3.5f,
width,
height,
sample_method,
steps,
seed,
1,
NULL,
0.9f,
20.f,
false,
"",
skip_layers.data(),
skip_layers.size(),
0,
0.01,
0.2);
if (results == NULL) {
fprintf (stderr, "NO results\n");
return 1;
}
if (results[0].data == NULL) {
fprintf (stderr, "Results with no data\n");
return 1;
}
fprintf (stderr, "Writing PNG\n");
fprintf (stderr, "DST: %s\n", dst);
fprintf (stderr, "Width: %d\n", results[0].width);
fprintf (stderr, "Height: %d\n", results[0].height);
fprintf (stderr, "Channel: %d\n", results[0].channel);
fprintf (stderr, "Data: %p\n", results[0].data);
stbi_write_png(dst, results[0].width, results[0].height, results[0].channel,
results[0].data, 0, NULL);
fprintf (stderr, "Saved resulting image to '%s'\n", dst);
// TODO: free results. Why does it crash?
free(results[0].data);
results[0].data = NULL;
free(results);
fprintf (stderr, "gen_image is done", dst);
return 0;
}
int unload() {
free_sd_ctx(sd_c);
}


@@ -1,96 +0,0 @@
package main
// #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include
// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src/ggml-cpu -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src -lsd -lstdc++ -lm -lggml -lggml-base -lggml-cpu -lgomp
// #include <gosd.h>
// #include <stdlib.h>
import "C"
import (
"fmt"
"os"
"path/filepath"
"strings"
"unsafe"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/utils"
)
type SDGGML struct {
base.SingleThread
threads int
sampleMethod string
cfgScale float32
}
func (sd *SDGGML) Load(opts *pb.ModelOptions) error {
sd.threads = int(opts.Threads)
modelFile := C.CString(opts.ModelFile)
defer C.free(unsafe.Pointer(modelFile))
var options **C.char
// prepare the options array to pass to C
size := C.size_t(unsafe.Sizeof((*C.char)(nil)))
length := C.size_t(len(opts.Options))
options = (**C.char)(C.malloc(length * size))
view := (*[1 << 30]*C.char)(unsafe.Pointer(options))[0:len(opts.Options):len(opts.Options)]
var diffusionModel int
var oo []string
for _, op := range opts.Options {
if op == "diffusion_model" {
diffusionModel = 1
continue
}
// If it's an option path, we resolve absolute path from the model path
if strings.Contains(op, ":") && strings.Contains(op, "path") {
data := strings.Split(op, ":")
data[1] = filepath.Join(opts.ModelPath, data[1])
if err := utils.VerifyPath(data[1], opts.ModelPath); err == nil {
oo = append(oo, strings.Join(data, ":"))
}
} else {
oo = append(oo, op)
}
}
fmt.Fprintf(os.Stderr, "Options: %+v\n", oo)
for i, x := range oo {
view[i] = C.CString(x)
}
sd.cfgScale = opts.CFGScale
ret := C.load_model(modelFile, options, C.int(opts.Threads), C.int(diffusionModel))
if ret != 0 {
return fmt.Errorf("could not load model")
}
return nil
}
func (sd *SDGGML) GenerateImage(opts *pb.GenerateImageRequest) error {
t := C.CString(opts.PositivePrompt)
defer C.free(unsafe.Pointer(t))
dst := C.CString(opts.Dst)
defer C.free(unsafe.Pointer(dst))
negative := C.CString(opts.NegativePrompt)
defer C.free(unsafe.Pointer(negative))
ret := C.gen_image(t, negative, C.int(opts.Width), C.int(opts.Height), C.int(opts.Step), C.int(opts.Seed), dst, C.float(sd.cfgScale))
if ret != 0 {
return fmt.Errorf("inference failed")
}
return nil
}


@@ -1,8 +0,0 @@
#ifdef __cplusplus
extern "C" {
#endif
int load_model(char *model, char* options[], int threads, int diffusionModel);
int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed, char *dst, float cfg_scale);
#ifdef __cplusplus
}
#endif


@@ -1,20 +0,0 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
grpc "github.com/mudler/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &SDGGML{}); err != nil {
panic(err)
}
}


@@ -0,0 +1,34 @@
package main
// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
bert "github.com/go-skynet/go-bert.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
type Embeddings struct {
base.SingleThread
bert *bert.Bert
}
func (llm *Embeddings) Load(opts *pb.ModelOptions) error {
model, err := bert.New(opts.ModelFile)
llm.bert = model
return err
}
func (llm *Embeddings) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
if len(opts.EmbeddingTokens) > 0 {
tokens := []int{}
for _, t := range opts.EmbeddingTokens {
tokens = append(tokens, int(t))
}
return llm.bert.TokenEmbeddings(tokens, bert.SetThreads(int(opts.Threads)))
}
return llm.bert.Embeddings(opts.Embeddings, bert.SetThreads(int(opts.Threads)))
}


@@ -1,6 +1,7 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
@@ -14,7 +15,7 @@ var (
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &Bark{}); err != nil {
if err := grpc.StartServer(*addr, &Embeddings{}); err != nil {
panic(err)
}
}


@@ -15,7 +15,7 @@ var (
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &VAD{}); err != nil {
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
panic(err)
}
}


@@ -0,0 +1,95 @@
package main
// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"path/filepath"
"github.com/donomii/go-rwkv.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)
const tokenizerSuffix = ".tokenizer.json"
type LLM struct {
base.SingleThread
rwkv *rwkv.RwkvState
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
tokenizerFile := opts.Tokenizer
if tokenizerFile == "" {
modelFile := filepath.Base(opts.ModelFile)
tokenizerFile = modelFile + tokenizerSuffix
}
modelPath := filepath.Dir(opts.ModelFile)
tokenizerPath := filepath.Join(modelPath, tokenizerFile)
model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads()))
if model == nil {
return fmt.Errorf("rwkv could not load model")
}
llm.rwkv = model
return nil
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
stopWord := "\n"
if len(opts.StopPrompts) > 0 {
stopWord = opts.StopPrompts[0]
}
if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
return "", err
}
response := llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), nil)
return response, nil
}
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
go func() {
stopWord := "\n"
if len(opts.StopPrompts) > 0 {
stopWord = opts.StopPrompts[0]
}
if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
fmt.Println("Error processing input: ", err)
return
}
llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), func(s string) bool {
results <- s
return true
})
close(results)
}()
return nil
}
func (llm *LLM) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationResponse, error) {
tokens, err := llm.rwkv.Tokenizer.Encode(opts.Prompt)
if err != nil {
return pb.TokenizationResponse{}, err
}
l := len(tokens)
i32Tokens := make([]int32, l)
for i, t := range tokens {
i32Tokens[i] = int32(t.ID)
}
return pb.TokenizationResponse{
Length: int32(l),
Tokens: i32Tokens,
}, nil
}


@@ -1,54 +0,0 @@
package main
// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/streamer45/silero-vad-go/speech"
)
type VAD struct {
base.SingleThread
detector *speech.Detector
}
func (vad *VAD) Load(opts *pb.ModelOptions) error {
v, err := speech.NewDetector(speech.DetectorConfig{
ModelPath: opts.ModelFile,
SampleRate: 16000,
//WindowSize: 1024,
Threshold: 0.5,
MinSilenceDurationMs: 0,
SpeechPadMs: 0,
})
if err != nil {
return fmt.Errorf("create silero detector: %w", err)
}
vad.detector = v
return err
}
func (vad *VAD) VAD(req *pb.VADRequest) (pb.VADResponse, error) {
audio := req.Audio
segments, err := vad.detector.Detect(audio)
if err != nil {
return pb.VADResponse{}, fmt.Errorf("detect: %w", err)
}
vadSegments := []*pb.VADSegment{}
for _, s := range segments {
vadSegments = append(vadSegments, &pb.VADSegment{
Start: float32(s.SpeechStartAt),
End: float32(s.SpeechEndAt),
})
}
return pb.VADResponse{
Segments: vadSegments,
}, nil
}


@@ -1,6 +1,6 @@
accelerate
auto-gptq==0.7.1
grpcio==1.68.1
grpcio==1.67.0
protobuf
certifi
transformers


@@ -1,4 +1,4 @@
bark==0.1.5
grpcio==1.68.1
grpcio==1.67.0
protobuf
certifi


@@ -1,9 +1,8 @@
.DEFAULT_GOAL := install
.PHONY: install
install:
install: protogen
bash install.sh
$(MAKE) protogen
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py
@@ -13,7 +12,7 @@ protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py:
bash protogen.sh
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
.PHONY: clean
clean: protogen-clean


@@ -1,6 +0,0 @@
#!/bin/bash
set -e
source $(dirname $0)/../common/libbackend.sh
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto


@@ -1,3 +1,2 @@
grpcio==1.68.1
protobuf
grpcio-tools
grpcio==1.67.0
protobuf


@@ -1,4 +1,4 @@
grpcio==1.68.1
grpcio==1.67.0
protobuf
certifi
packaging==24.1


@@ -247,16 +247,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
use_safetensors=True,
variant=variant)
elif request.PipelineType == "FluxPipeline":
if fromSingleFile:
self.pipe = FluxPipeline.from_single_file(modelFile,
torch_dtype=torchType,
use_safetensors=True)
else:
self.pipe = FluxPipeline.from_pretrained(
request.Model,
torch_dtype=torch.bfloat16)
if request.LowVRAM:
self.pipe.enable_model_cpu_offload()
if request.LowVRAM:
self.pipe.enable_model_cpu_offload()
elif request.PipelineType == "FluxTransformer2DModel":
dtype = torch.bfloat16
# specify from environment or default to "ChuckMcSneed/FLUX.1-dev"
@@ -301,34 +296,22 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
self.pipe.controlnet = self.controlnet
else:
self.controlnet = None
if request.LoraAdapter and not os.path.isabs(request.LoraAdapter):
# Assume directory from request.ModelFile.
# Only if request.LoraAdapter it's not an absolute path
if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter:
# get base path of modelFile
modelFileBase = os.path.dirname(request.ModelFile)
# modify LoraAdapter to be relative to modelFileBase
request.LoraAdapter = os.path.join(request.ModelPath, request.LoraAdapter)
request.LoraAdapter = os.path.join(modelFileBase, request.LoraAdapter)
device = "cpu" if not request.CUDA else "cuda"
self.device = device
if request.LoraAdapter:
# Check if its a local file and not a directory ( we load lora differently for a safetensor file )
if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter):
# self.load_lora_weights(request.LoraAdapter, 1, device, torchType)
self.pipe.load_lora_weights(request.LoraAdapter)
else:
self.pipe.unet.load_attn_procs(request.LoraAdapter)
if len(request.LoraAdapters) > 0:
i = 0
adapters_name = []
adapters_weights = []
for adapter in request.LoraAdapters:
if not os.path.isabs(adapter):
adapter = os.path.join(request.ModelPath, adapter)
self.pipe.load_lora_weights(adapter, adapter_name=f"adapter_{i}")
adapters_name.append(f"adapter_{i}")
i += 1
for adapters_weight in request.LoraScales:
adapters_weights.append(adapters_weight)
self.pipe.set_adapters(adapters_name, adapter_weights=adapters_weights)
if request.CUDA:
self.pipe.to('cuda')
@@ -409,6 +392,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
# create a dictionary of values for the parameters
options = {
"negative_prompt": request.negative_prompt,
"width": request.width,
"height": request.height,
"num_inference_steps": steps,
}
@@ -426,13 +411,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
keys = options.keys()
if request.EnableParameters != "":
keys = [key.strip() for key in request.EnableParameters.split(",")]
keys = request.EnableParameters.split(",")
if request.EnableParameters == "none":
keys = []
# create a dictionary of parameters by using the keys from EnableParameters and the values from defaults
kwargs = {key: options.get(key) for key in keys if key in options}
kwargs = {key: options[key] for key in keys}
# Set seed
if request.seed > 0:
@@ -443,12 +428,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if self.PipelineType == "FluxPipeline":
kwargs["max_sequence_length"] = 256
if request.width:
kwargs["width"] = request.width
if request.height:
kwargs["height"] = request.height
if self.PipelineType == "FluxTransformer2DModel":
kwargs["output_type"] = "pil"
kwargs["generator"] = torch.Generator("cpu").manual_seed(0)
@@ -468,7 +447,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
export_to_video(video_frames, request.dst)
return backend_pb2.Result(message="Media generated successfully", success=True)
print(f"Generating image with {kwargs=}", file=sys.stderr)
image = {}
if COMPEL:
conditioning, pooled = self.compel.build_conditioning_tensor(prompt)

View File

@@ -1,5 +1,5 @@
setuptools
grpcio==1.68.1
grpcio==1.67.0
pillow
protobuf
certifi

View File

@@ -1,4 +1,4 @@
grpcio==1.68.1
grpcio==1.67.0
protobuf
certifi
wheel

View File

@@ -1,3 +1,3 @@
grpcio==1.68.1
grpcio==1.67.0
protobuf
certifi

View File

@@ -2,7 +2,7 @@
intel-extension-for-pytorch
torch
optimum[openvino]
grpcio==1.68.1
grpcio==1.67.0
protobuf
librosa==0.9.1
faster-whisper==0.9.0

View File

@@ -1,4 +1,4 @@
grpcio==1.68.1
grpcio==1.67.0
protobuf
librosa
faster-whisper

View File

@@ -12,10 +12,9 @@ export SKIP_CONDA=1
endif
.PHONY: parler-tts
parler-tts:
parler-tts: protogen
@echo "Installing $(CONDA_ENV_PATH)..."
bash install.sh $(CONDA_ENV_PATH)
$(MAKE) protogen
.PHONY: run
run: protogen
@@ -37,7 +36,7 @@ protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py:
bash protogen.sh
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
.PHONY: clean
clean: protogen-clean

View File

@@ -11,18 +11,16 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi
installRequirements
# https://github.com/descriptinc/audiotools/issues/101
# incompatible protobuf versions.
PYDIR=python3.10
pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"
# PYDIR=python3.10
# pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"
if [ ! -d ${pyenv} ]; then
echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
exit 1
fi
# if [ ! -d ${pyenv} ]; then
# echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
# exit 1
# fi
curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py
# curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py

View File

@@ -1,6 +0,0 @@
#!/bin/bash
set -e
source $(dirname $0)/../common/libbackend.sh
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto

View File

@@ -1,4 +1,4 @@
git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17
llvmlite==0.43.0
numba==0.60.0
grpcio-tools==1.42.0
git+https://github.com/descriptinc/audiotools

View File

@@ -1,3 +1,4 @@
grpcio==1.68.1
grpcio==1.67.0
protobuf
certifi
llvmlite==0.43.0
llvmlite==0.43.0

View File

@@ -1,3 +1,3 @@
grpcio==1.68.1
grpcio==1.67.0
protobuf
certifi

View File

@@ -2,5 +2,5 @@ torch==2.4.1
accelerate
transformers
bitsandbytes
sentence-transformers==3.3.1
sentence-transformers==3.2.0
transformers

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
accelerate
sentence-transformers==3.3.1
sentence-transformers==3.2.0
transformers

View File

@@ -1,4 +1,4 @@
torch==2.4.1
accelerate
sentence-transformers==3.3.1
sentence-transformers==3.2.0
transformers

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0
accelerate
sentence-transformers==3.3.1
sentence-transformers==3.2.0
transformers

View File

@@ -4,5 +4,5 @@ torch
optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
accelerate
sentence-transformers==3.3.1
sentence-transformers==3.2.0
transformers

View File

@@ -1,4 +1,4 @@
grpcio==1.68.1
grpcio==1.67.0
protobuf
certifi
datasets

View File

@@ -1,4 +1,4 @@
grpcio==1.68.1
grpcio==1.67.0
protobuf
scipy==1.14.0
certifi

View File

@@ -1,4 +1,4 @@
grpcio==1.68.1
grpcio==1.67.0
protobuf
certifi
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,3 +1,3 @@
grpcio==1.68.1
grpcio==1.67.0
protobuf
certifi

View File

@@ -22,7 +22,7 @@ if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE}" == "xtrue" ]; then
git clone https://github.com/vllm-project/vllm
fi
pushd vllm
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.68.1 protobuf bitsandbytes
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.67.0 protobuf bitsandbytes
uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
VLLM_TARGET_DEVICE=cpu python setup.py install
popd

View File

@@ -1,4 +1,4 @@
grpcio==1.68.1
grpcio==1.67.0
protobuf
certifi
setuptools

38
core/application.go Normal file
View File

@@ -0,0 +1,38 @@
package core
import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/model"
)
// The purpose of this structure is to hold pointers to all initialized services, to make plumbing easy
// Perhaps a proper DI system is worth it in the future, but for now keep things simple.
type Application struct {
// Application-Level Config
ApplicationConfig *config.ApplicationConfig
// ApplicationState *ApplicationState
// Core Low-Level Services
BackendConfigLoader *config.BackendConfigLoader
ModelLoader *model.ModelLoader
// Backend Services
// EmbeddingsBackendService *backend.EmbeddingsBackendService
// ImageGenerationBackendService *backend.ImageGenerationBackendService
// LLMBackendService *backend.LLMBackendService
// TranscriptionBackendService *backend.TranscriptionBackendService
// TextToSpeechBackendService *backend.TextToSpeechBackendService
// LocalAI System Services
BackendMonitorService *services.BackendMonitorService
GalleryService *services.GalleryService
LocalAIMetricsService *services.LocalAIMetricsService
// OpenAIService *services.OpenAIService
}
// TODO [NEXT PR?]: Break up ApplicationConfig.
// Migrate over stuff that is not set via config at all - especially runtime stuff
type ApplicationState struct {
}
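
For orientation only, a rough sketch of how a struct like this could be populated, reusing constructors that appear elsewhere in this diff (NewBackendConfigLoader, NewModelLoader, NewGalleryService, NewLocalAIMetricsService). The BackendMonitorService field is left out because its constructor is not shown here, and this is an illustration rather than the project's actual wiring:

// newApplicationSketch is an illustrative constructor: it fills the fields
// above using only constructors visible elsewhere in this diff and returns
// the first error it hits.
func newApplicationSketch(appConfig *config.ApplicationConfig) (*Application, error) {
	metrics, err := services.NewLocalAIMetricsService()
	if err != nil {
		return nil, err
	}
	return &Application{
		ApplicationConfig:     appConfig,
		BackendConfigLoader:   config.NewBackendConfigLoader(appConfig.ModelPath),
		ModelLoader:           model.NewModelLoader(appConfig.ModelPath),
		GalleryService:        services.NewGalleryService(appConfig),
		LocalAIMetricsService: metrics,
	}, nil
}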

View File

@@ -1,39 +0,0 @@
package application
import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/templates"
)
type Application struct {
backendLoader *config.BackendConfigLoader
modelLoader *model.ModelLoader
applicationConfig *config.ApplicationConfig
templatesEvaluator *templates.Evaluator
}
func newApplication(appConfig *config.ApplicationConfig) *Application {
return &Application{
backendLoader: config.NewBackendConfigLoader(appConfig.ModelPath),
modelLoader: model.NewModelLoader(appConfig.ModelPath),
applicationConfig: appConfig,
templatesEvaluator: templates.NewEvaluator(appConfig.ModelPath),
}
}
func (a *Application) BackendLoader() *config.BackendConfigLoader {
return a.backendLoader
}
func (a *Application) ModelLoader() *model.ModelLoader {
return a.modelLoader
}
func (a *Application) ApplicationConfig() *config.ApplicationConfig {
return a.applicationConfig
}
func (a *Application) TemplatesEvaluator() *templates.Evaluator {
return a.templatesEvaluator
}

View File

@@ -11,9 +11,17 @@ import (
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
opts := ModelOptions(backendConfig, appConfig)
var inferenceModel interface{}
var err error
inferenceModel, err := loader.Load(opts...)
opts := ModelOptions(backendConfig, appConfig, []model.Option{})
if backendConfig.Backend == "" {
inferenceModel, err = loader.GreedyLoader(opts...)
} else {
opts = append(opts, model.WithBackendString(backendConfig.Backend))
inferenceModel, err = loader.BackendLoader(opts...)
}
if err != nil {
return nil, err
}

View File

@@ -9,8 +9,9 @@ import (
func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
opts := ModelOptions(backendConfig, appConfig)
inferenceModel, err := loader.Load(
opts := ModelOptions(backendConfig, appConfig, []model.Option{})
inferenceModel, err := loader.BackendLoader(
opts...,
)
if err != nil {

View File

@@ -16,6 +16,7 @@ import (
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/pkg/grpc"
"github.com/mudler/LocalAI/pkg/grpc/proto"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/utils"
@@ -34,6 +35,15 @@ type TokenUsage struct {
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
modelFile := c.Model
var inferenceModel grpc.Backend
var err error
opts := ModelOptions(c, o, []model.Option{})
if c.Backend != "" {
opts = append(opts, model.WithBackendString(c.Backend))
}
// Check if the modelFile exists, if it doesn't try to load it from the gallery
if o.AutoloadGalleries { // experimental
if _, err := os.Stat(modelFile); os.IsNotExist(err) {
@@ -46,8 +56,12 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
}
}
opts := ModelOptions(c, o)
inferenceModel, err := loader.Load(opts...)
if c.Backend == "" {
inferenceModel, err = loader.GreedyLoader(opts...)
} else {
inferenceModel, err = loader.BackendLoader(opts...)
}
if err != nil {
return nil, err
}

View File

@@ -11,7 +11,7 @@ import (
"github.com/rs/zerolog/log"
)
func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts ...model.Option) []model.Option {
func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
name := c.Name
if name == "" {
name = c.Model
@@ -122,17 +122,14 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
CUDA: c.CUDA || c.Diffusers.CUDA,
SchedulerType: c.Diffusers.SchedulerType,
PipelineType: c.Diffusers.PipelineType,
CFGScale: c.CFGScale,
CFGScale: c.Diffusers.CFGScale,
LoraAdapter: c.LoraAdapter,
LoraScale: c.LoraScale,
LoraAdapters: c.LoraAdapters,
LoraScales: c.LoraScales,
F16Memory: f16,
LoraBase: c.LoraBase,
IMG2IMG: c.Diffusers.IMG2IMG,
CLIPModel: c.Diffusers.ClipModel,
CLIPSubfolder: c.Diffusers.ClipSubFolder,
Options: c.Options,
CLIPSkip: int32(c.Diffusers.ClipSkip),
ControlNet: c.Diffusers.ControlNet,
ContextSize: int32(ctxSize),
@@ -151,8 +148,6 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
TensorParallelSize: int32(c.TensorParallelSize),
MMProj: c.MMProj,
FlashAttention: c.FlashAttention,
CacheTypeKey: c.CacheTypeK,
CacheTypeValue: c.CacheTypeV,
NoKVOffload: c.NoKVOffloading,
YarnExtFactor: c.YarnExtFactor,
YarnAttnFactor: c.YarnAttnFactor,
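
The signature change above (variadic opts ...model.Option versus an explicit []model.Option slice) is what drives the call-site churn in the surrounding hunks. A minimal sketch of the pattern the variadic form enables, using only helpers that appear in this diff (ModelOptions, model.WithModel, model.WithBackendString, loader.Load); treat it as an illustration, not the actual call sites:

// Sketch only: base options are derived from the backend config, and the
// call site appends an explicit backend override before loading.
func loadForConfig(loader *model.ModelLoader, c config.BackendConfig, appConfig *config.ApplicationConfig) (grpc.Backend, error) {
	opts := ModelOptions(c, appConfig, model.WithModel(c.Model))
	if c.Backend != "" {
		opts = append(opts, model.WithBackendString(c.Backend))
	}
	return loader.Load(opts...)
}

Either shape works; the variadic form simply lets call sites pass extra options inline without building a slice first.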

View File

@@ -11,8 +11,8 @@ import (
func Rerank(modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
rerankModel, err := loader.Load(opts...)
opts := ModelOptions(backendConfig, appConfig, []model.Option{model.WithModel(modelFile)})
rerankModel, err := loader.BackendLoader(opts...)
if err != nil {
return nil, err
}

View File

@@ -25,8 +25,9 @@ func SoundGeneration(
backendConfig config.BackendConfig,
) (string, *proto.Result, error) {
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
soundGenModel, err := loader.Load(opts...)
opts := ModelOptions(backendConfig, appConfig, []model.Option{model.WithModel(modelFile)})
soundGenModel, err := loader.BackendLoader(opts...)
if err != nil {
return "", nil, err
}

View File

@@ -8,15 +8,16 @@ import (
)
func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string) (grpc.Backend, error) {
if storeName == "" {
storeName = "default"
}
if storeName == "" {
storeName = "default"
}
sc := []model.Option{
model.WithBackendString(model.LocalStoreBackend),
model.WithAssetDir(appConfig.AssetsDestination),
model.WithModel(storeName),
}
sc := []model.Option{
model.WithBackendString(model.LocalStoreBackend),
model.WithAssetDir(appConfig.AssetsDestination),
model.WithModel(storeName),
}
return sl.Load(sc...)
return sl.BackendLoader(sc...)
}

View File

@@ -15,8 +15,10 @@ func TokenMetrics(
appConfig *config.ApplicationConfig,
backendConfig config.BackendConfig) (*proto.MetricsResponse, error) {
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
model, err := loader.Load(opts...)
opts := ModelOptions(backendConfig, appConfig, []model.Option{
model.WithModel(modelFile),
})
model, err := loader.BackendLoader(opts...)
if err != nil {
return nil, err
}

View File

@@ -14,13 +14,15 @@ func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.Bac
var inferenceModel grpc.Backend
var err error
opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
opts := ModelOptions(backendConfig, appConfig, []model.Option{
model.WithModel(modelFile),
})
if backendConfig.Backend == "" {
inferenceModel, err = loader.Load(opts...)
inferenceModel, err = loader.GreedyLoader(opts...)
} else {
opts = append(opts, model.WithBackendString(backendConfig.Backend))
inferenceModel, err = loader.Load(opts...)
inferenceModel, err = loader.BackendLoader(opts...)
}
if err != nil {
return schema.TokenizeResponse{}, err

View File

@@ -18,9 +18,9 @@ func ModelTranscription(audio, language string, translate bool, ml *model.ModelL
backendConfig.Backend = model.WhisperBackend
}
opts := ModelOptions(backendConfig, appConfig)
opts := ModelOptions(backendConfig, appConfig, []model.Option{})
transcriptionModel, err := ml.Load(opts...)
transcriptionModel, err := ml.BackendLoader(opts...)
if err != nil {
return nil, err
}

View File

@@ -28,8 +28,11 @@ func ModelTTS(
bb = model.PiperBackend
}
opts := ModelOptions(backendConfig, appConfig, model.WithBackendString(bb), model.WithModel(modelFile))
ttsModel, err := loader.Load(opts...)
opts := ModelOptions(config.BackendConfig{}, appConfig, []model.Option{
model.WithBackendString(bb),
model.WithModel(modelFile),
})
ttsModel, err := loader.BackendLoader(opts...)
if err != nil {
return "", nil, err
}

View File

@@ -6,12 +6,12 @@ import (
"strings"
"time"
"github.com/mudler/LocalAI/core/application"
cli_api "github.com/mudler/LocalAI/core/cli/api"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/startup"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
@@ -186,16 +186,16 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
}
if r.PreloadBackendOnly {
_, err := application.New(opts...)
_, _, _, err := startup.Startup(opts...)
return err
}
app, err := application.New(opts...)
cl, ml, options, err := startup.Startup(opts...)
if err != nil {
return fmt.Errorf("failed basic startup tasks with error %s", err.Error())
}
appHTTP, err := http.API(app)
appHTTP, err := http.App(cl, ml, options)
if err != nil {
log.Error().Err(err).Msg("error during HTTP App construction")
return err

View File

@@ -76,14 +76,8 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
"util",
"llama-cpp-rpc-server",
)
var extraArgs []string
if r.ExtraLLamaCPPArgs != "" {
extraArgs = strings.Split(r.ExtraLLamaCPPArgs, " ")
}
extraArgs := strings.Split(r.ExtraLLamaCPPArgs, " ")
args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, extraArgs...)
log.Debug().Msgf("Starting llama-cpp-rpc-server on '%s:%d' with args: %+v (%d)", address, port, args, len(args))
args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
cmd := exec.Command(
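
The guard added around strings.Split above matters because, in Go, splitting an empty string does not yield an empty slice. A tiny standalone sketch (unrelated to the repository code, runnable as-is):

package main

import (
	"fmt"
	"strings"
)

func main() {
	// strings.Split("", " ") returns []string{""}, i.e. one bogus element,
	// which would otherwise be appended as an empty CLI argument.
	fmt.Println(len(strings.Split("", " "))) // 1

	var extraArgs []string
	extra := "" // e.g. the extra-args flag left unset
	if extra != "" {
		extraArgs = strings.Split(extra, " ")
	}
	fmt.Println(len(extraArgs)) // 0
}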

View File

@@ -72,8 +72,6 @@ type BackendConfig struct {
Description string `yaml:"description"`
Usage string `yaml:"usage"`
Options []string `yaml:"options"`
}
type File struct {
@@ -99,15 +97,16 @@ type GRPC struct {
}
type Diffusers struct {
CUDA bool `yaml:"cuda"`
PipelineType string `yaml:"pipeline_type"`
SchedulerType string `yaml:"scheduler_type"`
EnableParameters string `yaml:"enable_parameters"` // A list of comma separated parameters to specify
IMG2IMG bool `yaml:"img2img"` // Image to Image Diffuser
ClipSkip int `yaml:"clip_skip"` // Skip every N frames
ClipModel string `yaml:"clip_model"` // Clip model to use
ClipSubFolder string `yaml:"clip_subfolder"` // Subfolder to use for clip model
ControlNet string `yaml:"control_net"`
CUDA bool `yaml:"cuda"`
PipelineType string `yaml:"pipeline_type"`
SchedulerType string `yaml:"scheduler_type"`
EnableParameters string `yaml:"enable_parameters"` // A list of comma separated parameters to specify
CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale
IMG2IMG bool `yaml:"img2img"` // Image to Image Diffuser
ClipSkip int `yaml:"clip_skip"` // Skip every N frames
ClipModel string `yaml:"clip_model"` // Clip model to use
ClipSubFolder string `yaml:"clip_subfolder"` // Subfolder to use for clip model
ControlNet string `yaml:"control_net"`
}
// LLMConfig is a struct that holds the configuration that are
@@ -135,30 +134,26 @@ type LLMConfig struct {
TrimSpace []string `yaml:"trimspace"`
TrimSuffix []string `yaml:"trimsuffix"`
ContextSize *int `yaml:"context_size"`
NUMA bool `yaml:"numa"`
LoraAdapter string `yaml:"lora_adapter"`
LoraBase string `yaml:"lora_base"`
LoraAdapters []string `yaml:"lora_adapters"`
LoraScales []float32 `yaml:"lora_scales"`
LoraScale float32 `yaml:"lora_scale"`
NoMulMatQ bool `yaml:"no_mulmatq"`
DraftModel string `yaml:"draft_model"`
NDraft int32 `yaml:"n_draft"`
Quantization string `yaml:"quantization"`
LoadFormat string `yaml:"load_format"`
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM
EnforceEager bool `yaml:"enforce_eager"` // vLLM
SwapSpace int `yaml:"swap_space"` // vLLM
MaxModelLen int `yaml:"max_model_len"` // vLLM
TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
MMProj string `yaml:"mmproj"`
ContextSize *int `yaml:"context_size"`
NUMA bool `yaml:"numa"`
LoraAdapter string `yaml:"lora_adapter"`
LoraBase string `yaml:"lora_base"`
LoraScale float32 `yaml:"lora_scale"`
NoMulMatQ bool `yaml:"no_mulmatq"`
DraftModel string `yaml:"draft_model"`
NDraft int32 `yaml:"n_draft"`
Quantization string `yaml:"quantization"`
LoadFormat string `yaml:"load_format"`
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM
EnforceEager bool `yaml:"enforce_eager"` // vLLM
SwapSpace int `yaml:"swap_space"` // vLLM
MaxModelLen int `yaml:"max_model_len"` // vLLM
TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
MMProj string `yaml:"mmproj"`
FlashAttention bool `yaml:"flash_attention"`
NoKVOffloading bool `yaml:"no_kv_offloading"`
CacheTypeK string `yaml:"cache_type_k"`
CacheTypeV string `yaml:"cache_type_v"`
FlashAttention bool `yaml:"flash_attention"`
NoKVOffloading bool `yaml:"no_kv_offloading"`
RopeScaling string `yaml:"rope_scaling"`
ModelType string `yaml:"type"`
@@ -167,8 +162,6 @@ type LLMConfig struct {
YarnAttnFactor float32 `yaml:"yarn_attn_factor"`
YarnBetaFast float32 `yaml:"yarn_beta_fast"`
YarnBetaSlow float32 `yaml:"yarn_beta_slow"`
CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale
}
// AutoGPTQ is a struct that holds the configuration specific to the AutoGPTQ backend
@@ -206,8 +199,6 @@ type TemplateConfig struct {
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`
Multimodal string `yaml:"multimodal"`
JinjaTemplate bool `yaml:"jinja_template"`
}
func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {

View File

@@ -140,7 +140,7 @@ func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
}
}
cfg.SetDefaults(append(opts, ModelPath(modelPath))...)
cfg.SetDefaults(opts...)
return cfg, nil
}

View File

@@ -26,14 +26,14 @@ const (
type settingsConfig struct {
StopWords []string
TemplateConfig TemplateConfig
RepeatPenalty float64
RepeatPenalty float64
}
// default settings to adopt with a given model family
var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{
Gemma: {
RepeatPenalty: 1.0,
StopWords: []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
StopWords: []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
TemplateConfig: TemplateConfig{
Chat: "{{.Input }}\n<start_of_turn>model\n",
ChatMessage: "<start_of_turn>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<end_of_turn>",
@@ -200,18 +200,6 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) {
} else {
log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: no template found for family")
}
if cfg.HasTemplate() {
return
}
// identify from well known templates first, otherwise use the raw jinja template
chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template")
if found {
// try to use the jinja template
cfg.TemplateConfig.JinjaTemplate = true
cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString()
}
}
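
Condensed into a standalone helper for readability, the Jinja fallback shown above amounts to the following sketch; it assumes only the gguf parser type, the metadata key, and the BackendConfig fields visible in this hunk:

// Sketch of the block above as a helper: when the config has no template
// yet, fall back to the raw Jinja chat template stored in GGUF metadata.
func applyJinjaFallback(cfg *BackendConfig, f *gguf.GGUFFile) {
	if cfg.HasTemplate() {
		return
	}
	chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template")
	if found {
		cfg.TemplateConfig.JinjaTemplate = true
		cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString()
	}
}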
func identifyFamily(f *gguf.GGUFFile) familyType {

View File

@@ -12,8 +12,6 @@ import (
"gopkg.in/yaml.v3"
)
const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`
var _ = Describe("Model test", func() {
Context("Downloading", func() {
@@ -49,7 +47,7 @@ var _ = Describe("Model test", func() {
gallery := []GalleryModel{{
Name: "bert",
URL: bertEmbeddingsURL,
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
}}
out, err := yaml.Marshal(gallery)
Expect(err).ToNot(HaveOccurred())
@@ -68,7 +66,7 @@ var _ = Describe("Model test", func() {
Expect(err).ToNot(HaveOccurred())
Expect(len(models)).To(Equal(1))
Expect(models[0].Name).To(Equal("bert"))
Expect(models[0].URL).To(Equal(bertEmbeddingsURL))
Expect(models[0].URL).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"))
Expect(models[0].Installed).To(BeFalse())
err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {}, true)
@@ -80,7 +78,7 @@ var _ = Describe("Model test", func() {
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
Expect(content["backend"]).To(Equal("bert-embeddings"))
models, err = AvailableGalleryModels(galleries, tempdir)
Expect(err).ToNot(HaveOccurred())

View File

@@ -14,9 +14,10 @@ import (
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/http/routes"
"github.com/mudler/LocalAI/core/application"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/model"
"github.com/gofiber/contrib/fiberzerolog"
"github.com/gofiber/fiber/v2"
@@ -48,18 +49,18 @@ var embedDirStatic embed.FS
// @in header
// @name Authorization
func API(application *application.Application) (*fiber.App, error) {
func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {
fiberCfg := fiber.Config{
Views: renderEngine(),
BodyLimit: application.ApplicationConfig().UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
// We disable the Fiber startup message as it does not conform to structured logging.
// We register a startup log line with connection information in the OnListen hook to keep things user friendly though
DisableStartupMessage: true,
// Override default error handler
}
if !application.ApplicationConfig().OpaqueErrors {
if !appConfig.OpaqueErrors {
// Normally, return errors as JSON responses
fiberCfg.ErrorHandler = func(ctx *fiber.Ctx, err error) error {
// Status code defaults to 500
@@ -85,9 +86,9 @@ func API(application *application.Application) (*fiber.App, error) {
}
}
router := fiber.New(fiberCfg)
app := fiber.New(fiberCfg)
router.Hooks().OnListen(func(listenData fiber.ListenData) error {
app.Hooks().OnListen(func(listenData fiber.ListenData) error {
scheme := "http"
if listenData.TLS {
scheme = "https"
@@ -98,82 +99,82 @@ func API(application *application.Application) (*fiber.App, error) {
// Have Fiber use zerolog like the rest of the application rather than it's built-in logger
logger := log.Logger
router.Use(fiberzerolog.New(fiberzerolog.Config{
app.Use(fiberzerolog.New(fiberzerolog.Config{
Logger: &logger,
}))
// Default middleware config
if !application.ApplicationConfig().Debug {
router.Use(recover.New())
if !appConfig.Debug {
app.Use(recover.New())
}
if !application.ApplicationConfig().DisableMetrics {
if !appConfig.DisableMetrics {
metricsService, err := services.NewLocalAIMetricsService()
if err != nil {
return nil, err
}
if metricsService != nil {
router.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
router.Hooks().OnShutdown(func() error {
app.Use(localai.LocalAIMetricsAPIMiddleware(metricsService))
app.Hooks().OnShutdown(func() error {
return metricsService.Shutdown()
})
}
}
// Health Checks should always be exempt from auth, so register these first
routes.HealthRoutes(router)
routes.HealthRoutes(app)
kaConfig, err := middleware.GetKeyAuthConfig(application.ApplicationConfig())
kaConfig, err := middleware.GetKeyAuthConfig(appConfig)
if err != nil || kaConfig == nil {
return nil, fmt.Errorf("failed to create key auth config: %w", err)
}
// Auth is applied to _all_ endpoints. No exceptions. Filtering out endpoints to bypass is the role of the Filter property of the KeyAuth Configuration
router.Use(v2keyauth.New(*kaConfig))
app.Use(v2keyauth.New(*kaConfig))
if application.ApplicationConfig().CORS {
if appConfig.CORS {
var c func(ctx *fiber.Ctx) error
if application.ApplicationConfig().CORSAllowOrigins == "" {
if appConfig.CORSAllowOrigins == "" {
c = cors.New()
} else {
c = cors.New(cors.Config{AllowOrigins: application.ApplicationConfig().CORSAllowOrigins})
c = cors.New(cors.Config{AllowOrigins: appConfig.CORSAllowOrigins})
}
router.Use(c)
app.Use(c)
}
if application.ApplicationConfig().CSRF {
if appConfig.CSRF {
log.Debug().Msg("Enabling CSRF middleware. Tokens are now required for state-modifying requests")
router.Use(csrf.New())
app.Use(csrf.New())
}
// Load config jsons
utils.LoadConfig(application.ApplicationConfig().UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
utils.LoadConfig(application.ApplicationConfig().ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)
galleryService := services.NewGalleryService(application.ApplicationConfig())
galleryService.Start(application.ApplicationConfig().Context, application.BackendLoader())
galleryService := services.NewGalleryService(appConfig)
galleryService.Start(appConfig.Context, cl)
routes.RegisterElevenLabsRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
routes.RegisterLocalAIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
routes.RegisterOpenAIRoutes(router, application)
if !application.ApplicationConfig().DisableWebUI {
routes.RegisterUIRoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig(), galleryService)
routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig)
routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService)
routes.RegisterOpenAIRoutes(app, cl, ml, appConfig)
if !appConfig.DisableWebUI {
routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService)
}
routes.RegisterJINARoutes(router, application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig())
routes.RegisterJINARoutes(app, cl, ml, appConfig)
httpFS := http.FS(embedDirStatic)
router.Use(favicon.New(favicon.Config{
app.Use(favicon.New(favicon.Config{
URL: "/favicon.ico",
FileSystem: httpFS,
File: "static/favicon.ico",
}))
router.Use("/static", filesystem.New(filesystem.Config{
app.Use("/static", filesystem.New(filesystem.Config{
Root: httpFS,
PathPrefix: "static",
Browse: true,
@@ -181,7 +182,7 @@ func API(application *application.Application) (*fiber.App, error) {
// Define a custom 404 handler
// Note: keep this at the bottom!
router.Use(notFoundHandler)
app.Use(notFoundHandler)
return router, nil
return app, nil
}
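
Taken together with the CLI hunk earlier in this diff, the new constructor is used roughly like this; a sketch only, with error handling trimmed and a placeholder listen address (the real CLI derives it from its flags):

// Sketch of the application.New / http.API side of this refactor, from the
// caller's perspective (imports: core/application, core/http, core/config).
func serve(opts ...config.AppOption) error {
	app, err := application.New(opts...)
	if err != nil {
		return err
	}
	srv, err := http.API(app)
	if err != nil {
		return err
	}
	// placeholder address for illustration
	return srv.Listen("127.0.0.1:8080")
}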

View File

@@ -5,21 +5,24 @@ import (
"context"
"embed"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"runtime"
"strings"
"github.com/mudler/LocalAI/core/application"
"github.com/mudler/LocalAI/core/config"
. "github.com/mudler/LocalAI/core/http"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/startup"
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/model"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"gopkg.in/yaml.v3"
@@ -237,8 +240,6 @@ func postInvalidRequest(url string) (error, int) {
return nil, resp.StatusCode
}
const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`
//go:embed backend-assets/*
var backendAssets embed.FS
@@ -251,6 +252,9 @@ var _ = Describe("API test", func() {
var cancel context.CancelFunc
var tmpdir string
var modelDir string
var bcl *config.BackendConfigLoader
var ml *model.ModelLoader
var applicationConfig *config.ApplicationConfig
commonOpts := []config.AppOption{
config.WithDebug(true),
@@ -275,13 +279,13 @@ var _ = Describe("API test", func() {
g := []gallery.GalleryModel{
{
Name: "bert",
URL: bertEmbeddingsURL,
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
},
{
Name: "bert2",
URL: bertEmbeddingsURL,
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
Overrides: map[string]interface{}{"foo": "bar"},
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"}},
},
}
out, err := yaml.Marshal(g)
@@ -296,7 +300,7 @@ var _ = Describe("API test", func() {
},
}
application, err := application.New(
bcl, ml, applicationConfig, err = startup.Startup(
append(commonOpts,
config.WithContext(c),
config.WithGalleries(galleries),
@@ -306,7 +310,7 @@ var _ = Describe("API test", func() {
config.WithBackendAssetsOutput(backendAssetsDir))...)
Expect(err).ToNot(HaveOccurred())
app, err = API(application)
app, err = App(bcl, ml, applicationConfig)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")
@@ -341,7 +345,7 @@ var _ = Describe("API test", func() {
It("Should fail if the api key is missing", func() {
err, sc := postInvalidRequest("http://127.0.0.1:9090/models/available")
Expect(err).ToNot(BeNil())
Expect(sc).To(Equal(401))
Expect(sc).To(Equal(403))
})
})
@@ -379,7 +383,7 @@ var _ = Describe("API test", func() {
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
Expect(content["backend"]).To(Equal("bert-embeddings"))
Expect(content["foo"]).To(Equal("bar"))
models, err = getModels("http://127.0.0.1:9090/models/available")
@@ -398,7 +402,7 @@ var _ = Describe("API test", func() {
It("overrides models", func() {
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: bertEmbeddingsURL,
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
Name: "bert",
Overrides: map[string]interface{}{
"backend": "llama",
@@ -434,7 +438,7 @@ var _ = Describe("API test", func() {
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
return response["processed"].(bool)
}, "900s", "10s").Should(Equal(true))
}, "360s", "10s").Should(Equal(true))
Eventually(func() []string {
models, _ := client.ListModels(context.TODO())
@@ -447,7 +451,7 @@ var _ = Describe("API test", func() {
})
It("apply models without overrides", func() {
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: bertEmbeddingsURL,
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
Name: "bert",
Overrides: map[string]interface{}{},
})
@@ -467,7 +471,7 @@ var _ = Describe("API test", func() {
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
Expect(content["backend"]).To(Equal("bert-embeddings"))
})
It("runs openllama(llama-ggml backend)", Label("llama"), func() {
@@ -535,7 +539,7 @@ var _ = Describe("API test", func() {
var res map[string]string
err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
Expect(err).ToNot(HaveOccurred())
Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res))
Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res))
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
@@ -558,7 +562,7 @@ var _ = Describe("API test", func() {
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
return response["processed"].(bool)
}, "900s", "10s").Should(Equal(true))
}, "360s", "10s").Should(Equal(true))
By("testing chat")
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: modelName, Messages: []openai.ChatCompletionMessage{
@@ -637,7 +641,7 @@ var _ = Describe("API test", func() {
},
}
application, err := application.New(
bcl, ml, applicationConfig, err = startup.Startup(
append(commonOpts,
config.WithContext(c),
config.WithAudioDir(tmpdir),
@@ -648,7 +652,7 @@ var _ = Describe("API test", func() {
config.WithBackendAssetsOutput(tmpdir))...,
)
Expect(err).ToNot(HaveOccurred())
app, err = API(application)
app, err = App(bcl, ml, applicationConfig)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")
@@ -768,14 +772,14 @@ var _ = Describe("API test", func() {
var err error
application, err := application.New(
bcl, ml, applicationConfig, err = startup.Startup(
append(commonOpts,
config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
config.WithContext(c),
config.WithModelPath(modelPath),
)...)
Expect(err).ToNot(HaveOccurred())
app, err = API(application)
app, err = App(bcl, ml, applicationConfig)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")
@@ -802,7 +806,7 @@ var _ = Describe("API test", func() {
It("returns the models list", func() {
models, err := client.ListModels(context.TODO())
Expect(err).ToNot(HaveOccurred())
Expect(len(models.Models)).To(Equal(7)) // If "config.yaml" should be included, this should be 8?
Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8?
})
It("can generate completions via ggml", func() {
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt})
@@ -862,8 +866,8 @@ var _ = Describe("API test", func() {
},
)
Expect(err).ToNot(HaveOccurred(), err)
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 2048))
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 2048))
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
sunEmbedding := resp.Data[0].Embedding
resp2, err := client.CreateEmbeddings(
@@ -907,6 +911,71 @@ var _ = Describe("API test", func() {
})
})
Context("backends", func() {
It("runs rwkv completion", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,"})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices) > 0).To(BeTrue())
Expect(resp.Choices[0].Text).To(ContainSubstring("five"))
stream, err := client.CreateCompletionStream(context.TODO(), openai.CompletionRequest{
Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,", Stream: true,
})
Expect(err).ToNot(HaveOccurred())
defer stream.Close()
tokens := 0
text := ""
for {
response, err := stream.Recv()
if errors.Is(err, io.EOF) {
break
}
Expect(err).ToNot(HaveOccurred())
text += response.Choices[0].Text
tokens++
}
Expect(text).ToNot(BeEmpty())
Expect(text).To(ContainSubstring("five"))
Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
})
It("runs rwkv chat completion", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
resp, err := client.CreateChatCompletion(context.TODO(),
openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices) > 0).To(BeTrue())
Expect(strings.ToLower(resp.Choices[0].Message.Content)).To(Or(ContainSubstring("sure"), ContainSubstring("five")))
stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
Expect(err).ToNot(HaveOccurred())
defer stream.Close()
tokens := 0
text := ""
for {
response, err := stream.Recv()
if errors.Is(err, io.EOF) {
break
}
Expect(err).ToNot(HaveOccurred())
text += response.Choices[0].Delta.Content
tokens++
}
Expect(text).ToNot(BeEmpty())
Expect(strings.ToLower(text)).To(Or(ContainSubstring("sure"), ContainSubstring("five")))
Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
})
})
// See tests/integration/stores_test
Context("Stores", Label("stores"), func() {
@@ -986,14 +1055,14 @@ var _ = Describe("API test", func() {
c, cancel = context.WithCancel(context.Background())
var err error
application, err := application.New(
bcl, ml, applicationConfig, err = startup.Startup(
append(commonOpts,
config.WithContext(c),
config.WithModelPath(modelPath),
config.WithConfigFile(os.Getenv("CONFIG_FILE")))...,
)
Expect(err).ToNot(HaveOccurred())
app, err = API(application)
app, err = App(bcl, ml, applicationConfig)
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")

View File

@@ -1,97 +0,0 @@
package elements
import (
"strings"
"github.com/chasefleming/elem-go"
"github.com/chasefleming/elem-go/attrs"
"github.com/mudler/LocalAI/core/gallery"
)
func installButton(galleryName string) elem.Node {
return elem.Button(
attrs.Props{
"data-twe-ripple-init": "",
"data-twe-ripple-color": "light",
"class": "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
"hx-swap": "outerHTML",
// post the Model ID as param
"hx-post": "/browse/install/model/" + galleryName,
},
elem.I(
attrs.Props{
"class": "fa-solid fa-download pr-2",
},
),
elem.Text("Install"),
)
}
func reInstallButton(galleryName string) elem.Node {
return elem.Button(
attrs.Props{
"data-twe-ripple-init": "",
"data-twe-ripple-color": "light",
"class": "float-right inline-block rounded bg-primary ml-2 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
"hx-target": "#action-div-" + dropBadChars(galleryName),
"hx-swap": "outerHTML",
// post the Model ID as param
"hx-post": "/browse/install/model/" + galleryName,
},
elem.I(
attrs.Props{
"class": "fa-solid fa-arrow-rotate-right pr-2",
},
),
elem.Text("Reinstall"),
)
}
func infoButton(m *gallery.GalleryModel) elem.Node {
return elem.Button(
attrs.Props{
"data-twe-ripple-init": "",
"data-twe-ripple-color": "light",
"class": "float-left inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
"data-modal-target": modalName(m),
"data-modal-toggle": modalName(m),
},
elem.P(
attrs.Props{
"class": "flex items-center",
},
elem.I(
attrs.Props{
"class": "fas fa-info-circle pr-2",
},
),
elem.Text("Info"),
),
)
}
func deleteButton(galleryID string) elem.Node {
return elem.Button(
attrs.Props{
"data-twe-ripple-init": "",
"data-twe-ripple-color": "light",
"hx-confirm": "Are you sure you wish to delete the model?",
"class": "float-right inline-block rounded bg-red-800 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-red-accent-300 hover:shadow-red-2 focus:bg-red-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-red-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
"hx-target": "#action-div-" + dropBadChars(galleryID),
"hx-swap": "outerHTML",
// post the Model ID as param
"hx-post": "/browse/delete/model/" + galleryID,
},
elem.I(
attrs.Props{
"class": "fa-solid fa-cancel pr-2",
},
),
elem.Text("Delete"),
)
}
// Javascript/HTMX doesn't like weird IDs
func dropBadChars(s string) string {
return strings.ReplaceAll(s, "@", "__")
}

View File

@@ -2,11 +2,13 @@ package elements
import (
"fmt"
"strings"
"github.com/chasefleming/elem-go"
"github.com/chasefleming/elem-go/attrs"
"github.com/microcosm-cc/bluemonday"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
)
@@ -14,6 +16,231 @@ const (
noImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg"
)
func renderElements(n []elem.Node) string {
render := ""
for _, r := range n {
render += r.Render()
}
return render
}
func DoneProgress(galleryID, text string, showDelete bool) string {
var modelName = galleryID
// Split by @ and grab the name
if strings.Contains(galleryID, "@") {
modelName = strings.Split(galleryID, "@")[1]
}
return elem.Div(
attrs.Props{
"id": "action-div-" + dropBadChars(galleryID),
},
elem.H3(
attrs.Props{
"role": "status",
"id": "pblabel",
"tabindex": "-1",
"autofocus": "",
},
elem.Text(bluemonday.StrictPolicy().Sanitize(text)),
),
elem.If(showDelete, deleteButton(galleryID, modelName), reInstallButton(galleryID)),
).Render()
}
func ErrorProgress(err, galleryName string) string {
return elem.Div(
attrs.Props{},
elem.H3(
attrs.Props{
"role": "status",
"id": "pblabel",
"tabindex": "-1",
"autofocus": "",
},
elem.Text("Error "+bluemonday.StrictPolicy().Sanitize(err)),
),
installButton(galleryName),
).Render()
}
func ProgressBar(progress string) string {
return elem.Div(attrs.Props{
"class": "progress",
"role": "progressbar",
"aria-valuemin": "0",
"aria-valuemax": "100",
"aria-valuenow": "0",
"aria-labelledby": "pblabel",
},
elem.Div(attrs.Props{
"id": "pb",
"class": "progress-bar",
"style": "width:" + progress + "%",
}),
).Render()
}
func P2PNodeStats(nodes []p2p.NodeData) string {
/*
<div class="bg-gray-800 p-6 rounded-lg shadow-lg text-left">
<p class="text-xl font-semibold text-gray-200">Total Workers Detected: {{ len .Nodes }}</p>
{{ $online := 0 }}
{{ range .Nodes }}
{{ if .IsOnline }}
{{ $online = add $online 1 }}
{{ end }}
{{ end }}
<p class="text-xl font-semibold text-gray-200">Total Online Workers: {{$online}}</p>
</div>
*/
online := 0
for _, n := range nodes {
if n.IsOnline() {
online++
}
}
class := "text-green-500"
if online == 0 {
class = "text-red-500"
}
/*
<i class="fas fa-circle animate-pulse text-green-500 ml-2 mr-1"></i>
*/
circle := elem.I(attrs.Props{
"class": "fas fa-circle animate-pulse " + class + " ml-2 mr-1",
})
nodesElements := []elem.Node{
elem.Span(
attrs.Props{
"class": class,
},
circle,
elem.Text(fmt.Sprintf("%d", online)),
),
elem.Span(
attrs.Props{
"class": "text-gray-200",
},
elem.Text(fmt.Sprintf("/%d", len(nodes))),
),
}
return renderElements(nodesElements)
}
func P2PNodeBoxes(nodes []p2p.NodeData) string {
/*
<div class="bg-gray-800 p-4 rounded-lg shadow-lg text-left">
<div class="flex items-center mb-2">
<i class="fas fa-desktop text-gray-400 mr-2"></i>
<span class="text-gray-200 font-semibold">{{.ID}}</span>
</div>
<p class="text-sm text-gray-400 mt-2 flex items-center">
Status:
<i class="fas fa-circle {{ if .IsOnline }}text-green-500{{ else }}text-red-500{{ end }} ml-2 mr-1"></i>
<span class="{{ if .IsOnline }}text-green-400{{ else }}text-red-400{{ end }}">
{{ if .IsOnline }}Online{{ else }}Offline{{ end }}
</span>
</p>
</div>
*/
nodesElements := []elem.Node{}
for _, n := range nodes {
nodesElements = append(nodesElements,
elem.Div(
attrs.Props{
"class": "bg-gray-700 p-6 rounded-lg shadow-lg text-left",
},
elem.P(
attrs.Props{
"class": "text-sm text-gray-400 mt-2 flex",
},
elem.I(
attrs.Props{
"class": "fas fa-desktop text-gray-400 mr-2",
},
),
elem.Text("Name: "),
elem.Span(
attrs.Props{
"class": "text-gray-200 font-semibold ml-2 mr-1",
},
elem.Text(bluemonday.StrictPolicy().Sanitize(n.ID)),
),
elem.Text("Status: "),
elem.If(
n.IsOnline(),
elem.I(
attrs.Props{
"class": "fas fa-circle animate-pulse text-green-500 ml-2 mr-1",
},
),
elem.I(
attrs.Props{
"class": "fas fa-circle animate-pulse text-red-500 ml-2 mr-1",
},
),
),
elem.If(
n.IsOnline(),
elem.Span(
attrs.Props{
"class": "text-green-400",
},
elem.Text("Online"),
),
elem.Span(
attrs.Props{
"class": "text-red-400",
},
elem.Text("Offline"),
),
),
),
))
}
return renderElements(nodesElements)
}
func StartProgressBar(uid, progress, text string) string {
if progress == "" {
progress = "0"
}
return elem.Div(
attrs.Props{
"hx-trigger": "done",
"hx-get": "/browse/job/" + uid,
"hx-swap": "outerHTML",
"hx-target": "this",
},
elem.H3(
attrs.Props{
"role": "status",
"id": "pblabel",
"tabindex": "-1",
"autofocus": "",
},
elem.Text(bluemonday.StrictPolicy().Sanitize(text)), //Perhaps overly defensive
elem.Div(attrs.Props{
"hx-get": "/browse/job/progress/" + uid,
"hx-trigger": "every 600ms",
"hx-target": "this",
"hx-swap": "innerHTML",
},
elem.Raw(ProgressBar(progress)),
),
),
).Render()
}
func cardSpan(text, icon string) elem.Node {
return elem.Span(
attrs.Props{
@@ -41,6 +268,7 @@ func searchableElement(text, icon string) elem.Node {
attrs.Props{
"class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2",
},
elem.A(
attrs.Props{
// "name": "search",
@@ -62,8 +290,7 @@ func searchableElement(text, icon string) elem.Node {
)
}
/*
func buttonLink(text, url string) elem.Node {
func link(text, url string) elem.Node {
return elem.A(
attrs.Props{
"class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2",
@@ -76,255 +303,163 @@ func buttonLink(text, url string) elem.Node {
elem.Text(bluemonday.StrictPolicy().Sanitize(text)),
)
}
*/
func link(text, url string) elem.Node {
return elem.A(
func installButton(galleryName string) elem.Node {
return elem.Button(
attrs.Props{
"class": "text-base leading-relaxed text-gray-500 dark:text-gray-400",
"href": url,
"target": "_blank",
"data-twe-ripple-init": "",
"data-twe-ripple-color": "light",
"class": "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
"hx-swap": "outerHTML",
// post the Model ID as param
"hx-post": "/browse/install/model/" + galleryName,
},
elem.I(attrs.Props{
"class": "fas fa-link pr-2",
}),
elem.Text(bluemonday.StrictPolicy().Sanitize(text)),
elem.I(
attrs.Props{
"class": "fa-solid fa-download pr-2",
},
),
elem.Text("Install"),
)
}
func reInstallButton(galleryName string) elem.Node {
return elem.Button(
attrs.Props{
"data-twe-ripple-init": "",
"data-twe-ripple-color": "light",
"class": "float-right inline-block rounded bg-primary ml-2 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
"hx-target": "#action-div-" + dropBadChars(galleryName),
"hx-swap": "outerHTML",
// post the Model ID as param
"hx-post": "/browse/install/model/" + galleryName,
},
elem.I(
attrs.Props{
"class": "fa-solid fa-arrow-rotate-right pr-2",
},
),
elem.Text("Reinstall"),
)
}
func deleteButton(galleryID, modelName string) elem.Node {
return elem.Button(
attrs.Props{
"data-twe-ripple-init": "",
"data-twe-ripple-color": "light",
"hx-confirm": "Are you sure you wish to delete the model?",
"class": "float-right inline-block rounded bg-red-800 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-red-accent-300 hover:shadow-red-2 focus:bg-red-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-red-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
"hx-target": "#action-div-" + dropBadChars(galleryID),
"hx-swap": "outerHTML",
// post the Model ID as param
"hx-post": "/browse/delete/model/" + galleryID,
},
elem.I(
attrs.Props{
"class": "fa-solid fa-cancel pr-2",
},
),
elem.Text("Delete"),
)
}
// Javascript/HTMX doesn't like weird IDs
func dropBadChars(s string) string {
return strings.ReplaceAll(s, "@", "__")
}
type ProcessTracker interface {
Exists(string) bool
Get(string) string
}
func modalName(m *gallery.GalleryModel) string {
return m.Name + "-modal"
}
func modelDescription(m *gallery.GalleryModel) elem.Node {
urls := []elem.Node{}
for _, url := range m.URLs {
urls = append(urls,
elem.Li(attrs.Props{}, link(url, url)),
)
}
tagsNodes := []elem.Node{}
for _, tag := range m.Tags {
tagsNodes = append(tagsNodes,
searchableElement(tag, "fas fa-tag"),
)
}
	return elem.Div(
		attrs.Props{
			"class": "p-6 text-surface dark:text-white",
		},
		elem.H5(
			attrs.Props{
				"class": "mb-2 text-xl font-bold leading-tight",
			},
			elem.Text(bluemonday.StrictPolicy().Sanitize(m.Name)),
		),
		elem.Div( // small description
			attrs.Props{
				"class": "mb-4 text-sm truncate text-base",
			},
			elem.Text(bluemonday.StrictPolicy().Sanitize(m.Description)),
		),
		elem.Div(
			attrs.Props{
				"id":          modalName(m),
				"tabindex":    "-1",
				"aria-hidden": "true",
				"class":       "hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full",
			},
			elem.Div(
				attrs.Props{
					"class": "relative p-4 w-full max-w-2xl max-h-full",
				},
				elem.Div(
					attrs.Props{
						"class": "relative p-4 w-full max-w-2xl max-h-full bg-white rounded-lg shadow dark:bg-gray-700",
					},
					// header
					elem.Div(
						attrs.Props{
							"class": "flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600",
						},
						elem.H3(
							attrs.Props{
								"class": "text-xl font-semibold text-gray-900 dark:text-white",
							},
							elem.Text(bluemonday.StrictPolicy().Sanitize(m.Name)),
						),
						elem.Button( // close button
							attrs.Props{
								"class":           "text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white",
								"data-modal-hide": modalName(m),
							},
							elem.Raw(
								`<svg class="w-3 h-3" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 14 14">
	<path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6"/>
</svg>`,
							),
							elem.Span(
								attrs.Props{
									"class": "sr-only",
								},
								elem.Text("Close modal"),
							),
						),
					),
					// body
					elem.Div(
						attrs.Props{
							"class": "p-4 md:p-5 space-y-4",
						},
						elem.Div(
							attrs.Props{
								"class": "flex justify-center items-center",
							},
							elem.Img(attrs.Props{
								// "class": "rounded-t-lg object-fit object-center h-96",
								"class":   "lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3 entered loaded",
								"src":     m.Icon,
								"loading": "lazy",
							}),
						),
						elem.P(
							attrs.Props{
								"class": "text-base leading-relaxed text-gray-500 dark:text-gray-400",
							},
							elem.Text(bluemonday.StrictPolicy().Sanitize(m.Description)),
						),
						elem.Hr(
							attrs.Props{},
						),
						elem.P(
							attrs.Props{
								"class": "text-sm font-semibold text-gray-900 dark:text-white",
							},
							elem.Text("Links"),
						),
						elem.Ul(
							attrs.Props{},
							urls...,
						),
						elem.If(
							len(m.Tags) > 0,
							elem.Div(
								attrs.Props{},
								elem.P(
									attrs.Props{
										"class": "text-sm mb-5 font-semibold text-gray-900 dark:text-white",
									},
									elem.Text("Tags"),
								),
								elem.Div(
									attrs.Props{
										"class": "flex flex-row flex-wrap content-center",
									},
									tagsNodes...,
								),
							),
							elem.Div(attrs.Props{}),
						),
					),
					// Footer
					elem.Div(
						attrs.Props{
							"class": "flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600",
						},
						elem.Button(
							attrs.Props{
								"data-modal-hide": modalName(m),
								"class":           "py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700",
							},
							elem.Text("Close"),
						),
					),
				),
			),
		),
	)
}

func modelActionItems(m *gallery.GalleryModel, processTracker ProcessTracker, galleryService *services.GalleryService) elem.Node {
	galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name)
	currentlyProcessing := processTracker.Exists(galleryID)
	jobID := ""
	isDeletionOp := false
	if currentlyProcessing {
		status := galleryService.GetStatus(galleryID)
		if status != nil && status.Deletion {
			isDeletionOp = true
		}
		jobID = processTracker.Get(galleryID)
		// TODO:
		// case not handled, if status == nil : "Waiting"
	}

	nodes := []elem.Node{
		cardSpan("Repository: "+m.Gallery.Name, "fa-brands fa-git-alt"),
	}

	if m.License != "" {
		nodes = append(nodes,
			cardSpan("License: "+m.License, "fas fa-book"),
		)
	}

	/*
		tagsNodes := []elem.Node{}
		for _, tag := range m.Tags {
			tagsNodes = append(tagsNodes,
				searchableElement(tag, "fas fa-tag"),
			)
		}

		nodes = append(nodes,
			elem.Div(
				attrs.Props{
					"class": "flex flex-row flex-wrap content-center",
				},
				tagsNodes...,
			),
		)

		for i, url := range m.URLs {
			nodes = append(nodes,
				buttonLink("Link #"+fmt.Sprintf("%d", i+1), url),
			)
		}
	*/

	progressMessage := "Installation"
	if isDeletionOp {
		progressMessage = "Deletion"
	}

	return elem.Div(
		attrs.Props{
			"class": "px-6 pt-4 pb-2",
		},
		elem.P(
			attrs.Props{
				"class": "mb-4 text-base",
			},
			nodes...,
		),
		elem.Div(
			attrs.Props{
				"id":    "action-div-" + dropBadChars(galleryID),
				"class": "flow-root", // To order buttons left and right
			},
			infoButton(m),
			elem.Div(
				attrs.Props{
					"class": "float-right",
				},
				elem.If(
					currentlyProcessing,
@@ -335,18 +470,14 @@ func modelActionItems(m *gallery.GalleryModel, processTracker ProcessTracker, ga
					elem.Node(elem.Div(
						attrs.Props{},
						reInstallButton(m.ID()),
						deleteButton(m.ID()),
						deleteButton(m.ID(), m.Name),
					)),
					installButton(m.ID()),
				),
			),
		),
	)
}

func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, galleryService *services.GalleryService) string {
	modelsElements := []elem.Node{}

	descriptionDiv := func(m *gallery.GalleryModel) elem.Node {
		return elem.Div(
			attrs.Props{
				"class": "p-6 text-surface dark:text-white",
			},
			elem.H5(
				attrs.Props{
					"class": "mb-2 text-xl font-bold leading-tight",
				},
				elem.Text(bluemonday.StrictPolicy().Sanitize(m.Name)),
			),
			elem.P(
				attrs.Props{
					"class": "mb-4 text-sm [&:not(:hover)]:truncate text-base",
				},
				elem.Text(bluemonday.StrictPolicy().Sanitize(m.Description)),
			),
		)
	}

	actionDiv := func(m *gallery.GalleryModel) elem.Node {
		galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name)
		currentlyProcessing := processTracker.Exists(galleryID)
		jobID := ""
		isDeletionOp := false
		if currentlyProcessing {
			status := galleryService.GetStatus(galleryID)
			if status != nil && status.Deletion {
				isDeletionOp = true
			}
			jobID = processTracker.Get(galleryID)
			// TODO:
			// case not handled, if status == nil : "Waiting"
		}

		nodes := []elem.Node{
			cardSpan("Repository: "+m.Gallery.Name, "fa-brands fa-git-alt"),
		}

		if m.License != "" {
			nodes = append(nodes,
				cardSpan("License: "+m.License, "fas fa-book"),
			)
		}

		tagsNodes := []elem.Node{}
		for _, tag := range m.Tags {
			tagsNodes = append(tagsNodes,
				searchableElement(tag, "fas fa-tag"),
			)
		}

		nodes = append(nodes,
			elem.Div(
				attrs.Props{
					"class": "flex flex-row flex-wrap content-center",
				},
				tagsNodes...,
			),
		)

		for i, url := range m.URLs {
			nodes = append(nodes,
				link("Link #"+fmt.Sprintf("%d", i+1), url),
			)
		}

		progressMessage := "Installation"
		if isDeletionOp {
			progressMessage = "Deletion"
		}

		return elem.Div(
			attrs.Props{
				"class": "px-6 pt-4 pb-2",
			},
			elem.P(
				attrs.Props{
					"class": "mb-4 text-base",
				},
				nodes...,
			),
			elem.Div(
				attrs.Props{
					"id": "action-div-" + dropBadChars(galleryID),
				},
				elem.If(
					currentlyProcessing,
					elem.Node(elem.Div(
						attrs.Props{},
						reInstallButton(m.ID()),
						deleteButton(m.ID(), m.Name),
					)),
					installButton(m.ID()),
				),
			),
		)
	}
for _, m := range models {
elems := []elem.Node{}
@@ -390,10 +521,7 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g
))
}
elems = append(elems,
modelDescription(m),
modelActionItems(m, processTracker, galleryService),
)
elems = append(elems, descriptionDiv(m), actionDiv(m))
modelsElements = append(modelsElements,
elem.Div(
attrs.Props{

View File

@@ -1,147 +0,0 @@
package elements
import (
"fmt"
"github.com/chasefleming/elem-go"
"github.com/chasefleming/elem-go/attrs"
"github.com/microcosm-cc/bluemonday"
"github.com/mudler/LocalAI/core/p2p"
)
func renderElements(n []elem.Node) string {
render := ""
for _, r := range n {
render += r.Render()
}
return render
}
func P2PNodeStats(nodes []p2p.NodeData) string {
/*
<div class="bg-gray-800 p-6 rounded-lg shadow-lg text-left">
<p class="text-xl font-semibold text-gray-200">Total Workers Detected: {{ len .Nodes }}</p>
{{ $online := 0 }}
{{ range .Nodes }}
{{ if .IsOnline }}
{{ $online = add $online 1 }}
{{ end }}
{{ end }}
<p class="text-xl font-semibold text-gray-200">Total Online Workers: {{$online}}</p>
</div>
*/
online := 0
for _, n := range nodes {
if n.IsOnline() {
online++
}
}
class := "text-green-500"
if online == 0 {
class = "text-red-500"
}
/*
<i class="fas fa-circle animate-pulse text-green-500 ml-2 mr-1"></i>
*/
circle := elem.I(attrs.Props{
"class": "fas fa-circle animate-pulse " + class + " ml-2 mr-1",
})
nodesElements := []elem.Node{
elem.Span(
attrs.Props{
"class": class,
},
circle,
elem.Text(fmt.Sprintf("%d", online)),
),
elem.Span(
attrs.Props{
"class": "text-gray-200",
},
elem.Text(fmt.Sprintf("/%d", len(nodes))),
),
}
return renderElements(nodesElements)
}
func P2PNodeBoxes(nodes []p2p.NodeData) string {
/*
<div class="bg-gray-800 p-4 rounded-lg shadow-lg text-left">
<div class="flex items-center mb-2">
<i class="fas fa-desktop text-gray-400 mr-2"></i>
<span class="text-gray-200 font-semibold">{{.ID}}</span>
</div>
<p class="text-sm text-gray-400 mt-2 flex items-center">
Status:
<i class="fas fa-circle {{ if .IsOnline }}text-green-500{{ else }}text-red-500{{ end }} ml-2 mr-1"></i>
<span class="{{ if .IsOnline }}text-green-400{{ else }}text-red-400{{ end }}">
{{ if .IsOnline }}Online{{ else }}Offline{{ end }}
</span>
</p>
</div>
*/
nodesElements := []elem.Node{}
for _, n := range nodes {
nodesElements = append(nodesElements,
elem.Div(
attrs.Props{
"class": "bg-gray-700 p-6 rounded-lg shadow-lg text-left",
},
elem.P(
attrs.Props{
"class": "text-sm text-gray-400 mt-2 flex",
},
elem.I(
attrs.Props{
"class": "fas fa-desktop text-gray-400 mr-2",
},
),
elem.Text("Name: "),
elem.Span(
attrs.Props{
"class": "text-gray-200 font-semibold ml-2 mr-1",
},
elem.Text(bluemonday.StrictPolicy().Sanitize(n.ID)),
),
elem.Text("Status: "),
elem.If(
n.IsOnline(),
elem.I(
attrs.Props{
"class": "fas fa-circle animate-pulse text-green-500 ml-2 mr-1",
},
),
elem.I(
attrs.Props{
"class": "fas fa-circle animate-pulse text-red-500 ml-2 mr-1",
},
),
),
elem.If(
n.IsOnline(),
elem.Span(
attrs.Props{
"class": "text-green-400",
},
elem.Text("Online"),
),
elem.Span(
attrs.Props{
"class": "text-red-400",
},
elem.Text("Offline"),
),
),
),
))
}
return renderElements(nodesElements)
}
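
The two renderers above are plain string builders: `P2PNodeStats` produces the online/total badge and `P2PNodeBoxes` one status card per worker. A minimal usage sketch follows, assuming the package lives at `core/http/elements` and that `p2p.NodeData` can be constructed as a literal with its exported `ID` field (both assumptions, not confirmed by this diff):

```go
package main

import (
	"fmt"

	"github.com/mudler/LocalAI/core/http/elements"
	"github.com/mudler/LocalAI/core/p2p"
)

func main() {
	// A single worker; with no liveness recorded it is presumed offline,
	// so the badge should render "0/1" in red.
	nodes := []p2p.NodeData{{ID: "worker-1"}}

	fmt.Println(elements.P2PNodeStats(nodes)) // online/total badge HTML
	fmt.Println(elements.P2PNodeBoxes(nodes)) // one status card per node
}
```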

View File

@@ -1,89 +0,0 @@
package elements
import (
"github.com/chasefleming/elem-go"
"github.com/chasefleming/elem-go/attrs"
"github.com/microcosm-cc/bluemonday"
)
func DoneProgress(galleryID, text string, showDelete bool) string {
return elem.Div(
attrs.Props{
"id": "action-div-" + dropBadChars(galleryID),
},
elem.H3(
attrs.Props{
"role": "status",
"id": "pblabel",
"tabindex": "-1",
"autofocus": "",
},
elem.Text(bluemonday.StrictPolicy().Sanitize(text)),
),
elem.If(showDelete, deleteButton(galleryID), reInstallButton(galleryID)),
).Render()
}
func ErrorProgress(err, galleryName string) string {
return elem.Div(
attrs.Props{},
elem.H3(
attrs.Props{
"role": "status",
"id": "pblabel",
"tabindex": "-1",
"autofocus": "",
},
elem.Text("Error "+bluemonday.StrictPolicy().Sanitize(err)),
),
installButton(galleryName),
).Render()
}
func ProgressBar(progress string) string {
return elem.Div(attrs.Props{
"class": "progress",
"role": "progressbar",
"aria-valuemin": "0",
"aria-valuemax": "100",
"aria-valuenow": "0",
"aria-labelledby": "pblabel",
},
elem.Div(attrs.Props{
"id": "pb",
"class": "progress-bar",
"style": "width:" + progress + "%",
}),
).Render()
}
func StartProgressBar(uid, progress, text string) string {
if progress == "" {
progress = "0"
}
return elem.Div(
attrs.Props{
"hx-trigger": "done",
"hx-get": "/browse/job/" + uid,
"hx-swap": "outerHTML",
"hx-target": "this",
},
elem.H3(
attrs.Props{
"role": "status",
"id": "pblabel",
"tabindex": "-1",
"autofocus": "",
},
elem.Text(bluemonday.StrictPolicy().Sanitize(text)), //Perhaps overly defensive
elem.Div(attrs.Props{
"hx-get": "/browse/job/progress/" + uid,
"hx-trigger": "every 600ms",
"hx-target": "this",
"hx-swap": "innerHTML",
},
elem.Raw(ProgressBar(progress)),
),
),
).Render()
}
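
These helpers drive an HTMX polling loop: `StartProgressBar` renders a wrapper that re-fetches `/browse/job/progress/{uid}` every 600ms and swaps itself out when the job fires a `done` event, at which point `DoneProgress` or `ErrorProgress` takes its place. A rendering sketch using the signatures shown above (the job id and the import path are assumptions):

```go
package main

import (
	"fmt"

	"github.com/mudler/LocalAI/core/http/elements"
)

func main() {
	// Initial scaffold injected into the page for job "1234":
	// htmx polls the progress endpoint every 600ms via hx-get.
	fmt.Println(elements.StartProgressBar("1234", "0", "Installing model"))

	// What each poll would swap into the bar's target (here 42%).
	fmt.Println(elements.ProgressBar("42"))

	// Terminal states swapped in once the "done" event fires.
	fmt.Println(elements.DoneProgress("localai@my-model", "Installed", true))
	fmt.Println(elements.ErrorProgress("download failed", "my-model"))
}
```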

View File

@@ -21,15 +21,10 @@ func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConf
for b := range appConfig.ExternalGRPCBackends {
availableBackends = append(availableBackends, b)
}
sysmodels := []schema.SysInfoModel{}
for _, m := range loadedModels {
sysmodels = append(sysmodels, schema.SysInfoModel{ID: m.ID})
}
return c.JSON(
schema.SystemInformationResponse{
Backends: availableBackends,
Models: sysmodels,
Models: loadedModels,
},
)
}

View File

@@ -9,19 +9,16 @@ import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/schema"
"github.com/rs/zerolog/log"
"github.com/mudler/LocalAI/pkg/utils"
)
// TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech
//
// @Summary Generates audio from the input text.
// @Accept json
// @Produce audio/x-wav
// @Param request body schema.TTSRequest true "query params"
// @Success 200 {string} binary "generated audio/wav file"
// @Router /v1/audio/speech [post]
// @Router /tts [post]
func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
@@ -70,13 +67,6 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
if err != nil {
return err
}
// Convert generated file to target format
filePath, err = utils.AudioConvert(filePath, input.Format)
if err != nil {
return err
}
return c.Download(filePath)
}
}
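
The removed hunk above dropped the format-conversion step (`utils.AudioConvert`) between generation and download. That helper's internals are not part of this diff; as a hedged illustration only, a converter like it could shell out to ffmpeg:

```go
package main

import (
	"fmt"
	"os/exec"
	"path/filepath"
	"strings"
)

// audioConvert converts src (a .wav file) to the given target format
// (e.g. "mp3") and returns the new path. Requires ffmpeg on PATH.
// Illustrative sketch; not the actual utils.AudioConvert implementation.
func audioConvert(src, format string) (string, error) {
	if format == "" || format == "wav" {
		return src, nil // nothing to do
	}
	dst := strings.TrimSuffix(src, filepath.Ext(src)) + "." + format
	cmd := exec.Command("ffmpeg", "-y", "-i", src, dst)
	if out, err := cmd.CombinedOutput(); err != nil {
		return "", fmt.Errorf("ffmpeg failed: %w: %s", err, out)
	}
	return dst, nil
}

func main() {
	out, err := audioConvert("speech.wav", "mp3")
	fmt.Println(out, err)
}
```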

View File

@@ -1,68 +0,0 @@
package localai
import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
)
// VADEndpoint is Voice-Activation-Detection endpoint
// @Summary Detect voice fragments in an audio stream
// @Accept json
// @Param request body schema.VADRequest true "query params"
// @Success 200 {object} proto.VADResponse "Response"
// @Router /vad [post]
func VADEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.VADRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
}
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
if err != nil {
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
}
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
config.LoadOptionDebug(appConfig.Debug),
config.LoadOptionThreads(appConfig.Threads),
config.LoadOptionContextSize(appConfig.ContextSize),
config.LoadOptionF16(appConfig.F16),
)
if err != nil {
log.Err(err)
modelFile = input.Model
log.Warn().Msgf("Model not found in context: %s", input.Model)
} else {
modelFile = cfg.Model
}
log.Debug().Msgf("Request for model: %s", modelFile)
opts := backend.ModelOptions(*cfg, appConfig, model.WithBackendString(cfg.Backend), model.WithModel(modelFile))
vadModel, err := ml.Load(opts...)
if err != nil {
return err
}
req := proto.VADRequest{
Audio: input.Audio,
}
resp, err := vadModel.VAD(c.Context(), &req)
if err != nil {
return err
}
return c.JSON(resp)
}
}
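
A hedged client sketch for the `/vad` endpoint above. The request field names (`model`, plus `audio` as raw PCM float samples) are assumptions inferred from the `schema.VADRequest` usage in the handler; adjust if the schema differs:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	payload := map[string]any{
		"model": "silero-vad",           // assumed model name
		"audio": []float32{0.0, 0.01},   // raw PCM samples, assumed format
	}
	body, _ := json.Marshal(payload)

	resp, err := http.Post("http://localhost:8080/vad", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Detected voice segments come back as JSON (proto.VADResponse).
	fmt.Println(resp.Status)
}
```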

View File

@@ -14,8 +14,6 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/functions"
"github.com/mudler/LocalAI/pkg/templates"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
"github.com/valyala/fasthttp"
@@ -26,7 +24,7 @@ import (
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/chat/completions [post]
func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
var id, textContentToReturn string
var created int
@@ -296,10 +294,148 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
// If we are using the tokenizer template, we don't need to process the messages
// unless we are processing functions
if !config.TemplateConfig.UseTokenizerTemplate || shouldUseFn {
predInput = evaluator.TemplateMessages(input.Messages, config, funcs, shouldUseFn)
suppressConfigSystemPrompt := false
mess := []string{}
for messageIndex, i := range input.Messages {
var content string
role := i.Role
// if function call, we might want to customize the role so we can display better that the "assistant called a json action"
// if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request
if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" {
roleFn := "assistant_function_call"
r := config.Roles[roleFn]
if r != "" {
role = roleFn
}
}
r := config.Roles[role]
contentExists := i.Content != nil && i.StringContent != ""
fcall := i.FunctionCall
if len(i.ToolCalls) > 0 {
fcall = i.ToolCalls
}
// First attempt to populate content via a chat message specific template
if config.TemplateConfig.ChatMessage != "" {
chatMessageData := model.ChatMessageTemplateData{
SystemPrompt: config.SystemPrompt,
Role: r,
RoleName: role,
Content: i.StringContent,
FunctionCall: fcall,
FunctionName: i.Name,
LastMessage: messageIndex == (len(input.Messages) - 1),
Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)),
MessageIndex: messageIndex,
}
templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
if err != nil {
log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping")
} else {
if templatedChatMessage == "" {
log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData)
continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf
}
log.Debug().Msgf("templated message for chat: %s", templatedChatMessage)
content = templatedChatMessage
}
}
marshalAnyRole := func(f any) {
j, err := json.Marshal(f)
if err == nil {
if contentExists {
content += "\n" + fmt.Sprint(r, " ", string(j))
} else {
content = fmt.Sprint(r, " ", string(j))
}
}
}
marshalAny := func(f any) {
j, err := json.Marshal(f)
if err == nil {
if contentExists {
content += "\n" + string(j)
} else {
content = string(j)
}
}
}
// If this model doesn't have such a template, or if that template fails to return a value, template at the message level.
if content == "" {
if r != "" {
if contentExists {
content = fmt.Sprint(r, i.StringContent)
}
if i.FunctionCall != nil {
marshalAnyRole(i.FunctionCall)
}
if i.ToolCalls != nil {
marshalAnyRole(i.ToolCalls)
}
} else {
if contentExists {
content = fmt.Sprint(i.StringContent)
}
if i.FunctionCall != nil {
marshalAny(i.FunctionCall)
}
if i.ToolCalls != nil {
marshalAny(i.ToolCalls)
}
}
// Special Handling: System. We care if it was printed at all, not the r branch, so check separately
if contentExists && role == "system" {
suppressConfigSystemPrompt = true
}
}
mess = append(mess, content)
}
joinCharacter := "\n"
if config.TemplateConfig.JoinChatMessagesByCharacter != nil {
joinCharacter = *config.TemplateConfig.JoinChatMessagesByCharacter
}
predInput = strings.Join(mess, joinCharacter)
log.Debug().Msgf("Prompt (before templating): %s", predInput)
templateFile := ""
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
templateFile = config.Model
}
if config.TemplateConfig.Chat != "" && !shouldUseFn {
templateFile = config.TemplateConfig.Chat
}
if config.TemplateConfig.Functions != "" && shouldUseFn {
templateFile = config.TemplateConfig.Functions
}
if templateFile != "" {
templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
SystemPrompt: config.SystemPrompt,
SuppressSystemPrompt: suppressConfigSystemPrompt,
Input: predInput,
Functions: funcs,
})
if err == nil {
predInput = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", predInput)
} else {
log.Debug().Msgf("Template failed loading: %s", err.Error())
}
}
log.Debug().Msgf("Prompt (after templating): %s", predInput)
if config.Grammar != "" {
if shouldUseFn && config.Grammar != "" {
log.Debug().Msgf("Grammar: %+v", config.Grammar)
}
}
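
The restored block above templates each chat message individually (via the `ChatMessage` template and `ChatMessageTemplateData`), then joins the results with `JoinChatMessagesByCharacter` (default `\n`) before the final prompt template runs. A minimal sketch of that per-message flow with Go's `text/template`; only the field names come from the diff, the template string itself is an illustrative assumption:

```go
package main

import (
	"fmt"
	"os"
	"strings"
	"text/template"
)

// Subset of the per-message data the diff passes into the template.
type chatMessageData struct {
	Role    string
	Content string
}

func main() {
	// Assumed ChatML-like per-message template.
	tmpl := template.Must(template.New("chat").Parse("<|{{.Role}}|>{{.Content}}</s>"))

	messages := []chatMessageData{
		{Role: "system", Content: "You are helpful."},
		{Role: "user", Content: "Hello!"},
	}

	rendered := []string{}
	for _, m := range messages {
		var sb strings.Builder
		if err := tmpl.Execute(&sb, m); err != nil {
			fmt.Fprintln(os.Stderr, err)
			continue // mirror the diff: skip messages that fail to template
		}
		rendered = append(rendered, sb.String())
	}

	// JoinChatMessagesByCharacter defaults to "\n" in the diff above.
	fmt.Println(strings.Join(rendered, "\n"))
}
```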

View File

@@ -16,7 +16,6 @@ import (
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/functions"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/templates"
"github.com/rs/zerolog/log"
"github.com/valyala/fasthttp"
)
@@ -26,7 +25,7 @@ import (
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/completions [post]
func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
id := uuid.New().String()
created := int(time.Now().Unix())
@@ -95,6 +94,17 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
c.Set("Transfer-Encoding", "chunked")
}
templateFile := ""
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
templateFile = config.Model
}
if config.TemplateConfig.Completion != "" {
templateFile = config.TemplateConfig.Completion
}
if input.Stream {
if len(config.PromptStrings) > 1 {
return errors.New("cannot handle more than 1 `PromptStrings` when Streaming")
@@ -102,13 +112,15 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
predInput := config.PromptStrings[0]
templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.CompletionPromptTemplate, *config, templates.PromptTemplateData{
Input: predInput,
SystemPrompt: config.SystemPrompt,
})
if err == nil {
predInput = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", predInput)
if templateFile != "" {
templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
Input: predInput,
SystemPrompt: config.SystemPrompt,
})
if err == nil {
predInput = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", predInput)
}
}
responses := make(chan schema.OpenAIResponse)
@@ -153,13 +165,16 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
totalTokenUsage := backend.TokenUsage{}
for k, i := range config.PromptStrings {
templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.CompletionPromptTemplate, *config, templates.PromptTemplateData{
SystemPrompt: config.SystemPrompt,
Input: i,
})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
if templateFile != "" {
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
SystemPrompt: config.SystemPrompt,
Input: i,
})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
}
}
r, tokenUsage, err := ComputeChoices(
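
As with chat, the restored completion path resolves a template file (either `<model>.tmpl` next to the model file or `config.TemplateConfig.Completion`) and renders it with the prompt data. A sketch of what such a `.tmpl` body and its evaluation could look like; the template text is illustrative, while the field names (`SystemPrompt`, `Input`) are taken from the diff:

```go
package main

import (
	"fmt"
	"os"
	"text/template"
)

// Mirrors the fields the diff passes into the completion template.
type promptTemplateData struct {
	SystemPrompt string
	Input        string
}

func main() {
	// Illustrative contents of a "file.bin.tmpl" prompt template.
	const completionTmpl = "{{.SystemPrompt}}\n\n### Instruction:\n{{.Input}}\n\n### Response:\n"

	tmpl := template.Must(template.New("completion").Parse(completionTmpl))
	err := tmpl.Execute(os.Stdout, promptTemplateData{
		SystemPrompt: "You are a concise assistant.",
		Input:        "Summarize unified diffs in one sentence.",
	})
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}
```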

View File

@@ -12,7 +12,6 @@ import (
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/schema"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/templates"
"github.com/rs/zerolog/log"
)
@@ -22,8 +21,7 @@ import (
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/edits [post]
func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
if err != nil {
@@ -37,18 +35,31 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
log.Debug().Msgf("Parameter Config: %+v", config)
templateFile := ""
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
templateFile = config.Model
}
if config.TemplateConfig.Edit != "" {
templateFile = config.TemplateConfig.Edit
}
var result []schema.Choice
totalTokenUsage := backend.TokenUsage{}
for _, i := range config.InputStrings {
templatedInput, err := evaluator.EvaluateTemplateForPrompt(templates.EditPromptTemplate, *config, templates.PromptTemplateData{
Input: i,
Instruction: input.Instruction,
SystemPrompt: config.SystemPrompt,
})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
if templateFile != "" {
templatedInput, err := ml.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{
Input: i,
Instruction: input.Instruction,
SystemPrompt: config.SystemPrompt,
})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
}
}
r, tokenUsage, err := ComputeChoices(input, i, config, appConfig, ml, func(s string, c *[]schema.Choice) {

View File

@@ -136,11 +136,6 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
config.Backend = model.StableDiffusionBackend
}
if !strings.Contains(input.Size, "x") {
input.Size = "512x512"
log.Warn().Msgf("Invalid size, using default 512x512")
}
sizeParts := strings.Split(input.Size, "x")
if len(sizeParts) != 2 {
return fmt.Errorf("invalid value for 'size'")
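
For reference, the size handling that remains after the removed fallback: the endpoint splits a `WIDTHxHEIGHT` string and rejects anything else, where the deleted lines used to coerce malformed input to `512x512`. A standalone sketch of that parse:

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseSize splits "WIDTHxHEIGHT" into integer dimensions,
// rejecting any other shape of input.
func parseSize(size string) (int, int, error) {
	parts := strings.Split(size, "x")
	if len(parts) != 2 {
		return 0, 0, fmt.Errorf("invalid value for 'size'")
	}
	w, err := strconv.Atoi(parts[0])
	if err != nil {
		return 0, 0, err
	}
	h, err := strconv.Atoi(parts[1])
	if err != nil {
		return 0, 0, err
	}
	return w, h, nil
}

func main() {
	fmt.Println(parseSize("512x512")) // 512 512 <nil>
	fmt.Println(parseSize("banana"))  // 0 0 invalid value for 'size'
}
```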

View File

@@ -304,6 +304,7 @@ func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *c
config.LoadOptionThreads(threads),
config.LoadOptionContextSize(ctx),
config.LoadOptionF16(f16),
config.ModelPath(loader.ModelPath),
)
// Set the parameters for the language model prediction

View File

@@ -1,95 +1,95 @@
package middleware
import (
"crypto/subtle"
"errors"
"github.com/dave-gray101/v2keyauth"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/keyauth"
"github.com/mudler/LocalAI/core/config"
)
// This file contains the configuration generators and handler functions that are used along with the fiber/keyauth middleware
// Currently this requires an upstream patch - and feature patches are no longer accepted to v2
// Therefore `dave-gray101/v2keyauth` contains the v2 backport of the middleware until v3 stabilizes and we migrate.
func GetKeyAuthConfig(applicationConfig *config.ApplicationConfig) (*v2keyauth.Config, error) {
customLookup, err := v2keyauth.MultipleKeySourceLookup([]string{"header:Authorization", "header:x-api-key", "header:xi-api-key", "cookie:token"}, keyauth.ConfigDefault.AuthScheme)
if err != nil {
return nil, err
}
return &v2keyauth.Config{
CustomKeyLookup: customLookup,
Next: getApiKeyRequiredFilterFunction(applicationConfig),
Validator: getApiKeyValidationFunction(applicationConfig),
ErrorHandler: getApiKeyErrorHandler(applicationConfig),
AuthScheme: "Bearer",
}, nil
}
func getApiKeyErrorHandler(applicationConfig *config.ApplicationConfig) fiber.ErrorHandler {
return func(ctx *fiber.Ctx, err error) error {
if errors.Is(err, v2keyauth.ErrMissingOrMalformedAPIKey) {
if len(applicationConfig.ApiKeys) == 0 {
return ctx.Next() // if no keys are set up, any error we get here is not an error.
}
ctx.Set("WWW-Authenticate", "Bearer")
if applicationConfig.OpaqueErrors {
return ctx.SendStatus(401)
}
return ctx.Status(401).Render("views/login", nil)
}
if applicationConfig.OpaqueErrors {
return ctx.SendStatus(500)
}
return err
}
}
func getApiKeyValidationFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx, string) (bool, error) {
if applicationConfig.UseSubtleKeyComparison {
return func(ctx *fiber.Ctx, apiKey string) (bool, error) {
if len(applicationConfig.ApiKeys) == 0 {
return true, nil // If no keys are setup, accept everything
}
for _, validKey := range applicationConfig.ApiKeys {
if subtle.ConstantTimeCompare([]byte(apiKey), []byte(validKey)) == 1 {
return true, nil
}
}
return false, v2keyauth.ErrMissingOrMalformedAPIKey
}
}
return func(ctx *fiber.Ctx, apiKey string) (bool, error) {
if len(applicationConfig.ApiKeys) == 0 {
return true, nil // If no keys are setup, accept everything
}
for _, validKey := range applicationConfig.ApiKeys {
if apiKey == validKey {
return true, nil
}
}
return false, v2keyauth.ErrMissingOrMalformedAPIKey
}
}
func getApiKeyRequiredFilterFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx) bool {
if applicationConfig.DisableApiKeyRequirementForHttpGet {
return func(c *fiber.Ctx) bool {
if c.Method() != "GET" {
return false
}
for _, rx := range applicationConfig.HttpGetExemptedEndpoints {
if rx.MatchString(c.Path()) {
return true
}
}
return false
}
}
return func(c *fiber.Ctx) bool { return false }
}
package middleware
import (
"crypto/subtle"
"errors"
"github.com/dave-gray101/v2keyauth"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/keyauth"
"github.com/microcosm-cc/bluemonday"
"github.com/mudler/LocalAI/core/config"
)
// This file contains the configuration generators and handler functions that are used along with the fiber/keyauth middleware
// Currently this requires an upstream patch - and feature patches are no longer accepted to v2
// Therefore `dave-gray101/v2keyauth` contains the v2 backport of the middleware until v3 stabilizes and we migrate.
func GetKeyAuthConfig(applicationConfig *config.ApplicationConfig) (*v2keyauth.Config, error) {
customLookup, err := v2keyauth.MultipleKeySourceLookup([]string{"header:Authorization", "header:x-api-key", "header:xi-api-key"}, keyauth.ConfigDefault.AuthScheme)
if err != nil {
return nil, err
}
return &v2keyauth.Config{
CustomKeyLookup: customLookup,
Next: getApiKeyRequiredFilterFunction(applicationConfig),
Validator: getApiKeyValidationFunction(applicationConfig),
ErrorHandler: getApiKeyErrorHandler(applicationConfig),
AuthScheme: "Bearer",
}, nil
}
func getApiKeyErrorHandler(applicationConfig *config.ApplicationConfig) fiber.ErrorHandler {
return func(ctx *fiber.Ctx, err error) error {
if errors.Is(err, v2keyauth.ErrMissingOrMalformedAPIKey) {
if len(applicationConfig.ApiKeys) == 0 {
return ctx.Next() // if no keys are set up, any error we get here is not an error.
}
if applicationConfig.OpaqueErrors {
return ctx.SendStatus(403)
}
return ctx.Status(403).SendString(bluemonday.StrictPolicy().Sanitize(err.Error()))
}
if applicationConfig.OpaqueErrors {
return ctx.SendStatus(500)
}
return err
}
}
func getApiKeyValidationFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx, string) (bool, error) {
if applicationConfig.UseSubtleKeyComparison {
return func(ctx *fiber.Ctx, apiKey string) (bool, error) {
if len(applicationConfig.ApiKeys) == 0 {
return true, nil // If no keys are setup, accept everything
}
for _, validKey := range applicationConfig.ApiKeys {
if subtle.ConstantTimeCompare([]byte(apiKey), []byte(validKey)) == 1 {
return true, nil
}
}
return false, v2keyauth.ErrMissingOrMalformedAPIKey
}
}
return func(ctx *fiber.Ctx, apiKey string) (bool, error) {
if len(applicationConfig.ApiKeys) == 0 {
return true, nil // If no keys are setup, accept everything
}
for _, validKey := range applicationConfig.ApiKeys {
if apiKey == validKey {
return true, nil
}
}
return false, v2keyauth.ErrMissingOrMalformedAPIKey
}
}
func getApiKeyRequiredFilterFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx) bool {
if applicationConfig.DisableApiKeyRequirementForHttpGet {
return func(c *fiber.Ctx) bool {
if c.Method() != "GET" {
return false
}
for _, rx := range applicationConfig.HttpGetExemptedEndpoints {
if rx.MatchString(c.Path()) {
return true
}
}
return false
}
}
return func(c *fiber.Ctx) bool { return false }
}
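
A hedged wiring sketch for the configuration generator above. The `v2keyauth.New` constructor name is an assumption based on the upstream fiber keyauth middleware this package backports, and the `core/http/middleware` import path is likewise assumed; check `dave-gray101/v2keyauth` for the exact entrypoint:

```go
package main

import (
	"log"

	"github.com/dave-gray101/v2keyauth"
	"github.com/gofiber/fiber/v2"
	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/http/middleware"
)

func main() {
	// With at least one API key configured, every request must authenticate.
	appConfig := &config.ApplicationConfig{ApiKeys: []string{"secret"}}

	kaCfg, err := middleware.GetKeyAuthConfig(appConfig)
	if err != nil {
		log.Fatal(err)
	}

	app := fiber.New()
	app.Use(v2keyauth.New(*kaCfg)) // assumed constructor name
	app.Get("/v1/models", func(c *fiber.Ctx) error { return c.SendString("ok") })
	log.Fatal(app.Listen(":8080"))
}
```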

View File

@@ -11,62 +11,61 @@ import (
"github.com/mudler/LocalAI/pkg/model"
)
func RegisterLocalAIRoutes(router *fiber.App,
func RegisterLocalAIRoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
galleryService *services.GalleryService) {
router.Get("/swagger/*", swagger.HandlerDefault) // default
app.Get("/swagger/*", swagger.HandlerDefault) // default
// LocalAI API endpoints
if !appConfig.DisableGalleryEndpoint {
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
router.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint())
router.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint())
app.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint())
app.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint())
router.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint())
router.Get("/models/galleries", modelGalleryEndpointService.ListModelGalleriesEndpoint())
router.Post("/models/galleries", modelGalleryEndpointService.AddModelGalleryEndpoint())
router.Delete("/models/galleries", modelGalleryEndpointService.RemoveModelGalleryEndpoint())
router.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint())
router.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint())
app.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint())
app.Get("/models/galleries", modelGalleryEndpointService.ListModelGalleriesEndpoint())
app.Post("/models/galleries", modelGalleryEndpointService.AddModelGalleryEndpoint())
app.Delete("/models/galleries", modelGalleryEndpointService.RemoveModelGalleryEndpoint())
app.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint())
app.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint())
}
router.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig))
router.Post("/vad", localai.VADEndpoint(cl, ml, appConfig))
app.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig))
// Stores
sl := model.NewModelLoader("")
router.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig))
router.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig))
router.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
router.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))
app.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig))
app.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig))
app.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
app.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))
if !appConfig.DisableMetrics {
router.Get("/metrics", localai.LocalAIMetricsEndpoint())
app.Get("/metrics", localai.LocalAIMetricsEndpoint())
}
// Experimental Backend Statistics Module
backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
router.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
router.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
app.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
app.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
// p2p
if p2p.IsP2PEnabled() {
router.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
router.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))
app.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
app.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))
}
router.Get("/version", func(c *fiber.Ctx) error {
app.Get("/version", func(c *fiber.Ctx) error {
return c.JSON(struct {
Version string `json:"version"`
}{Version: internal.PrintableVersion()})
})
router.Get("/system", localai.SystemInformations(ml, appConfig))
app.Get("/system", localai.SystemInformations(ml, appConfig))
// misc
router.Post("/v1/tokenize", localai.TokenizeEndpoint(cl, ml, appConfig))
app.Post("/v1/tokenize", localai.TokenizeEndpoint(cl, ml, appConfig))
}

View File

@@ -2,134 +2,84 @@ package routes
import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/application"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/http/endpoints/openai"
"github.com/mudler/LocalAI/pkg/model"
)
func RegisterOpenAIRoutes(app *fiber.App,
application *application.Application) {
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig) {
// openAI compatible API endpoint
// chat
app.Post("/v1/chat/completions",
openai.ChatEndpoint(
application.BackendLoader(),
application.ModelLoader(),
application.TemplatesEvaluator(),
application.ApplicationConfig(),
),
)
app.Post("/chat/completions",
openai.ChatEndpoint(
application.BackendLoader(),
application.ModelLoader(),
application.TemplatesEvaluator(),
application.ApplicationConfig(),
),
)
app.Post("/v1/chat/completions", openai.ChatEndpoint(cl, ml, appConfig))
app.Post("/chat/completions", openai.ChatEndpoint(cl, ml, appConfig))
// edit
app.Post("/v1/edits",
openai.EditEndpoint(
application.BackendLoader(),
application.ModelLoader(),
application.TemplatesEvaluator(),
application.ApplicationConfig(),
),
)
app.Post("/edits",
openai.EditEndpoint(
application.BackendLoader(),
application.ModelLoader(),
application.TemplatesEvaluator(),
application.ApplicationConfig(),
),
)
app.Post("/v1/edits", openai.EditEndpoint(cl, ml, appConfig))
app.Post("/edits", openai.EditEndpoint(cl, ml, appConfig))
// assistant
app.Get("/v1/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/assistants", openai.ListAssistantsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/v1/assistants", openai.CreateAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/assistants", openai.CreateAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Get("/v1/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig))
app.Get("/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig))
app.Post("/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig))
app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig))
app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig))
app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig))
// files
app.Post("/v1/files", openai.UploadFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Post("/files", openai.UploadFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Get("/v1/files", openai.ListFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Get("/files", openai.ListFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Get("/v1/files/:file_id", openai.GetFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Get("/files/:file_id", openai.GetFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Delete("/v1/files/:file_id", openai.DeleteFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Delete("/files/:file_id", openai.DeleteFilesEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(application.BackendLoader(), application.ApplicationConfig()))
app.Post("/v1/files", openai.UploadFilesEndpoint(cl, appConfig))
app.Post("/files", openai.UploadFilesEndpoint(cl, appConfig))
app.Get("/v1/files", openai.ListFilesEndpoint(cl, appConfig))
app.Get("/files", openai.ListFilesEndpoint(cl, appConfig))
app.Get("/v1/files/:file_id", openai.GetFilesEndpoint(cl, appConfig))
app.Get("/files/:file_id", openai.GetFilesEndpoint(cl, appConfig))
app.Delete("/v1/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig))
app.Delete("/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig))
app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig))
app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig))
// completion
app.Post("/v1/completions",
openai.CompletionEndpoint(
application.BackendLoader(),
application.ModelLoader(),
application.TemplatesEvaluator(),
application.ApplicationConfig(),
),
)
app.Post("/completions",
openai.CompletionEndpoint(
application.BackendLoader(),
application.ModelLoader(),
application.TemplatesEvaluator(),
application.ApplicationConfig(),
),
)
app.Post("/v1/engines/:model/completions",
openai.CompletionEndpoint(
application.BackendLoader(),
application.ModelLoader(),
application.TemplatesEvaluator(),
application.ApplicationConfig(),
),
)
app.Post("/v1/completions", openai.CompletionEndpoint(cl, ml, appConfig))
app.Post("/completions", openai.CompletionEndpoint(cl, ml, appConfig))
app.Post("/v1/engines/:model/completions", openai.CompletionEndpoint(cl, ml, appConfig))
// embeddings
app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
app.Post("/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
// audio
app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/v1/audio/speech", localai.TTSEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(cl, ml, appConfig))
app.Post("/v1/audio/speech", localai.TTSEndpoint(cl, ml, appConfig))
// images
app.Post("/v1/images/generations", openai.ImageEndpoint(application.BackendLoader(), application.ModelLoader(), application.ApplicationConfig()))
app.Post("/v1/images/generations", openai.ImageEndpoint(cl, ml, appConfig))
if application.ApplicationConfig().ImageDir != "" {
app.Static("/generated-images", application.ApplicationConfig().ImageDir)
if appConfig.ImageDir != "" {
app.Static("/generated-images", appConfig.ImageDir)
}
if application.ApplicationConfig().AudioDir != "" {
app.Static("/generated-audio", application.ApplicationConfig().AudioDir)
if appConfig.AudioDir != "" {
app.Static("/generated-audio", appConfig.AudioDir)
}
// List models
app.Get("/v1/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader()))
app.Get("/models", openai.ListModelsEndpoint(application.BackendLoader(), application.ModelLoader()))
app.Get("/v1/models", openai.ListModelsEndpoint(cl, ml))
app.Get("/models", openai.ListModelsEndpoint(cl, ml))
}

View File

File diff suppressed because one or more lines are too long

View File

@@ -1,23 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Open Authenticated Website</title>
</head>
<body>
<h1>Authorization is required</h1>
<input type="text" id="token" placeholder="Token" />
<button onclick="login()">Login</button>
<script>
function login() {
const token = document.getElementById('token').value;
var date = new Date();
date.setTime(date.getTime() + (24*60*60*1000));
document.cookie = `token=${token}; expires=${date.toGMTString()}`;
window.location.reload();
}
</script>
</body>
</html>

Some files were not shown because too many files have changed in this diff