Compare commits

..

1 Commits

Author SHA1 Message Date
Ettore Di Giacinto
a1d5462ad0 Stores to chromem (WIP)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-01-21 10:35:01 +01:00
93 changed files with 1484 additions and 1376 deletions

View File

@@ -7,7 +7,7 @@ services:
args:
- FFMPEG=true
- IMAGE_TYPE=extras
- GO_TAGS=p2p tts
- GO_TAGS=stablediffusion p2p tts
env_file:
- ../.env
ports:

6
.env
View File

@@ -38,12 +38,12 @@
## Uncomment and set to true to enable rebuilding from source
# REBUILD=true
## Enable go tags, available: p2p, tts
## p2p: enable distributed inferencing
## Enable go tags, available: stablediffusion, tts
## stablediffusion: image generation with stablediffusion
## tts: enables text-to-speech with go-piper
## (requires REBUILD=true)
#
# GO_TAGS=p2p
# GO_TAGS=stablediffusion
## Path where to store generated images
# LOCALAI_IMAGE_PATH=/tmp/generated/images

View File

@@ -237,7 +237,40 @@ jobs:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
build-stablediffusion:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- name: Build stablediffusion
run: |
export PATH=$PATH:$GOPATH/bin
make backend-assets/grpc/stablediffusion
mkdir -p release && cp backend-assets/grpc/stablediffusion release
env:
GO_TAGS: stablediffusion
- uses: actions/upload-artifact@v4
with:
name: stablediffusion
path: release/
- name: Release
uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
build-macOS-x86_64:
runs-on: macos-13

View File

@@ -78,6 +78,57 @@ jobs:
make --jobs=5 --output-sync=target -C backend/python/diffusers
make --jobs=5 --output-sync=target -C backend/python/diffusers test
tests-parler-tts:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
- name: Test parler-tts
run: |
make --jobs=5 --output-sync=target -C backend/python/parler-tts
make --jobs=5 --output-sync=target -C backend/python/parler-tts test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19
with:
detached: true
connect-timeout-seconds: 180
limit-access-to-actor: true
tests-openvoice:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
# Install UV
curl -LsSf https://astral.sh/uv/install.sh | sh
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
sudo apt-get install -y libopencv-dev
pip install --user --no-cache-dir grpcio-tools==1.64.1
- name: Test openvoice
run: |
make --jobs=5 --output-sync=target -C backend/python/openvoice
make --jobs=5 --output-sync=target -C backend/python/openvoice test
# tests-transformers-musicgen:
# runs-on: ubuntu-latest
# steps:

View File

@@ -105,7 +105,9 @@ jobs:
# Pre-build piper before we start tests in order to have shared libraries in place
make sources/go-piper && \
GO_TAGS="tts" make -C sources/go-piper piper.o && \
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
env:
CUDA_VERSION: 12-4
- name: Cache grpc
@@ -127,7 +129,7 @@ jobs:
cd grpc && cd cmake/build && sudo make --jobs 5 install
- name: Test
run: |
PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.19

2
.vscode/launch.json vendored
View File

@@ -26,7 +26,7 @@
"LOCALAI_P2P": "true",
"LOCALAI_FEDERATED": "true"
},
"buildFlags": ["-tags", "p2p tts", "-v"],
"buildFlags": ["-tags", "stablediffusion p2p tts", "-v"],
"envFile": "${workspaceFolder}/.env",
"cwd": "${workspaceRoot}"
}

View File

@@ -15,7 +15,8 @@ ARG TARGETARCH
ARG TARGETVARIANT
ENV DEBIAN_FRONTEND=noninteractive
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
@@ -68,10 +69,14 @@ ENV PATH=/opt/rocm/bin:${PATH}
# OpenBLAS requirements and stable diffusion
RUN apt-get update && \
apt-get install -y --no-install-recommends \
libopenblas-dev && \
libopenblas-dev \
libopencv-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Set up OpenCV
RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
WORKDIR /build
###################################
@@ -246,7 +251,7 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
FROM requirements-drivers AS builder-base
ARG GO_TAGS="tts p2p"
ARG GO_TAGS="stablediffusion tts p2p"
ARG GRPC_BACKENDS
ARG MAKEFLAGS
ARG LD_FLAGS="-s -w"
@@ -280,12 +285,35 @@ RUN <<EOT bash
fi
EOT
###################################
###################################
# This first portion of builder holds the layers specifically used to build backend-assets/grpc/stablediffusion
# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
FROM builder-base AS builder-sd
# stablediffusion does not tolerate a newer version of abseil, copy only over enough elements to build it
COPY Makefile .
COPY go.mod .
COPY go.sum .
COPY backend/backend.proto ./backend/backend.proto
COPY backend/go/image/stablediffusion ./backend/go/image/stablediffusion
COPY pkg/grpc ./pkg/grpc
COPY pkg/stablediffusion ./pkg/stablediffusion
RUN git init
RUN make sources/go-stable-diffusion
RUN touch prepare-sources
# Actually build the backend
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make backend-assets/grpc/stablediffusion
###################################
###################################
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
# Adjustments to the build process should likely be made here.
FROM builder-base AS builder
FROM builder-sd AS builder
# Install the pre-built GRPC
COPY --from=grpc /opt/grpc /usr/local
@@ -303,7 +331,7 @@ RUN make prepare
## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
## (both will use CUDA or hipblas for the actual computation)
RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
else \
make build; \
fi
@@ -325,6 +353,8 @@ ARG FFMPEG
COPY --from=grpc /opt/grpc /usr/local
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion /build/backend-assets/grpc/stablediffusion
COPY .devcontainer-scripts /.devcontainer-scripts
# Add FFmpeg
@@ -397,6 +427,9 @@ COPY --from=builder /build/local-ai ./
# Copy shared libraries for piper
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
# do not let stablediffusion rebuild (requires an older version of absl)
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
# Change the shell to bash so we can use [[ tests below
SHELL ["/bin/bash", "-c"]
# We try to strike a balance between individual layer size (as that affects total push time) and total image size
@@ -410,8 +443,8 @@ RUN if [[ ( "${IMAGE_TYPE}" == "extras ")]]; then \
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/coqui \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "faster-whisper" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/faster-whisper \
if [[ ( "${EXTRA_BACKENDS}" =~ "parler-tts" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/parler-tts \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/diffusers \
@@ -420,6 +453,9 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/kokoro \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/openvoice \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/exllama2 \
; fi && \
@@ -438,6 +474,9 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/rerankers \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "mamba" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/mamba \
; fi
# Make sure the models directory exists

View File

@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=6152129d05870cb38162c422c6ba80434e021e9f
CPPLLAMA_VERSION?=92bc493917d43b83e592349e138b54c90b1c3ea7
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
@@ -18,6 +18,10 @@ WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
PIPER_REPO?=https://github.com/mudler/go-piper
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
# stablediffusion version
STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
# bark.cpp
BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
BARKCPP_VERSION?=v1.0.0
@@ -175,6 +179,11 @@ ifeq ($(STATIC),true)
LD_FLAGS+=-linkmode external -extldflags -static
endif
ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
# OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
OPTIONAL_GRPC+=backend-assets/grpc/stablediffusion
endif
ifeq ($(findstring tts,$(GO_TAGS)),tts)
# OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
# OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
@@ -186,7 +195,6 @@ endif
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
@@ -265,6 +273,19 @@ sources/go-piper:
sources/go-piper/libpiper_binding.a: sources/go-piper
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
## stable diffusion (onnx)
sources/go-stable-diffusion:
mkdir -p sources/go-stable-diffusion
cd sources/go-stable-diffusion && \
git init && \
git remote add origin $(STABLEDIFFUSION_REPO) && \
git fetch origin && \
git checkout $(STABLEDIFFUSION_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
## stablediffusion (ggml)
sources/stablediffusion-ggml.cpp:
git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
@@ -310,18 +331,20 @@ sources/whisper.cpp:
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion backend/cpp/llama/llama.cpp
replace:
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
dropreplace:
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
prepare-sources: get-sources replace
@@ -332,6 +355,7 @@ rebuild: ## Rebuilds the project
$(GOCMD) clean -cache
$(MAKE) -C sources/go-llama.cpp clean
$(MAKE) -C sources/whisper.cpp clean
$(MAKE) -C sources/go-stable-diffusion clean
$(MAKE) -C sources/go-piper clean
$(MAKE) build
@@ -446,7 +470,7 @@ prepare-test: grpcs
test: prepare test-models/testmodel.ggml grpcs
@echo 'Running tests'
export GO_TAGS="tts debug"
export GO_TAGS="tts stablediffusion debug"
$(MAKE) prepare-test
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
@@ -534,10 +558,10 @@ protogen-go-clean:
$(RM) bin/*
.PHONY: protogen-python
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen
.PHONY: protogen-python-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean
.PHONY: autogptq-protogen
autogptq-protogen:
@@ -571,14 +595,6 @@ diffusers-protogen:
diffusers-protogen-clean:
$(MAKE) -C backend/python/diffusers protogen-clean
.PHONY: faster-whisper-protogen
faster-whisper-protogen:
$(MAKE) -C backend/python/faster-whisper protogen
.PHONY: faster-whisper-protogen-clean
faster-whisper-protogen-clean:
$(MAKE) -C backend/python/faster-whisper protogen-clean
.PHONY: exllama2-protogen
exllama2-protogen:
$(MAKE) -C backend/python/exllama2 protogen
@@ -587,6 +603,14 @@ exllama2-protogen:
exllama2-protogen-clean:
$(MAKE) -C backend/python/exllama2 protogen-clean
.PHONY: mamba-protogen
mamba-protogen:
$(MAKE) -C backend/python/mamba protogen
.PHONY: mamba-protogen-clean
mamba-protogen-clean:
$(MAKE) -C backend/python/mamba protogen-clean
.PHONY: rerankers-protogen
rerankers-protogen:
$(MAKE) -C backend/python/rerankers protogen
@@ -603,6 +627,14 @@ transformers-protogen:
transformers-protogen-clean:
$(MAKE) -C backend/python/transformers protogen-clean
.PHONY: parler-tts-protogen
parler-tts-protogen:
$(MAKE) -C backend/python/parler-tts protogen
.PHONY: parler-tts-protogen-clean
parler-tts-protogen-clean:
$(MAKE) -C backend/python/parler-tts protogen-clean
.PHONY: kokoro-protogen
kokoro-protogen:
$(MAKE) -C backend/python/kokoro protogen
@@ -611,6 +643,14 @@ kokoro-protogen:
kokoro-protogen-clean:
$(MAKE) -C backend/python/kokoro protogen-clean
.PHONY: openvoice-protogen
openvoice-protogen:
$(MAKE) -C backend/python/openvoice protogen
.PHONY: openvoice-protogen-clean
openvoice-protogen-clean:
$(MAKE) -C backend/python/openvoice protogen-clean
.PHONY: vllm-protogen
vllm-protogen:
$(MAKE) -C backend/python/vllm protogen
@@ -626,11 +666,13 @@ prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/bark
$(MAKE) -C backend/python/coqui
$(MAKE) -C backend/python/diffusers
$(MAKE) -C backend/python/faster-whisper
$(MAKE) -C backend/python/vllm
$(MAKE) -C backend/python/mamba
$(MAKE) -C backend/python/rerankers
$(MAKE) -C backend/python/transformers
$(MAKE) -C backend/python/parler-tts
$(MAKE) -C backend/python/kokoro
$(MAKE) -C backend/python/openvoice
$(MAKE) -C backend/python/exllama2
prepare-test-extra: protogen-python
@@ -700,13 +742,6 @@ backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
backend-assets/grpc/llama-cpp-avx512: backend-assets/grpc backend/cpp/llama/llama.cpp
cp -rf backend/cpp/llama backend/cpp/llama-avx512
$(MAKE) -C backend/cpp/llama-avx512 purge
$(info ${GREEN}I llama-cpp build info:avx512${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx512" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-avx512/grpc-server backend-assets/grpc/llama-cpp-avx512
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
cp -rf backend/cpp/llama backend/cpp/llama-avx
$(MAKE) -C backend/cpp/llama-avx purge
@@ -781,6 +816,13 @@ ifneq ($(UPX),)
$(UPX) backend-assets/grpc/piper
endif
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/stablediffusion
endif
backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero

View File

@@ -39,7 +39,7 @@
</p>
<p align="center">
<a href="https://trendshift.io/repositories/5539" target="_blank"><img src="https://trendshift.io/api/badge/repositories/5539" alt="mudler%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
<a href="https://trendshift.io/repositories/1484" target="_blank"><img src="https://trendshift.io/api/badge/repositories/1484" alt="go-skynet%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
</p>
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)

View File

@@ -1,17 +1,56 @@
name: stablediffusion
backend: stablediffusion-ggml
cfg_scale: 4.5
options:
- sampler:euler
backend: stablediffusion
parameters:
model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf
step: 25
model: stablediffusion_assets
license: "BSD-3"
urls:
- https://github.com/EdVince/Stable-Diffusion-NCNN
- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE
description: |
Stable Diffusion in NCNN with c++, supported txt2img and img2img
download_files:
- filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f"
uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin"
- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin"
sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin"
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin"
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
- filename: "stablediffusion_assets/log_sigmas.bin"
sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
- filename: "stablediffusion_assets/vocab.txt"
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
usage: |
curl http://localhost:8080/v1/images/generations \

View File

@@ -21,8 +21,7 @@ service Backend {
rpc Status(HealthMessage) returns (StatusResponse) {}
rpc StoresSet(StoresSetOptions) returns (Result) {}
rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
rpc StoresGet(StoresGetOptions) returns (StoresGetResult) {}
rpc StoresReset(StoresResetOptions) returns (Result) {}
rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
rpc Rerank(RerankRequest) returns (RerankResult) {}
@@ -78,19 +77,10 @@ message StoresSetOptions {
repeated StoresValue Values = 2;
}
message StoresDeleteOptions {
message StoresResetOptions {
repeated StoresKey Keys = 1;
}
message StoresGetOptions {
repeated StoresKey Keys = 1;
}
message StoresGetResult {
repeated StoresKey Keys = 1;
repeated StoresValue Values = 2;
}
message StoresFindOptions {
StoresKey Key = 1;
int32 TopK = 2;

View File

@@ -22,7 +22,6 @@
#include "backend.grpc.pb.h"
#include "utils.hpp"
#include "sampling.h"
#include "speculative.h"
// include std::regex
#include <cstddef>
#include <thread>
@@ -186,45 +185,12 @@ static json probs_vector_to_json(const llama_context *ctx, const std::vector<com
return out;
}
struct llama_slot_params {
uint32_t seed = -1; // RNG seed
bool stream = true;
bool cache_prompt = true; // remember the prompt to avoid reprocessing all prompt
bool return_tokens = false;
int32_t n_keep = 0; // number of tokens to keep from initial prompt
int32_t n_discard = 0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half
int32_t n_predict = -1; // new tokens to predict
int32_t n_indent = 0; // mininum line indentation for the generated text in number of whitespace characters
int64_t t_max_prompt_ms = -1; // TODO: implement
int64_t t_max_predict_ms = -1; // if positive, limit the generation phase to this time limit
std::vector<common_adapter_lora_info> lora;
std::vector<std::string> antiprompt;
std::vector<std::string> response_fields;
bool timings_per_token = false;
bool post_sampling_probs = false;
bool ignore_eos = false;
json input_prefix;
json input_suffix;
struct common_params_sampling sampling;
struct common_params_speculative speculative;
};
struct llama_client_slot
{
int id;
int task_id = -1;
struct llama_slot_params params;
common_speculative * spec = nullptr;
llama_batch batch_spec = {};
struct slot_params params;
slot_state state = IDLE;
slot_command command = NONE;
@@ -317,7 +283,6 @@ struct llama_client_slot
images.clear();
}
bool has_budget(common_params &global_params) {
if (params.n_predict == -1 && global_params.n_predict == -1)
{
@@ -489,10 +454,6 @@ struct llama_server_context
{
llama_model *model = nullptr;
llama_context *ctx = nullptr;
common_init_result llama_init_dft;
llama_context * ctx_dft = nullptr;
llama_model * model_dft = nullptr;
llama_context_params cparams_dft;
const llama_vocab * vocab = nullptr;
clip_ctx *clp_ctx = nullptr;
@@ -541,7 +502,6 @@ struct llama_server_context
}
}
bool load_model(const common_params &params_)
{
params = params_;
@@ -585,45 +545,6 @@ struct llama_server_context
add_bos_token = llama_vocab_get_add_bos(vocab);
has_eos_token = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL;
if (!params.speculative.model.empty()) {
LOG("loading draft model '%s'\n", params.speculative.model.c_str());
auto params_dft = params;
params_dft.devices = params.speculative.devices;
params_dft.model = params.speculative.model;
params_dft.n_ctx = params.speculative.n_ctx == 0 ? params.n_ctx / params.n_parallel : params.speculative.n_ctx;
params_dft.n_gpu_layers = params.speculative.n_gpu_layers;
params_dft.n_parallel = 1;
llama_init_dft = common_init_from_params(params_dft);
model_dft = llama_init_dft.model.get();
if (model_dft == nullptr) {
LOG("failed to load draft model, '%s'\n", params.speculative.model.c_str());
return false;
}
if (!common_speculative_are_compatible(ctx, llama_init_dft.context.get())) {
LOG("the draft model '%s' is not compatible with the target model '%s'\n", params.speculative.model.c_str(), params.model.c_str());
return false;
}
const int n_ctx_dft = llama_n_ctx(llama_init_dft.context.get());
cparams_dft = common_context_params_to_llama(params_dft);
cparams_dft.n_batch = n_ctx_dft;
// force F16 KV cache for the draft model for extra performance
cparams_dft.type_k = GGML_TYPE_F16;
cparams_dft.type_v = GGML_TYPE_F16;
// the context is not needed - we will create one for each slot
llama_init_dft.context.reset();
}
return true;
}
@@ -652,22 +573,6 @@ struct llama_server_context
slot.n_ctx = n_ctx_slot;
slot.n_predict = params.n_predict;
if (model_dft) {
slot.batch_spec = llama_batch_init(params.speculative.n_max + 1, 0, 1);
ctx_dft = llama_init_from_model(model_dft, cparams_dft);
if (ctx_dft == nullptr) {
LOG("%s", "failed to create draft context\n");
return;
}
slot.spec = common_speculative_init(ctx_dft);
if (slot.spec == nullptr) {
LOG("%s", "failed to create speculator\n");
return;
}
}
LOG_INFO("new slot", {
{"slot_id", slot.id},
{"n_ctx_slot", slot.n_ctx}
@@ -776,11 +681,9 @@ struct llama_server_context
}
bool launch_slot_with_data(llama_client_slot* &slot, json data) {
llama_slot_params default_params;
slot_params default_params;
common_params_sampling default_sparams;
default_sparams.speculative = params_base.speculative;
slot->params.stream = json_value(data, "stream", false);
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
@@ -804,15 +707,6 @@ struct llama_server_context
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
slot->sparams.speculative.n_min = json_value(data, "speculative.n_min", defaults.speculative.n_min);
slot->sparams.speculative.n_max = json_value(data, "speculative.n_max", defaults.speculative.n_max);
slot->sparams.speculative.p_min = json_value(data, "speculative.p_min", defaults.speculative.p_min);
slot->sparams.speculative.n_min = std::min(params.speculative.n_max, params.speculative.n_min);
slot->sparams.speculative.n_min = std::max(params.speculative.n_min, 2);
slot->sparams.speculative.n_max = std::max(params.speculative.n_max, 0);
if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) {
// Might be better to reject the request with a 400 ?
LOG_WARNING("Max tokens to predict exceeds server configuration", {
@@ -2130,97 +2024,6 @@ struct llama_server_context
}
}
// do speculative decoding
for (auto & slot : slots) {
if (!slot.is_processing() || !(ctx_dft && params.speculative.n_max > 0)) {
continue;
}
if (slot.state != PROCESSING) {
continue;
}
// determine the max draft that fits the current slot state
int n_draft_max = slot.params.speculative.n_max;
// note: n_past is not yet increased for the `id` token sampled above
// also, need to leave space for 1 extra token to allow context shifts
n_draft_max = std::min(n_draft_max, slot.n_ctx - slot.n_past - 2);
if (slot.n_remaining > 0) {
n_draft_max = std::min(n_draft_max, slot.n_remaining - 1);
}
LOG("max possible draft: %d\n", n_draft_max);
if (n_draft_max < slot.params.speculative.n_min) {
LOG("the max possible draft is too small: %d < %d - skipping speculative decoding\n", n_draft_max, slot.params.speculative.n_min);
continue;
}
llama_token id = slot.sampled;
struct common_speculative_params params_spec;
params_spec.n_draft = n_draft_max;
params_spec.n_reuse = llama_n_ctx(ctx_dft) - slot.params.speculative.n_max;
params_spec.p_min = slot.params.speculative.p_min;
llama_tokens draft = common_speculative_gen_draft(slot.spec, params_spec, slot.cache_tokens, id);
// ignore small drafts
if (slot.params.speculative.n_min > (int) draft.size()) {
LOG("ignoring small draft: %d < %d\n", (int) draft.size(), slot.params.speculative.n_min);
continue;
}
// construct the speculation batch
common_batch_clear(slot.batch_spec);
common_batch_add (slot.batch_spec, id, slot.n_past, { slot.id }, true);
for (size_t i = 0; i < draft.size(); ++i) {
common_batch_add(slot.batch_spec, draft[i], slot.n_past + 1 + i, { slot.id }, true);
}
LOG("decoding speculative batch, size = %d\n", slot.batch_spec.n_tokens);
llama_decode(ctx, slot.batch_spec);
// the accepted tokens from the speculation
const auto ids = common_sampler_sample_and_accept_n(slot.ctx_sampling, ctx, draft);
slot.n_past += ids.size();
slot.n_decoded += ids.size();
slot.cache_tokens.push_back(id);
slot.cache_tokens.insert(slot.cache_tokens.end(), ids.begin(), ids.end() - 1);
llama_kv_cache_seq_rm(ctx, slot.id, slot.n_past, -1);
for (size_t i = 0; i < ids.size(); ++i) {
completion_token_output result;
result.tok = ids[i];
result.text_to_send = common_token_to_piece(ctx, result.tok, params.special);
//result.prob = 1.0f; // set later
// TODO: set result.probs
if (!process_token(result, slot)) {
// release slot because of stop condition
slot.release();
slot.print_timings();
send_final_response(slot);
metrics.on_prediction(slot);
break;
}
}
LOG("accepted %d/%d draft tokens, new n_past = %d\n", (int) ids.size() - 1, (int) draft.size(), slot.n_past);
}
LOG_VERBOSE("slots updated", {});
return true;
}
@@ -2493,30 +2296,6 @@ static void params_parse(const backend::ModelOptions* request,
params.cpuparams.n_threads = request->threads();
params.n_gpu_layers = request->ngpulayers();
params.n_batch = request->nbatch();
params.speculative.model = request->draftmodel();
// If options is not NULL, parse options
for (int i = 0; request->options()[i] != NULL; i++) {
char *optname = strtok(request->options()[i], ":");
char *optval = strtok(NULL, ":");
if (optval == NULL) {
optval = "true";
}
if (!strcmp(optname, "speculative.n_gpu_layers")) {
params.speculative.n_gpu_layers = std::stoi(optval);
}
if (!strcmp(optname, "speculative.n_ctx")) {
params.speculative.n_ctx = std::stoi(optval);
}
}
if params.speculative.n_gpu_layers == 0 {
params.speculative.n_gpu_layers = params.n_gpu_layers;
}
if params.speculative.n_ctx == 0 {
params.speculative.n_ctx = params.n_ctx;
}
// Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1
//params.n_parallel = 1;
const char *env_parallel = std::getenv("LLAMACPP_PARALLEL");

View File

@@ -0,0 +1,21 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
grpc "github.com/mudler/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
// main parses the command-line flags and starts the gRPC server for the
// Image backend on the address given by -addr. It panics if StartServer
// returns an error.
func main() {
	flag.Parse()

	err := grpc.StartServer(*addr, &Image{})
	if err != nil {
		panic(err)
	}
}

View File

@@ -0,0 +1,33 @@
package main
// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/stablediffusion"
)
// Image is the backend served by this executable. It wraps a
// stablediffusion model so that it can be driven through the LocalAI gRPC
// service interface.
type Image struct {
	// SingleThread is embedded from the shared base package.
	base.SingleThread
	// stablediffusion is the model handle; it is assigned by Load and used
	// by GenerateImage.
	stablediffusion *stablediffusion.StableDiffusion
}
// Load creates the stablediffusion model from opts.ModelFile and stores the
// resulting handle on the receiver. The error from stablediffusion.New is
// returned unchanged.
//
// Note: the model file here is a path to a directory containing the model files.
func (image *Image) Load(opts *pb.ModelOptions) error {
	model, err := stablediffusion.New(opts.ModelFile)
	// The handle is stored even when err is non-nil, matching the previous
	// direct assignment.
	image.stablediffusion = model
	return err
}
// GenerateImage forwards an image generation request to the loaded model.
// The numeric request fields are converted to int, and the prompts and the
// destination are passed through as-is. The model's error is returned
// unchanged.
func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
	height, width := int(opts.Height), int(opts.Width)
	mode, step, seed := int(opts.Mode), int(opts.Step), int(opts.Seed)

	return image.stablediffusion.GenerateImage(
		height, width, mode, step, seed,
		opts.PositivePrompt, opts.NegativePrompt, opts.Dst,
	)
}

View File

@@ -4,101 +4,36 @@ package main
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"container/heap"
"context"
"fmt"
"math"
"slices"
"runtime"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
chromem "github.com/philippgille/chromem-go"
"github.com/rs/zerolog/log"
)
type Store struct {
base.SingleThread
// The sorted keys
keys [][]float32
// The sorted values
values [][]byte
// If for every K it holds that ||k||^2 = 1, then we can use the normalized distance functions
// TODO: Should we normalize incoming keys if they are not instead?
keysAreNormalized bool
// The first key decides the length of the keys
keyLen int
}
// TODO: Only used for sorting using Go's builtin implementation. The interfaces are columnar because
// that's theoretically best for memory layout and cache locality, but this isn't optimized yet.
type Pair struct {
Key []float32
Value []byte
*chromem.DB
*chromem.Collection
}
func NewStore() *Store {
return &Store{
keys: make([][]float32, 0),
values: make([][]byte, 0),
keysAreNormalized: true,
keyLen: -1,
}
}
func compareSlices(k1, k2 []float32) int {
assert(len(k1) == len(k2), fmt.Sprintf("compareSlices: len(k1) = %d, len(k2) = %d", len(k1), len(k2)))
return slices.Compare(k1, k2)
}
func hasKey(unsortedSlice [][]float32, target []float32) bool {
return slices.ContainsFunc(unsortedSlice, func(k []float32) bool {
return compareSlices(k, target) == 0
})
}
func findInSortedSlice(sortedSlice [][]float32, target []float32) (int, bool) {
return slices.BinarySearchFunc(sortedSlice, target, func(k, t []float32) int {
return compareSlices(k, t)
})
}
func isSortedPairs(kvs []Pair) bool {
for i := 1; i < len(kvs); i++ {
if compareSlices(kvs[i-1].Key, kvs[i].Key) > 0 {
return false
}
}
return true
}
func isSortedKeys(keys [][]float32) bool {
for i := 1; i < len(keys); i++ {
if compareSlices(keys[i-1], keys[i]) > 0 {
return false
}
}
return true
}
func sortIntoKeySlicese(keys []*pb.StoresKey) [][]float32 {
ks := make([][]float32, len(keys))
for i, k := range keys {
ks[i] = k.Floats
}
slices.SortFunc(ks, compareSlices)
assert(len(ks) == len(keys), fmt.Sprintf("len(ks) = %d, len(keys) = %d", len(ks), len(keys)))
assert(isSortedKeys(ks), "keys are not sorted")
return ks
return &Store{}
}
// Load creates a new chromem database together with a collection named
// "all-documents" and attaches both to the store. If the collection cannot
// be created the store is left untouched and the error is returned.
// opts is not consulted.
func (s *Store) Load(opts *pb.ModelOptions) error {
	database := chromem.NewDB()

	documents, err := database.CreateCollection("all-documents", nil, nil)
	if err == nil {
		s.DB, s.Collection = database, documents
	}
	return err
}
@@ -111,156 +46,25 @@ func (s *Store) StoresSet(opts *pb.StoresSetOptions) error {
if len(opts.Keys) != len(opts.Values) {
return fmt.Errorf("len(keys) = %d, len(values) = %d", len(opts.Keys), len(opts.Values))
}
if s.keyLen == -1 {
s.keyLen = len(opts.Keys[0].Floats)
} else {
if len(opts.Keys[0].Floats) != s.keyLen {
return fmt.Errorf("Try to add key with length %d when existing length is %d", len(opts.Keys[0].Floats), s.keyLen)
}
}
kvs := make([]Pair, len(opts.Keys))
docs := []chromem.Document{}
for i, k := range opts.Keys {
if s.keysAreNormalized && !isNormalized(k.Floats) {
s.keysAreNormalized = false
var sample []float32
if len(s.keys) > 5 {
sample = k.Floats[:5]
} else {
sample = k.Floats
}
log.Debug().Msgf("Key is not normalized: %v", sample)
}
kvs[i] = Pair{
Key: k.Floats,
Value: opts.Values[i].Bytes,
}
docs = append(docs, chromem.Document{
ID: k.String(),
Content: opts.Values[i].String(),
})
}
slices.SortFunc(kvs, func(a, b Pair) int {
return compareSlices(a.Key, b.Key)
})
assert(len(kvs) == len(opts.Keys), fmt.Sprintf("len(kvs) = %d, len(opts.Keys) = %d", len(kvs), len(opts.Keys)))
assert(isSortedPairs(kvs), "keys are not sorted")
l := len(kvs) + len(s.keys)
merge_ks := make([][]float32, 0, l)
merge_vs := make([][]byte, 0, l)
i, j := 0, 0
for {
if i+j >= l {
break
}
if i >= len(kvs) {
merge_ks = append(merge_ks, s.keys[j])
merge_vs = append(merge_vs, s.values[j])
j++
continue
}
if j >= len(s.keys) {
merge_ks = append(merge_ks, kvs[i].Key)
merge_vs = append(merge_vs, kvs[i].Value)
i++
continue
}
c := compareSlices(kvs[i].Key, s.keys[j])
if c < 0 {
merge_ks = append(merge_ks, kvs[i].Key)
merge_vs = append(merge_vs, kvs[i].Value)
i++
} else if c > 0 {
merge_ks = append(merge_ks, s.keys[j])
merge_vs = append(merge_vs, s.values[j])
j++
} else {
merge_ks = append(merge_ks, kvs[i].Key)
merge_vs = append(merge_vs, kvs[i].Value)
i++
j++
}
}
assert(len(merge_ks) == l, fmt.Sprintf("len(merge_ks) = %d, l = %d", len(merge_ks), l))
assert(isSortedKeys(merge_ks), "merge keys are not sorted")
s.keys = merge_ks
s.values = merge_vs
return nil
return s.Collection.AddDocuments(context.Background(), docs, runtime.NumCPU())
}
func (s *Store) StoresDelete(opts *pb.StoresDeleteOptions) error {
if len(opts.Keys) == 0 {
return fmt.Errorf("no keys to delete")
func (s *Store) StoresReset(opts *pb.StoresResetOptions) error {
err := s.DB.DeleteCollection("all-documents")
if err != nil {
return err
}
if len(opts.Keys) == 0 {
return fmt.Errorf("no keys to add")
}
if s.keyLen == -1 {
s.keyLen = len(opts.Keys[0].Floats)
} else {
if len(opts.Keys[0].Floats) != s.keyLen {
return fmt.Errorf("Trying to delete key with length %d when existing length is %d", len(opts.Keys[0].Floats), s.keyLen)
}
}
ks := sortIntoKeySlicese(opts.Keys)
l := len(s.keys) - len(ks)
merge_ks := make([][]float32, 0, l)
merge_vs := make([][]byte, 0, l)
tail_ks := s.keys
tail_vs := s.values
for _, k := range ks {
j, found := findInSortedSlice(tail_ks, k)
if found {
merge_ks = append(merge_ks, tail_ks[:j]...)
merge_vs = append(merge_vs, tail_vs[:j]...)
tail_ks = tail_ks[j+1:]
tail_vs = tail_vs[j+1:]
} else {
assert(!hasKey(s.keys, k), fmt.Sprintf("Key exists, but was not found: t=%d, %v", len(tail_ks), k))
}
log.Debug().Msgf("Delete: found = %v, t = %d, j = %d, len(merge_ks) = %d, len(merge_vs) = %d", found, len(tail_ks), j, len(merge_ks), len(merge_vs))
}
merge_ks = append(merge_ks, tail_ks...)
merge_vs = append(merge_vs, tail_vs...)
assert(len(merge_ks) <= len(s.keys), fmt.Sprintf("len(merge_ks) = %d, len(s.keys) = %d", len(merge_ks), len(s.keys)))
s.keys = merge_ks
s.values = merge_vs
assert(len(s.keys) >= l, fmt.Sprintf("len(s.keys) = %d, l = %d", len(s.keys), l))
assert(isSortedKeys(s.keys), "keys are not sorted")
assert(func() bool {
for _, k := range ks {
if _, found := findInSortedSlice(s.keys, k); found {
return false
}
}
return true
}(), "Keys to delete still present")
if len(s.keys) != l {
log.Debug().Msgf("Delete: Some keys not found: len(s.keys) = %d, l = %d", len(s.keys), l)
}
return nil
s.Collection, err = s.CreateCollection("all-documents", nil, nil)
return err
}
func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error) {
@@ -311,16 +115,12 @@ func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error)
}
// isNormalized reports whether k is approximately a unit vector, i.e.
// whether its Euclidean norm lies within 1% of 1.
//
// The squared components are accumulated in float64 and the result is
// compared with a tolerance: summing the raw components and testing for
// exact equality with 1 neither measures the norm nor survives
// floating-point rounding.
func isNormalized(k []float32) bool {
	var sum float64
	for _, v := range k {
		v64 := float64(v)
		sum += v64 * v64
	}

	norm := math.Sqrt(sum)
	return norm >= 0.99 && norm <= 1.01
}
// TODO: This we could replace with handwritten SIMD code
@@ -332,7 +132,7 @@ func normalizedCosineSimilarity(k1, k2 []float32) float32 {
dot += k1[i] * k2[i]
}
assert(dot >= -1.01 && dot <= 1.01, fmt.Sprintf("dot = %f", dot))
assert(dot >= -1 && dot <= 1, fmt.Sprintf("dot = %f", dot))
// 2.0 * (1.0 - dot) would be the Euclidean distance
return dot
@@ -422,7 +222,7 @@ func cosineSimilarity(k1, k2 []float32, mag1 float64) float32 {
sim := float32(dot / (mag1 * math.Sqrt(mag2)))
assert(sim >= -1.01 && sim <= 1.01, fmt.Sprintf("sim = %f", sim))
assert(sim >= -1 && sim <= 1, fmt.Sprintf("sim = %f", sim))
return sim
}

View File

@@ -1,94 +0,0 @@
#!/usr/bin/env python3
"""
This is an extra gRPC server of LocalAI for audio transcription with faster-whisper
"""
from concurrent import futures
import time
import argparse
import signal
import sys
import os
import backend_pb2
import backend_pb2_grpc
from faster_whisper import WhisperModel
import grpc
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
COQUI_LANGUAGE = os.environ.get('COQUI_LANGUAGE', None)
# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    BackendServicer is the class that implements the gRPC service.

    It transcribes audio with a faster-whisper ``WhisperModel``. LoadModel
    is what assigns ``self.model``, so it has to succeed before
    AudioTranscription is called.
    """

    def Health(self, request, context):
        """Return a Reply whose message is the bytes ``b"OK"``."""
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """
        Create the WhisperModel named by ``request.Model``.

        The model is placed on "cuda" when ``request.CUDA`` is set and on
        "cpu" otherwise. Any exception raised while constructing the model
        is reported as a failed Result instead of being propagated.
        """
        device = "cuda" if request.CUDA else "cpu"

        try:
            print("Preparing models, please wait", file=sys.stderr)
            self.model = WhisperModel(request.Model, device=device, compute_type="float16")
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def AudioTranscription(self, request, context):
        """
        Transcribe the audio file at ``request.dst``.

        Returns a TranscriptResult holding one TranscriptSegment per decoded
        segment plus the concatenated text. Transcription is best-effort: an
        exception is logged to stderr and whatever was collected up to that
        point is returned.
        """
        resultSegments = []
        text = ""
        try:
            segments, info = self.model.transcribe(request.dst, beam_size=5, condition_on_previous_text=False)
            # enumerate() provides the running segment id; the previous manual
            # counter was named `id` and shadowed the builtin.
            for segment_id, segment in enumerate(segments):
                print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
                resultSegments.append(backend_pb2.TranscriptSegment(id=segment_id, start=segment.start, end=segment.end, text=segment.text))
                text += segment.text
        except Exception as err:
            print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr)

        return backend_pb2.TranscriptResult(segments=resultSegments, text=text)
def serve(address):
    """
    Start the gRPC backend server on ``address`` and keep the process alive.

    The server uses a thread pool of MAX_WORKERS workers and listens on an
    insecure (non-TLS) port. SIGINT and SIGTERM stop the server with a grace
    period of 0 and exit the process with status 0.

    Args:
        address (str): The address to bind the server to.
    """
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)

    # Define the signal handler function
    def signal_handler(sig, frame):
        print("Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    # Set the signal handlers for SIGINT and SIGTERM
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, signal_handler)

    # server.start() does not block, so sleep in a loop to keep the main
    # thread (and with it the process) alive.
    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)
if __name__ == "__main__":
    # Read the listen address from the command line and run the server.
    parser = argparse.ArgumentParser(description="Run the gRPC server.")
    parser.add_argument(
        "--addr", default="localhost:50051", help="The address to bind the server to."
    )
    args = parser.parse_args()

    serve(args.addr)

View File

@@ -1,8 +0,0 @@
faster-whisper
opencv-python
accelerate
compel
peft
sentencepiece
torch==2.4.1
optimum-quanto

View File

@@ -1,9 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
faster-whisper
opencv-python
accelerate
compel
peft
sentencepiece
optimum-quanto

View File

@@ -1,8 +0,0 @@
torch==2.4.1
faster-whisper
opencv-python
accelerate
compel
peft
sentencepiece
optimum-quanto

View File

@@ -1,3 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
faster-whisper

View File

@@ -0,0 +1,29 @@
.PHONY: mamba
mamba: protogen
bash install.sh
.PHONY: run
run: protogen
@echo "Running mamba..."
bash run.sh
@echo "mamba run."
.PHONY: test
test: protogen
@echo "Testing mamba..."
bash test.sh
@echo "mamba tested."
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py
.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py:
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
.PHONY: clean
clean: protogen-clean
$(RM) -r venv __pycache__

View File

@@ -0,0 +1,5 @@
# Creating a separate environment for the mamba project
```
make mamba
```

View File

@@ -0,0 +1,179 @@
#!/usr/bin/env python3
from concurrent import futures
import time
import argparse
import signal
import sys
import os
import backend_pb2
import backend_pb2_grpc
import grpc
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
MAMBA_CHAT= os.environ.get('MAMBA_CHAT', '1') == '1'
# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    A gRPC servicer that implements the Backend service defined in backend.proto.

    LoadModel assigns ``self.tokenizer`` and ``self.model``; Predict and
    PredictStream rely on both.
    """

    def generate(self, prompt, max_new_tokens):
        """
        Generates text based on the given prompt and maximum number of new tokens.

        Args:
            prompt (str): The prompt to generate text from.
            max_new_tokens (int): The maximum number of new tokens to generate.

        Returns:
            str: The generated text.
        """
        # NOTE(review): self.generator is not assigned anywhere in this class,
        # so calling this method as-is raises AttributeError. Predict does not
        # use it. Confirm whether this helper is still needed.
        self.generator.end_beam_search()

        # Tokenizing the input
        ids = self.generator.tokenizer.encode(prompt)

        self.generator.gen_begin_reuse(ids)
        initial_len = self.generator.sequence[0].shape[0]
        has_leading_space = False
        decoded_text = ''
        for i in range(max_new_tokens):
            token = self.generator.gen_single_token()
            # NOTE(review): startswith('') is always True. Presumably a
            # leading-space marker character was lost here; confirm against
            # the tokenizer in use.
            if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith(''):
                has_leading_space = True

            decoded_text = self.generator.tokenizer.decode(self.generator.sequence[0][initial_len:])
            if has_leading_space:
                decoded_text = ' ' + decoded_text

            if token.item() == self.generator.tokenizer.eos_token_id:
                break
        return decoded_text

    def Health(self, request, context):
        """
        Returns a health check message.

        Args:
            request: The health check request.
            context: The gRPC context.

        Returns:
            backend_pb2.Reply: The health check reply.
        """
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """
        Loads a language model.

        The tokenizer is loaded from ``request.Tokenizer``, falling back to
        ``request.Model`` when that field is empty. The model itself is loaded
        onto "cuda" in float16. Any exception is reported as a failed Result.

        Args:
            request: The load model request.
            context: The gRPC context.

        Returns:
            backend_pb2.Result: The load model result.
        """
        try:
            tokenizerModel = request.Tokenizer
            if tokenizerModel == "":
                tokenizerModel = request.Model

            tokenizer = AutoTokenizer.from_pretrained(tokenizerModel)
            # NOTE(review): nesting reconstructed from flattened source; both
            # token overrides are assumed to apply only in chat mode - confirm.
            if MAMBA_CHAT:
                tokenizer.eos_token = "<|endoftext|>"
                tokenizer.pad_token = tokenizer.eos_token
            self.tokenizer = tokenizer
            self.model = MambaLMHeadModel.from_pretrained(request.Model, device="cuda", dtype=torch.float16)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def Predict(self, request, context):
        """
        Generates text based on the given prompt and sampling parameters.

        A TopP of 0 is replaced by 0.9 (on the request itself) and a Tokens
        value of 0 by 2000. Every occurrence of the prompt is removed from the
        decoded output before it is returned.

        Args:
            request: The predict request.
            context: The gRPC context.

        Returns:
            backend_pb2.Reply: The generated text, UTF-8 encoded.
        """
        if request.TopP == 0:
            request.TopP = 0.9

        max_tokens = request.Tokens

        if request.Tokens == 0:
            max_tokens = 2000

        # encoded_input = self.tokenizer(request.Prompt)
        tokens = self.tokenizer(request.Prompt, return_tensors="pt")
        input_ids = tokens.input_ids.to(device="cuda")
        out = self.model.generate(input_ids=input_ids, max_length=max_tokens, temperature=request.Temperature,
                                  top_p=request.TopP, eos_token_id=self.tokenizer.eos_token_id)

        decoded = self.tokenizer.batch_decode(out)

        generated_text = decoded[0]

        # Remove prompt from response if present
        if request.Prompt in generated_text:
            generated_text = generated_text.replace(request.Prompt, "")

        return backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))

    def PredictStream(self, request, context):
        """
        Generates text based on the given prompt and sampling parameters, and streams the results.

        The output is not produced incrementally: the complete Predict reply
        is yielded as a single item.

        Args:
            request: The predict stream request.
            context: The gRPC context.

        Yields:
            backend_pb2.Reply: The full reply returned by Predict.
        """
        yield self.Predict(request, context)
def serve(address):
    """
    Start the gRPC backend server on ``address`` and keep the process alive.

    The server uses a thread pool of MAX_WORKERS workers and listens on an
    insecure (non-TLS) port. SIGINT and SIGTERM stop the server with a grace
    period of 0 and exit the process with status 0.

    Args:
        address (str): The address to bind the server to.
    """
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)

    # Define the signal handler function
    def signal_handler(sig, frame):
        print("Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    # Set the signal handlers for SIGINT and SIGTERM
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, signal_handler)

    # server.start() does not block, so sleep in a loop to keep the main
    # thread (and with it the process) alive.
    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)
if __name__ == "__main__":
    # Read the listen address from the command line and run the server.
    parser = argparse.ArgumentParser(description="Run the gRPC server.")
    parser.add_argument(
        "--addr", default="localhost:50051", help="The address to bind the server to."
    )
    args = parser.parse_args()

    serve(args.addr)

View File

@@ -0,0 +1,9 @@
#!/bin/bash
# Install the Python requirements for this backend via the shared helpers.
set -e

# Both variables are read by the sourced libbackend.sh.
# NOTE(review): presumably LIMIT_TARGETS restricts the supported build
# profiles and EXTRA_PIP_INSTALL_FLAGS is appended to pip install - confirm
# against libbackend.sh.
LIMIT_TARGETS="cublas"
EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"

# Quoted so that a checkout path containing spaces still resolves.
source "$(dirname "$0")/../common/libbackend.sh"

installRequirements

View File

@@ -0,0 +1,2 @@
causal-conv1d==1.4.0
mamba-ssm==2.2.2

View File

@@ -0,0 +1,2 @@
torch==2.4.1
transformers

View File

@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
transformers

View File

@@ -0,0 +1,2 @@
torch==2.4.1
transformers

View File

@@ -0,0 +1,6 @@
# mamba does not specify its build dependencies per PEP 517, so we need to disable build isolation
# this also means that we need to install the basic build dependencies into the venv ourselves
# https://github.com/Dao-AILab/causal-conv1d/issues/24
packaging
setuptools
wheel

View File

@@ -1,3 +1,3 @@
grpcio==1.69.0
protobuf
grpcio-tools
certifi

6
backend/python/mamba/run.sh Executable file
View File

@@ -0,0 +1,6 @@
#!/bin/bash
# Start this backend through the shared LocalAI backend helpers.

# Read by the sourced libbackend.sh.
# NOTE(review): presumably restricts the supported build profiles - confirm
# against libbackend.sh.
LIMIT_TARGETS="cublas"

# Quoted so that a checkout path containing spaces still resolves.
source "$(dirname "$0")/../common/libbackend.sh"

# "$@" forwards every argument unchanged; an unquoted $@ would re-split
# arguments that contain whitespace.
startBackend "$@"

View File

@@ -0,0 +1,76 @@
import unittest
import subprocess
import time
import backend_pb2
import backend_pb2_grpc
import grpc
import unittest
import subprocess
import time
import grpc
import backend_pb2_grpc
import backend_pb2
class TestBackendServicer(unittest.TestCase):
    """
    TestBackendServicer is the class that tests the gRPC service.

    This class contains methods to test the startup and shutdown of the gRPC service.

    unittest runs setUp before and tearDown after every test method, so the
    tests must not call them again: a second setUp would start another server
    and overwrite ``self.service``, leaving the first process running forever.
    """

    def setUp(self):
        """Start backend.py on localhost:50051 and wait 10 seconds for it to come up."""
        self.service = subprocess.Popen(["python", "backend.py", "--addr", "localhost:50051"])
        time.sleep(10)

    def tearDown(self) -> None:
        """Terminate the server process and wait for it to exit."""
        self.service.terminate()
        self.service.wait()

    def test_server_startup(self):
        """
        This method tests if the server answers the Health call with b'OK'
        """
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.Health(backend_pb2.HealthMessage())
                self.assertEqual(response.message, b'OK')
        except Exception as err:
            print(err)
            self.fail("Server failed to start")

    def test_load_model(self):
        """
        This method tests if the model is loaded successfully
        """
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m"))
                self.assertTrue(response.success)
                self.assertEqual(response.message, "Model loaded successfully")
        except Exception as err:
            print(err)
            self.fail("LoadModel service failed")

    def test_text(self):
        """
        This method tests if text is generated successfully for a prompt
        """
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m"))
                self.assertTrue(response.success)
                req = backend_pb2.PredictOptions(Prompt="The capital of France is")
                resp = stub.Predict(req)
                self.assertIsNotNone(resp.message)
        except Exception as err:
            print(err)
            self.fail("text service failed")

View File

@@ -1,9 +1,8 @@
.DEFAULT_GOAL := install
.PHONY: install
install:
install: protogen
bash install.sh
$(MAKE) protogen
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py
@@ -13,8 +12,14 @@ protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py:
bash protogen.sh
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
.PHONY: clean
clean: protogen-clean
rm -rf venv __pycache__
rm -rf venv __pycache__
.PHONY: test
test: protogen
@echo "Testing openvoice..."
bash test.sh
@echo "openvoice tested."

View File

@@ -0,0 +1,158 @@
#!/usr/bin/env python3
"""
Extra gRPC server for OpenVoice models.
"""
from concurrent import futures
import argparse
import signal
import sys
import os
import torch
from openvoice import se_extractor
from openvoice.api import ToneColorConverter
from melo.api import TTS
import time
import backend_pb2
import backend_pb2_grpc
import grpc
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    A gRPC servicer for the backend service.

    This class implements the gRPC methods for the backend service, including Health, LoadModel, and TTS.
    """

    def Health(self, request, context):
        """
        A gRPC method that returns the health status of the backend service.

        Args:
            request: A HealthRequest object that contains the request parameters.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Reply object that contains the health status of the backend service.
        """
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """
        A gRPC method that records the model settings and prepares voice cloning.

        The model path, speaker (``request.Type``) and reference audio path are
        stored on the servicer for later TTS calls. A ToneColorConverter is
        only created when a reference audio path was given. Any exception is
        reported as a failed Result.

        Args:
            request: A LoadModelRequest object that contains the request parameters.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Result object that contains the result of the LoadModel operation.
        """
        try:
            self.clonedVoice = False
            # Assume directory from request.ModelFile.
            # Only if request.AudioPath is not an absolute path
            if request.AudioPath and request.ModelFile != "" and not os.path.isabs(request.AudioPath):
                # get base path of modelFile
                modelFileBase = os.path.dirname(request.ModelFile)
                request.AudioPath = os.path.join(modelFileBase, request.AudioPath)

            # Any reference audio, relative or absolute, enables voice cloning.
            if request.AudioPath != "":
                self.clonedVoice = True

            self.modelpath = request.ModelFile
            self.speaker = request.Type
            self.ClonedVoicePath = request.AudioPath

            ckpt_converter = request.Model+'/converter'
            device = "cuda:0" if torch.cuda.is_available() else "cpu"
            self.device = device
            self.tone_color_converter = None
            if self.clonedVoice:
                self.tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
                self.tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def TTS(self, request, context):
        """
        A gRPC method that synthesizes ``request.text`` into the file ``request.dst``.

        The language comes from ``request.voice`` (default "EN") and the
        speaker from the value stored by LoadModel. When voice cloning is
        enabled, the generated file is converted in place towards the
        reference voice. Any exception is reported as a failed Result.

        Args:
            request: The TTS request; ``request.model`` must not be empty.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Result object whose ``success`` flag tells whether the audio was written.
        """
        model_name = request.model
        if model_name == "":
            return backend_pb2.Result(success=False, message="request.model is required")
        try:
            # Speed is adjustable
            speed = 1.0
            voice = "EN"
            if request.voice:
                voice = request.voice
            # Inside a method the bare name TTS resolves to the module-level
            # melo.api.TTS class, not to this method.
            model = TTS(language=voice, device=self.device)
            speaker_ids = model.hps.data.spk2id
            speaker_key = self.speaker
            modelpath = self.modelpath
            for s in speaker_ids.keys():
                print(f"Speaker: {s} - ID: {speaker_ids[s]}")

            speaker_id = speaker_ids[speaker_key]
            speaker_key = speaker_key.lower().replace('_', '-')
            source_se = torch.load(f'{modelpath}/base_speakers/ses/{speaker_key}.pth', map_location=self.device)
            model.tts_to_file(request.text, speaker_id, request.dst, speed=speed)
            if self.clonedVoice:
                reference_speaker = self.ClonedVoicePath
                target_se, audio_name = se_extractor.get_se(reference_speaker, self.tone_color_converter, vad=False)
                # Run the tone color converter
                encode_message = "@MyShell"
                self.tone_color_converter.convert(
                    audio_src_path=request.dst,
                    src_se=source_se,
                    tgt_se=target_se,
                    output_path=request.dst,
                    message=encode_message)

            print("[OpenVoice] TTS generated!", file=sys.stderr)
            print("[OpenVoice] TTS saved to", request.dst, file=sys.stderr)
            print(request, file=sys.stderr)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(success=True)
def serve(address):
    """
    Start the gRPC backend server on ``address`` and keep the process alive.

    The server uses a thread pool of MAX_WORKERS workers and listens on an
    insecure (non-TLS) port. SIGINT and SIGTERM stop the server with a grace
    period of 0 and exit the process with status 0.

    Args:
        address (str): The address to bind the server to.
    """
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("[OpenVoice] Server started. Listening on: " + address, file=sys.stderr)

    # Define the signal handler function
    def signal_handler(sig, frame):
        print("[OpenVoice] Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    # Set the signal handlers for SIGINT and SIGTERM
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, signal_handler)

    # server.start() does not block, so sleep in a loop to keep the main
    # thread (and with it the process) alive.
    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)
if __name__ == "__main__":
    # Read the listen address from the command line, log the parsed
    # arguments and run the server.
    parser = argparse.ArgumentParser(description="Run the gRPC server.")
    parser.add_argument(
        "--addr", default="localhost:50051", help="The address to bind the server to."
    )
    args = parser.parse_args()
    print(f"[OpenVoice] startup: {args}", file=sys.stderr)
    serve(args.addr)

View File

@@ -12,3 +12,5 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
fi
installRequirements
python -m unidic download

View File

@@ -0,0 +1,7 @@
torch==2.4.1
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git
whisper-timestamped
pydub==0.25.1
wavmark==0.0.3
eng_to_ipa==0.0.2

View File

@@ -0,0 +1,8 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git
whisper-timestamped
pydub==0.25.1
wavmark==0.0.3
eng_to_ipa==0.0.2

View File

@@ -0,0 +1,7 @@
torch==2.4.1
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git
whisper-timestamped
pydub==0.25.1
wavmark==0.0.3
eng_to_ipa==0.0.2

View File

@@ -0,0 +1,8 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git
whisper-timestamped
pydub==0.25.1
wavmark==0.0.3
eng_to_ipa==0.0.2

View File

@@ -0,0 +1,24 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.3.110+xpu
torch==2.3.1+cxx11.abi
torchaudio==2.3.1+cxx11.abi
oneccl_bind_pt==2.3.100+xpu
optimum[openvino]
grpcio==1.69.0
protobuf
librosa==0.9.1
faster-whisper==0.9.0
pydub==0.25.1
wavmark==0.0.3
eng_to_ipa==0.0.2
inflect==7.0.0
unidecode==1.3.7
whisper-timestamped==1.14.2
openai
python-dotenv
pypinyin==0.50.0
cn2an==0.5.22
jieba==0.42.1
langid==1.1.6
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git

View File

@@ -0,0 +1,17 @@
grpcio==1.69.0
protobuf
librosa
faster-whisper
inflect
unidecode
openai
python-dotenv
pypinyin
cn2an==0.5.22
numpy==1.22.0
networkx==2.8.8
jieba==0.42.1
gradio==5.9.1
langid==1.1.6
llvmlite==0.43.0
setuptools

View File

@@ -0,0 +1,82 @@
"""
A test script to test the gRPC service
"""
import unittest
import subprocess
import time
import backend_pb2
import backend_pb2_grpc

import grpc


class TestBackendServicer(unittest.TestCase):
    """
    Integration tests for the OpenVoice gRPC backend.

    unittest calls setUp() before and tearDown() after every test method, so
    each test gets its own backend process. The test bodies must NOT call
    setUp()/tearDown() themselves: doing so spawns a second server on the same
    port and overwrites self.service, leaving the first process running.
    """

    def setUp(self):
        """
        Start backend.py as a child process bound to localhost:50051 and wait
        30 seconds before the test body runs.
        """
        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
        time.sleep(30)

    def tearDown(self) -> None:
        """
        Terminate the backend process started by setUp() and wait for it to exit.
        """
        self.service.terminate()
        self.service.wait()

    def test_server_startup(self):
        """
        The Health RPC must answer with the bytes b'OK'.
        """
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.Health(backend_pb2.HealthMessage())
                self.assertEqual(response.message, b'OK')
        except Exception as err:
            # Print the underlying error so the cause is visible in the test log.
            print(err)
            self.fail("Server failed to start")

    def test_load_model(self):
        """
        LoadModel must report success with the expected confirmation message.
        """
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="checkpoints_v2",
                                                                   Type="en-us"))
                self.assertTrue(response.success)
                self.assertEqual(response.message, "Model loaded successfully")
        except Exception as err:
            print(err)
            self.fail("LoadModel service failed")

    def test_tts(self):
        """
        After a successful LoadModel, the TTS RPC must return a response.
        """
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="dingzhen"))
                self.assertTrue(response.success)
                tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story", voice="EN")
                tts_response = stub.TTS(tts_request)
                self.assertIsNotNone(tts_response)
        except Exception as err:
            print(err)
            self.fail("TTS service failed")

View File

@@ -0,0 +1,12 @@
#!/bin/bash
# Runs the OpenVoice backend unit tests, fetching the model checkpoints first
# when they are not already present in the current directory.
set -e

# Quote the path so a checkout directory containing spaces does not get
# word-split before `source` sees it.
source "$(dirname "$0")/../common/libbackend.sh"

# Download checkpoints if not present
# NOTE(review): assumes the archive unpacks into ./checkpoints_v2 - confirm.
if [ ! -d "checkpoints_v2" ]; then
    wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
    unzip checkpoints_v2.zip
fi

# runUnittests is not defined in this script; presumably provided by libbackend.sh.
runUnittests

View File

@@ -0,0 +1,44 @@
# Build/run/test targets for the parler-tts backend.

# Environment file handed to install.sh; the cublas build uses the NVIDIA variant.
export CONDA_ENV_PATH = "parler.yml"
SKIP_CONDA?=0
ifeq ($(BUILD_TYPE), cublas)
export CONDA_ENV_PATH = "parler-nvidia.yml"
endif

# Intel GPU are supposed to have dependencies installed in the main python
# environment, so we skip conda installation for SYCL builds.
# https://github.com/intel/intel-extension-for-pytorch/issues/538
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
export SKIP_CONDA=1
endif

# Install the backend dependencies, then generate the gRPC stubs.
.PHONY: parler-tts
parler-tts:
	@echo "Installing $(CONDA_ENV_PATH)..."
	bash install.sh $(CONDA_ENV_PATH)
	$(MAKE) protogen

# Start the backend (messages name this backend, not "transformers").
.PHONY: run
run: protogen
	@echo "Running parler-tts..."
	bash run.sh
	@echo "parler-tts run."

# Run the backend test-suite.
.PHONY: test
test: protogen
	@echo "Testing parler-tts..."
	bash test.sh
	@echo "parler-tts tested."

# Generated gRPC Python stubs.
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py

.PHONY: protogen-clean
protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py

backend_pb2_grpc.py backend_pb2.py:
	bash protogen.sh

# Remove generated stubs, the virtualenv and Python bytecode caches.
.PHONY: clean
clean: protogen-clean
	$(RM) -r venv __pycache__

View File

@@ -0,0 +1,125 @@
#!/usr/bin/env python3
"""
Extra gRPC server for ParlerTTSForConditionalGeneration (Parler-TTS) models.
"""
from concurrent import futures
import argparse
import signal
import sys
import os
import time
import backend_pb2
import backend_pb2_grpc
import grpc
from scipy.io.wavfile import write as write_wav
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer
import soundfile as sf
import torch
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
# If PYTHON_GRPC_MAX_WORKERS is set in the environment use it, otherwise default to 1 worker
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    gRPC servicer for the parler-tts backend.

    Implements three RPCs: Health, LoadModel and TTS. The model and tokenizer
    loaded by LoadModel are kept on the instance and reused by TTS.
    """

    def Health(self, request, context):
        """
        Report that the backend is alive.

        Args:
            request: The incoming health-check message (not inspected).
            context: The grpc.ServicerContext for this call (not used).

        Returns:
            A backend_pb2.Reply whose message is the bytes b"OK".
        """
        return backend_pb2.Reply(message="OK".encode('utf-8'))

    def LoadModel(self, request, context):
        """
        Load the Parler-TTS model and its tokenizer named by request.Model.

        The model is moved to "cuda:0" when CUDA is available, otherwise it
        stays on the CPU.

        Args:
            request: Message whose Model field names the checkpoint to load.
            context: The grpc.ServicerContext for this call (not used).

        Returns:
            A backend_pb2.Result with success=True on success, or success=False
            and a message describing the exception if loading failed.
        """
        checkpoint = request.Model
        if torch.cuda.is_available():
            device = "cuda:0"
        else:
            device = "cpu"

        try:
            loaded = ParlerTTSForConditionalGeneration.from_pretrained(checkpoint)
            self.model = loaded.to(device)
            self.tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        except Exception as err:
            # The variable must stay named `err`: the f-string echoes its name.
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        else:
            return backend_pb2.Result(message="Model loaded successfully", success=True)

    def TTS(self, request, context):
        """
        Synthesize request.text to an audio file written at request.dst.

        request.voice is tokenized as the speaker description; when it is empty
        a built-in description is used instead. request.model must be non-empty,
        although generation itself uses the model loaded by LoadModel.

        Args:
            request: Message carrying model, voice, text and dst fields.
            context: The grpc.ServicerContext for this call (not used).

        Returns:
            A backend_pb2.Result with success=True once the file is written, or
            success=False with an explanatory message otherwise.
        """
        requested_model = request.model
        description = request.voice
        if description == "":
            description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."

        if requested_model == "":
            return backend_pb2.Result(success=False, message="request.model is required")

        try:
            if torch.cuda.is_available():
                device = "cuda:0"
            else:
                device = "cpu"

            # Speaker description and text prompt are tokenized separately.
            description_tokens = self.tokenizer(description, return_tensors="pt")
            input_ids = description_tokens.input_ids.to(device)
            prompt_tokens = self.tokenizer(request.text, return_tensors="pt")
            prompt_input_ids = prompt_tokens.input_ids.to(device)

            generation = self.model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
            audio_arr = generation.cpu().numpy().squeeze()
            print("[parler-tts] TTS generated!", file=sys.stderr)

            sf.write(request.dst, audio_arr, self.model.config.sampling_rate)
            print("[parler-tts] TTS saved to", request.dst, file=sys.stderr)
            print("[parler-tts] TTS for", file=sys.stderr)
            print(request, file=sys.stderr)
        except Exception as err:
            # The variable must stay named `err`: the f-string echoes its name.
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

        return backend_pb2.Result(success=True)
def serve(address):
    """
    Start the parler-tts gRPC server on `address` and block until interrupted.

    Args:
        address: The "host:port" string passed to add_insecure_port.
    """
    worker_pool = futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)
    server = grpc.server(worker_pool)
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("[parler-tts] Server started. Listening on: " + address, file=sys.stderr)

    def _shutdown(sig, frame):
        # Stop the server immediately and exit the process with status 0.
        print("[parler-tts] Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    # Install the same handler for SIGINT and SIGTERM.
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, _shutdown)

    # Keep the main thread alive by sleeping in a loop.
    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)
if __name__ == "__main__":
    # Command-line entry point: read the bind address and hand it to serve().
    cli = argparse.ArgumentParser(description="Run the gRPC server.")
    cli.add_argument(
        "--addr",
        default="localhost:50051",
        help="The address to bind the server to.",
    )
    args = cli.parse_args()

    # Log the parsed arguments on stderr before the server starts.
    print(f"[parler-tts] startup: {args}", file=sys.stderr)
    serve(args.addr)

View File

@@ -0,0 +1,28 @@
#!/bin/bash
# Installs the parler-tts backend requirements, then replaces protobuf's
# internal builder.py with the upstream version.
set -e

# Quote the path so a checkout directory containing spaces is not word-split.
source "$(dirname "$0")/../common/libbackend.sh"

# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi

installRequirements

# https://github.com/descriptinc/audiotools/issues/101
# incompatible protobuf versions.
# NOTE(review): MY_DIR is not set in this script; presumably exported by libbackend.sh - confirm.
PYDIR=python3.10
pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"

if [ ! -d "${pyenv}" ]; then
    echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
    exit 1
fi

# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page as builder.py; with `set -e` the install then aborts.
curl --fail -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o "${pyenv}/builder.py"

View File

View File

@@ -0,0 +1,4 @@
git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17
llvmlite==0.43.0
numba==0.60.0
grpcio-tools==1.42.0

View File

@@ -0,0 +1,3 @@
transformers
accelerate
torch==2.4.1

View File

@@ -0,0 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
torchaudio==2.4.1+cu118
transformers
accelerate

View File

@@ -0,0 +1,4 @@
torch==2.4.1
torchaudio==2.4.1
transformers
accelerate

View File

@@ -0,0 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.3.0+rocm6.0
torchaudio==2.3.0+rocm6.0
transformers
accelerate

View File

@@ -1,6 +1,8 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.3.110+xpu
torch==2.3.1+cxx11.abi
torchaudio==2.3.1+cxx11.abi
oneccl_bind_pt==2.3.100+xpu
optimum[openvino]
faster-whisper
transformers
accelerate

View File

@@ -0,0 +1,4 @@
grpcio==1.69.0
certifi
llvmlite==0.43.0
setuptools

View File

@@ -0,0 +1,4 @@
#!/bin/bash
# Launches the backend, forwarding every command-line argument unchanged.

# Quote the path so a checkout directory containing spaces is not word-split.
source "$(dirname "$0")/../common/libbackend.sh"

# "$@" keeps each original argument as a single word; a bare $@ would re-split
# arguments that contain whitespace.
startBackend "$@"

View File

@@ -0,0 +1,81 @@
"""
A test script to test the gRPC service
"""
import unittest
import subprocess
import time
import backend_pb2
import backend_pb2_grpc

import grpc


class TestBackendServicer(unittest.TestCase):
    """
    Integration tests for the parler-tts gRPC backend.

    unittest calls setUp() before and tearDown() after every test method, so
    each test gets its own backend process. The test bodies must NOT call
    setUp()/tearDown() themselves: doing so spawns a second server on the same
    port and overwrites self.service, leaving the first process running.
    """

    def setUp(self):
        """
        Start backend.py as a child process bound to localhost:50051 and wait
        10 seconds before the test body runs.
        """
        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
        time.sleep(10)

    def tearDown(self) -> None:
        """
        Terminate the backend process started by setUp() and wait for it to exit.
        """
        self.service.terminate()
        self.service.wait()

    def test_server_startup(self):
        """
        The Health RPC must answer with the bytes b'OK'.
        """
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.Health(backend_pb2.HealthMessage())
                self.assertEqual(response.message, b'OK')
        except Exception as err:
            # Print the underlying error so the cause is visible in the test log.
            print(err)
            self.fail("Server failed to start")

    def test_load_model(self):
        """
        LoadModel must report success with the expected confirmation message.
        """
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1"))
                self.assertTrue(response.success)
                self.assertEqual(response.message, "Model loaded successfully")
        except Exception as err:
            print(err)
            self.fail("LoadModel service failed")

    def test_tts(self):
        """
        After a successful LoadModel, the TTS RPC must return a response.
        """
        try:
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1"))
                self.assertTrue(response.success)
                tts_request = backend_pb2.TTSRequest(text="Hey, how are you doing today?")
                tts_response = stub.TTS(tts_request)
                self.assertIsNotNone(tts_response)
        except Exception as err:
            print(err)
            self.fail("TTS service failed")

View File

@@ -0,0 +1,6 @@
#!/bin/bash
# Runs the backend unit tests; aborts on the first failing command.
set -e

# Quote the path so a checkout directory containing spaces is not word-split.
source "$(dirname "$0")/../common/libbackend.sh"

# runUnittests is not defined in this script; presumably provided by libbackend.sh.
runUnittests

View File

@@ -21,7 +21,7 @@ import torch.cuda
XPU=os.environ.get("XPU", "0") == "1"
from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria, MambaConfig, MambaForCausalLM
from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria
from transformers import AutoProcessor, MusicgenForConditionalGeneration
from scipy.io import wavfile
import outetts
@@ -245,10 +245,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
autoTokenizer = False
self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
self.SentenceTransformer = True
elif request.Type == "Mamba":
autoTokenizer = False
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
self.model = MambaForCausalLM.from_pretrained(model_name)
else:
print("Automodel", file=sys.stderr)
self.model = AutoModel.from_pretrained(model_name,

View File

@@ -515,7 +515,7 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
}
}
if (u & FLAG_IMAGE) == FLAG_IMAGE {
imageBackends := []string{"diffusers", "stablediffusion", "stablediffusion-ggml"}
imageBackends := []string{"diffusers", "stablediffusion"}
if !slices.Contains(imageBackends, c.Backend) {
return false
}

View File

@@ -48,66 +48,5 @@ var _ = Describe("Test cases for config related functions", func() {
// config should includes whisper-1 models's api.config
Expect(loadedModelNames).To(ContainElements("whisper-1"))
})
It("Test new loadconfig", func() {
bcl := NewBackendConfigLoader(os.Getenv("MODELS_PATH"))
err := bcl.LoadBackendConfigsFromPath(os.Getenv("MODELS_PATH"))
Expect(err).To(BeNil())
configs := bcl.GetAllBackendConfigs()
loadedModelNames := []string{}
for _, v := range configs {
loadedModelNames = append(loadedModelNames, v.Name)
}
Expect(configs).ToNot(BeNil())
totalModels := len(loadedModelNames)
Expect(loadedModelNames).To(ContainElements("code-search-ada-code-001"))
// config should includes text-embedding-ada-002 models's api.config
Expect(loadedModelNames).To(ContainElements("text-embedding-ada-002"))
// config should includes rwkv_test models's api.config
Expect(loadedModelNames).To(ContainElements("rwkv_test"))
// config should includes whisper-1 models's api.config
Expect(loadedModelNames).To(ContainElements("whisper-1"))
// create a temp directory and store a temporary model
tmpdir, err := os.MkdirTemp("", "test")
Expect(err).ToNot(HaveOccurred())
defer os.RemoveAll(tmpdir)
// create a temporary model
model := `name: "test-model"
description: "test model"
options:
- foo
- bar
- baz
`
modelFile := tmpdir + "/test-model.yaml"
err = os.WriteFile(modelFile, []byte(model), 0644)
Expect(err).ToNot(HaveOccurred())
err = bcl.LoadBackendConfigsFromPath(tmpdir)
Expect(err).ToNot(HaveOccurred())
configs = bcl.GetAllBackendConfigs()
Expect(len(configs)).ToNot(Equal(totalModels))
loadedModelNames = []string{}
var testModel BackendConfig
for _, v := range configs {
loadedModelNames = append(loadedModelNames, v.Name)
if v.Name == "test-model" {
testModel = v
}
}
Expect(loadedModelNames).To(ContainElements("test-model"))
Expect(testModel.Description).To(Equal("test model"))
Expect(testModel.Options).To(ContainElements("foo", "bar", "baz"))
})
})
})

View File

@@ -687,10 +687,6 @@ var _ = Describe("API test", func() {
Name: "model-gallery",
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/index.yaml",
},
{
Name: "localai",
URL: "https://raw.githubusercontent.com/mudler/LocalAI/refs/heads/master/gallery/index.yaml",
},
}
application, err := application.New(
@@ -768,8 +764,10 @@ var _ = Describe("API test", func() {
}
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
ID: "localai@sd-1.5-ggml",
Name: "stablediffusion",
ID: "model-gallery@stablediffusion",
Overrides: map[string]interface{}{
"parameters": map[string]interface{}{"model": "stablediffusion_assets"},
},
})
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
@@ -780,14 +778,14 @@ var _ = Describe("API test", func() {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
fmt.Println(response)
return response["processed"].(bool)
}, "1200s", "10s").Should(Equal(true))
}, "360s", "10s").Should(Equal(true))
resp, err := http.Post(
"http://127.0.0.1:9090/v1/images/generations",
"application/json",
bytes.NewBuffer([]byte(`{
"prompt": "a lovely cat",
"step": 1, "seed":9000,
"prompt": "floating hair, portrait, ((loli)), ((one girl)), cute face, hidden hands, asymmetrical bangs, beautiful detailed eyes, eye shadow, hair ornament, ribbons, bowties, buttons, pleated skirt, (((masterpiece))), ((best quality)), colorful|((part of the head)), ((((mutated hands and fingers)))), deformed, blurry, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus, long neck, long body, Octane renderer, lowres, bad anatomy, bad hands, text",
"mode": 2, "seed":9000,
"size": "256x256", "n":2}`)))
// The response should contain an URL
Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
@@ -796,7 +794,6 @@ var _ = Describe("API test", func() {
imgUrlResp := &schema.OpenAIResponse{}
err = json.Unmarshal(dat, imgUrlResp)
Expect(err).ToNot(HaveOccurred(), fmt.Sprint(dat))
Expect(imgUrlResp.Data).ToNot(Or(BeNil(), BeZero()))
imgUrl := imgUrlResp.Data[0].URL
Expect(imgUrl).To(ContainSubstring("http://127.0.0.1:9090/"), imgUrl)
@@ -1003,7 +1000,7 @@ var _ = Describe("API test", func() {
}
}
deleteBody := schema.StoresDelete{
deleteBody := schema.StoresReset{
Keys: [][]float32{
{0.1, 0.2, 0.3},
},

View File

@@ -28,7 +28,7 @@ func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ct
}
}
// BackendShutdownEndpoint shuts down the specified backend
// BackendMonitorEndpoint shuts down the specified backend
// @Summary Backend monitor endpoint
// @Param request body schema.BackendMonitorRequest true "Backend statistics request"
// @Router /backend/shutdown [post]

View File

@@ -36,9 +36,9 @@ func StoresSetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfi
}
}
func StoresDeleteEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
func StoresResetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.StoresDelete)
input := new(schema.StoresReset)
if err := c.BodyParser(input); err != nil {
return err
@@ -49,7 +49,7 @@ func StoresDeleteEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationCo
return err
}
if err := store.DeleteCols(c.Context(), sb, input.Keys); err != nil {
if _, err := sb.StoresReset(c.Context(), nil); err != nil {
return err
}
@@ -57,37 +57,6 @@ func StoresDeleteEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationCo
}
}
// StoresGetEndpoint returns a fiber handler that looks up stored values by key.
//
// The handler parses the request body into a schema.StoresGet, resolves the
// store backend named by its Store field, fetches the entries for the
// requested keys through store.GetCols and replies with a
// schema.StoresGetResponse in which every value is converted to a string.
// Errors from body parsing, backend resolution or the lookup are returned
// to fiber unchanged.
func StoresGetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
	return func(c *fiber.Ctx) error {
		var req schema.StoresGet
		if err := c.BodyParser(&req); err != nil {
			return err
		}

		storeBackend, err := backend.StoreBackend(sl, appConfig, req.Store)
		if err != nil {
			return err
		}

		foundKeys, rawValues, err := store.GetCols(c.Context(), storeBackend, req.Keys)
		if err != nil {
			return err
		}

		// Always a non-nil slice, so an empty result encodes as [] in JSON.
		values := make([]string, 0, len(rawValues))
		for _, raw := range rawValues {
			values = append(values, string(raw))
		}

		return c.JSON(schema.StoresGetResponse{
			Keys:   foundKeys,
			Values: values,
		})
	}
}
func StoresFindEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(schema.StoresFind)

View File

@@ -72,7 +72,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
}
if m == "" {
m = "stablediffusion"
m = model.StableDiffusionBackend
}
log.Debug().Msgf("Loading model: %+v", m)
@@ -129,9 +129,9 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
switch config.Backend {
case "stablediffusion":
config.Backend = model.StableDiffusionGGMLBackend
config.Backend = model.StableDiffusionBackend
case "":
config.Backend = model.StableDiffusionGGMLBackend
config.Backend = model.StableDiffusionBackend
}
if !strings.Contains(input.Size, "x") {

View File

@@ -4,7 +4,6 @@ import (
"context"
"encoding/json"
"fmt"
"strconv"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
@@ -297,14 +296,6 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
}
}
}
// If a quality was defined as number, convert it to step
if input.Quality != "" {
q, err := strconv.Atoi(input.Quality)
if err == nil {
config.Step = q
}
}
}
func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.BackendConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.BackendConfig, *schema.OpenAIRequest, error) {

View File

@@ -39,8 +39,7 @@ func RegisterLocalAIRoutes(router *fiber.App,
// Stores
sl := model.NewModelLoader("")
router.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig))
router.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig))
router.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
router.Post("/stores/reset", localai.StoresDeleteEndpoint(sl, appConfig))
router.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))
if !appConfig.DisableMetrics {

View File

@@ -47,21 +47,8 @@ type StoresSet struct {
Values []string `json:"values" yaml:"values"`
}
type StoresDelete struct {
type StoresReset struct {
Store string `json:"store,omitempty" yaml:"store,omitempty"`
Keys [][]float32 `json:"keys"`
}
type StoresGet struct {
Store string `json:"store,omitempty" yaml:"store,omitempty"`
Keys [][]float32 `json:"keys" yaml:"keys"`
}
type StoresGetResponse struct {
Keys [][]float32 `json:"keys" yaml:"keys"`
Values []string `json:"values" yaml:"values"`
}
type StoresFind struct {

View File

@@ -191,9 +191,8 @@ type OpenAIRequest struct {
Stream bool `json:"stream"`
// Image (not supported by OpenAI)
Mode int `json:"mode"`
Quality string `json:"quality"`
Step int `json:"step"`
Mode int `json:"mode"`
Step int `json:"step"`
// A grammar to constrain the LLM output
Grammar string `json:"grammar" yaml:"grammar"`

View File

@@ -5219,23 +5219,6 @@
- filename: Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf
sha256: 268390e07edd407ad93ea21a868b7ae995b5950e01cad0db9e1802ae5049d405
uri: huggingface://bartowski/Dolphin3.0-Llama3.1-8B-GGUF/Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf
- !!merge <<: *llama31
name: "deepseek-r1-distill-llama-8b"
icon: "https://avatars.githubusercontent.com/u/148330874"
urls:
- https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B
- https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF
description: |
DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks.
Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing.
By incorporating cold-start data prior to the RL phase, DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks.
overrides:
parameters:
model: deepseek-r1-distill-llama-8b-Q4_K_M.gguf
files:
- filename: deepseek-r1-distill-llama-8b-Q4_K_M.gguf
sha256: f8eba201522ab44b79bc54166126bfaf836111ff4cbf2d13c59c3b57da10573b
uri: huggingface://unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf
- &deepseek ## Deepseek
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
name: "deepseek-coder-v2-lite-instruct"
@@ -5301,86 +5284,6 @@
- filename: archangel_sft_pythia2-8b.Q4_K_M.gguf
sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8
uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf
- &deepseek-r1 ## Start DeepSeek-R1
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
name: "deepseek-r1-distill-qwen-1.5b"
icon: "https://avatars.githubusercontent.com/u/148330874"
urls:
- https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5b
- https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF
description: |
DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks.
Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing.
By incorporating cold-start data prior to the RL phase, DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks.
overrides:
parameters:
model: DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf
files:
- filename: DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf
sha256: 1741e5b2d062b07acf048bf0d2c514dadf2a48f94e2b4aa0cfe069af3838ee2f
uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf
- !!merge <<: *deepseek-r1
name: "deepseek-r1-distill-qwen-7b"
urls:
- https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
- https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF
overrides:
parameters:
model: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
files:
- filename: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
sha256: 731ece8d06dc7eda6f6572997feb9ee1258db0784827e642909d9b565641937b
uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
- !!merge <<: *deepseek-r1
name: "deepseek-r1-distill-qwen-14b"
urls:
- https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
- https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-14B-GGUF
overrides:
parameters:
model: DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf
files:
- filename: DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf
sha256: 0b319bd0572f2730bfe11cc751defe82045fad5085b4e60591ac2cd2d9633181
uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-14B-GGUF/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf
- !!merge <<: *deepseek-r1
name: "deepseek-r1-distill-qwen-32b"
urls:
- https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
- https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF
overrides:
parameters:
model: DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf
files:
- filename: DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf
sha256: bed9b0f551f5b95bf9da5888a48f0f87c37ad6b72519c4cbd775f54ac0b9fc62
uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF/DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf
- !!merge <<: *deepseek-r1
name: "deepseek-r1-distill-llama-8b"
icon: "https://avatars.githubusercontent.com/u/148330874"
urls:
- https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B
- https://huggingface.co/bartowski/DeepSeek-R1-Distill-Llama-8B-GGUF
overrides:
parameters:
model: DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf
files:
- filename: DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf
sha256: 87bcba20b4846d8dadf753d3ff48f9285d131fc95e3e0e7e934d4f20bc896f5d
uri: huggingface://bartowski/DeepSeek-R1-Distill-Llama-8B-GGUF/DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf
- !!merge <<: *deepseek-r1
name: "deepseek-r1-distill-llama-70b"
icon: "https://avatars.githubusercontent.com/u/148330874"
urls:
- https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B
- https://huggingface.co/bartowski/DeepSeek-R1-Distill-Llama-70B-GGUF
overrides:
parameters:
model: DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf
files:
- filename: DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf
sha256: 181a82a1d6d2fa24fe4db83a68eee030384986bdbdd4773ba76424e3a6eb9fd8
uri: huggingface://bartowski/DeepSeek-R1-Distill-Llama-70B-GGUF/DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf
- &qwen2 ## Start QWEN2
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
name: "qwen2-7b-instruct"
@@ -5714,32 +5617,6 @@
- filename: marco-o1-uncensored.Q4_K_M.gguf
sha256: ad0440270a7254098f90779744d3e5b34fe49b7baf97c819909ba9c5648cc0d9
uri: huggingface://QuantFactory/marco-o1-uncensored-GGUF/marco-o1-uncensored.Q4_K_M.gguf
- !!merge <<: *qwen2
name: "minicpm-o-2_6"
icon: https://avatars.githubusercontent.com/u/89920203
urls:
- https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf
- https://huggingface.co/openbmb/MiniCPM-o-2_6
description: |
MiniCPM-o 2.6 is the latest and most capable model in the MiniCPM-o series. The model is built in an end-to-end fashion based on SigLip-400M, Whisper-medium-300M, ChatTTS-200M, and Qwen2.5-7B with a total of 8B parameters
tags:
- llm
- multimodal
- gguf
- gpu
- qwen2
- cpu
overrides:
mmproj: minicpm-o-2_6-mmproj-f16.gguf
parameters:
model: minicpm-o-2_6-Q4_K_M.gguf
files:
- filename: minicpm-o-2_6-Q4_K_M.gguf
sha256: 4f635fc0c0bb88d50ccd9cf1f1e5892b5cb085ff88fe0d8e1148fd9a8a836bc2
uri: huggingface://openbmb/MiniCPM-o-2_6-gguf/Model-7.6B-Q4_K_M.gguf
- filename: minicpm-o-2_6-mmproj-f16.gguf
sha256: efa4f7d96aa0f838f2023fc8d28e519179b16f1106777fa9280b32628191aa3e
uri: huggingface://openbmb/MiniCPM-o-2_6-gguf/mmproj-model-f16.gguf
- !!merge <<: *qwen2
name: "minicpm-v-2_6"
license: apache-2.0
@@ -11137,7 +11014,7 @@
uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
sha256: 879db523c30d3b9017143d56705015e15a2cb5628762c11d086fed9538abd7fd
- name: stable-diffusion-3-medium
icon: https://avatars.githubusercontent.com/u/100950301
icon: https://huggingface.co/leo009/stable-diffusion-3-medium/resolve/main/sd3demo.jpg
license: other
description: |
Stable Diffusion 3 Medium is a Multimodal Diffusion Transformer (MMDiT) text-to-image model that features greatly improved performance in image quality, typography, complex prompt understanding, and resource-efficiency.
@@ -11151,63 +11028,6 @@
- sd-3
- gpu
url: "github:mudler/LocalAI/gallery/stablediffusion3.yaml@master"
- name: sd-1.5-ggml
icon: https://avatars.githubusercontent.com/u/37351293
license: creativeml-openrail-m
url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master"
description: |
Stable Diffusion 1.5
urls:
- https://huggingface.co/second-state/stable-diffusion-v1-5-GGUF
tags:
- text-to-image
- stablediffusion
- gpu
- cpu
overrides:
options:
- "sampler:euler"
parameters:
model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf
files:
- filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f"
uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
- name: sd-3.5-medium-ggml
license: stabilityai-ai-community
url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master"
description: |
Stable Diffusion 3.5 Medium is a Multimodal Diffusion Transformer (MMDiT) text-to-image model that features improved performance in image quality, typography, complex prompt understanding, and resource-efficiency.
urls:
- https://huggingface.co/stabilityai/stable-diffusion-3.5-medium
- https://huggingface.co/second-state/stable-diffusion-3.5-medium-GGUF
tags:
- text-to-image
- stablediffusion
- gpu
- cpu
icon: https://avatars.githubusercontent.com/u/100950301
overrides:
options:
- "clip_l_path:clip_l-Q4_0.gguf"
- "clip_g_path:clip_g-Q4_0.gguf"
- "t5xxl_path:t5xxl-Q4_0.gguf"
- "sampler:euler"
parameters:
model: sd3.5_medium-Q4_0.gguf
files:
- filename: "sd3.5_medium-Q4_0.gguf"
sha256: "3bb8c5e9ab0a841117089ed4ed81d885bb85161df2a766b812f829bc55b31adf"
uri: "huggingface://second-state/stable-diffusion-3.5-medium-GGUF/sd3.5_medium-Q4_0.gguf"
- filename: clip_g-Q4_0.gguf
sha256: c142411147e16b7c4b9cc1f5d977cbe596104435d76fde47172d3d35c5e58bb8
uri: huggingface://second-state/stable-diffusion-3.5-medium-GGUF/clip_g-Q4_0.gguf
- filename: clip_l-Q4_0.gguf
sha256: f5ad88ae2ac924eb4ac0298b77afa304b5e6014fc0c4128f0e3df40fdfcc0f8a
uri: huggingface://second-state/stable-diffusion-3.5-medium-GGUF/clip_l-Q4_0.gguf
- filename: t5xxl-Q4_0.gguf
sha256: 987ba47c158b890c274f78fd35324419f50941e846a49789f0977e9fe9d97ab7
uri: huggingface://second-state/stable-diffusion-3.5-medium-GGUF/t5xxl-Q4_0.gguf
- name: sd-3.5-large-ggml
license: stabilityai-ai-community
url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master"
@@ -11218,10 +11038,10 @@
- https://huggingface.co/second-state/stable-diffusion-3.5-large-GGUF
tags:
- text-to-image
- stablediffusion
- flux
- gpu
- cpu
icon: https://avatars.githubusercontent.com/u/100950301
icon: https://huggingface.co/stabilityai/stable-diffusion-3.5-large/media/main/sd3.5_large_demo.png
overrides:
parameters:
model: sd3.5_large-Q4_0.gguf
@@ -11240,7 +11060,6 @@
uri: huggingface://second-state/stable-diffusion-3.5-large-GGUF/t5xxl-Q5_0.gguf
- &flux
name: flux.1-dev
icon: https://avatars.githubusercontent.com/u/164064024
license: flux-1-dev-non-commercial-license
description: |
FLUX.1 [dev] is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions. For more information, please read our blog post.
@@ -11264,6 +11083,7 @@
- !!merge <<: *flux
name: flux.1-schnell
license: apache-2
icon: https://huggingface.co/black-forest-labs/FLUX.1-schnell/resolve/main/schnell_grid.jpeg
description: |
FLUX.1 [schnell] is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions. For more information, please read our blog post.
Key Features
@@ -11296,6 +11116,7 @@
- flux
- gpu
- cpu
icon: https://huggingface.co/black-forest-labs/FLUX.1-schnell/resolve/main/schnell_grid.jpeg
overrides:
parameters:
model: flux1-dev-Q2_K.gguf
@@ -11315,7 +11136,6 @@
- &whisper ## Whisper
url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master"
name: "whisper-1"
icon: https://avatars.githubusercontent.com/u/14957082
license: "MIT"
urls:
- https://github.com/ggerganov/whisper.cpp
@@ -11493,7 +11313,6 @@
description: |
Stable Diffusion in NCNN with c++, supported txt2img and img2img
name: stablediffusion-cpp
icon: https://avatars.githubusercontent.com/u/100950301
- &piper ## Piper TTS
url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-kathleen-low
@@ -12074,7 +11893,6 @@
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh_CN-huayan-medium.tar.gz
sha256: 0299a5e7f481ba853404e9f0e1515a94d5409585d76963fa4d30c64bd630aa99
- name: "silero-vad"
icon: https://github.com/snakers4/silero-models/raw/master/files/silero_logo.jpg
url: github:mudler/LocalAI/gallery/virtual.yaml@master
urls:
- https://github.com/snakers4/silero-vad
@@ -12094,7 +11912,6 @@
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
- name: "bark-cpp-small"
icon: https://avatars.githubusercontent.com/u/99442120
url: github:mudler/LocalAI/gallery/virtual.yaml@master
license: mit
urls:

1
go.mod
View File

@@ -93,6 +93,7 @@ require (
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/nikolalohinski/gonja/v2 v2.3.2 // indirect
github.com/philippgille/chromem-go v0.7.0 // indirect
github.com/pion/datachannel v1.5.10 // indirect
github.com/pion/dtls/v2 v2.2.12 // indirect
github.com/pion/ice/v2 v2.3.37 // indirect

2
go.sum
View File

@@ -611,6 +611,8 @@ github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 h1:Ii+DKncOVM8Cu1H
github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5/go.mod h1:iIss55rKnNBTvrwdmkUpLnDpZoAHvWaiq5+iMmen4AE=
github.com/philhofer/fwd v1.1.2 h1:bnDivRJ1EWPjUIRXV5KfORO897HTbpFAQddBdE8t7Gw=
github.com/philhofer/fwd v1.1.2/go.mod h1:qkPdfjR2SIEbspLqpe1tO4n5yICnr2DY7mqEx2tUTP0=
github.com/philippgille/chromem-go v0.7.0 h1:4jfvfyKymjKNfGxBUhHUcj1kp7B17NL/I1P+vGh1RvY=
github.com/philippgille/chromem-go v0.7.0/go.mod h1:hTd+wGEm/fFPQl7ilfCwQXkgEUxceYh86iIdoKMolPo=
github.com/pierrec/lz4/v4 v4.1.2 h1:qvY3YFXRQE/XB8MlLzJH7mSzBs74eA2gg52YTk6jUPM=
github.com/pierrec/lz4/v4 v4.1.2/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/pion/datachannel v1.5.8 h1:ph1P1NsGkazkjrvyMfhRBUAWMxugJjq2HfQifaOoSNo=

View File

@@ -21,16 +21,14 @@ import (
)
const (
HuggingFacePrefix = "huggingface://"
HuggingFacePrefix1 = "hf://"
HuggingFacePrefix2 = "hf.co/"
OCIPrefix = "oci://"
OllamaPrefix = "ollama://"
HTTPPrefix = "http://"
HTTPSPrefix = "https://"
GithubURI = "github:"
GithubURI2 = "github://"
LocalPrefix = "file://"
HuggingFacePrefix = "huggingface://"
OCIPrefix = "oci://"
OllamaPrefix = "ollama://"
HTTPPrefix = "http://"
HTTPSPrefix = "https://"
GithubURI = "github:"
GithubURI2 = "github://"
LocalPrefix = "file://"
)
type URI string
@@ -129,8 +127,6 @@ func (u URI) LooksLikeURL() bool {
return strings.HasPrefix(string(u), HTTPPrefix) ||
strings.HasPrefix(string(u), HTTPSPrefix) ||
strings.HasPrefix(string(u), HuggingFacePrefix) ||
strings.HasPrefix(string(u), HuggingFacePrefix1) ||
strings.HasPrefix(string(u), HuggingFacePrefix2) ||
strings.HasPrefix(string(u), GithubURI) ||
strings.HasPrefix(string(u), OllamaPrefix) ||
strings.HasPrefix(string(u), OCIPrefix) ||
@@ -174,10 +170,8 @@ func (s URI) ResolveURL() string {
projectPath := strings.Join(repoPath[2:], "/")
return fmt.Sprintf("https://raw.githubusercontent.com/%s/%s/%s/%s", org, project, branch, projectPath)
case strings.HasPrefix(string(s), HuggingFacePrefix) || strings.HasPrefix(string(s), HuggingFacePrefix1) || strings.HasPrefix(string(s), HuggingFacePrefix2):
case strings.HasPrefix(string(s), HuggingFacePrefix):
repository := strings.Replace(string(s), HuggingFacePrefix, "", 1)
repository = strings.Replace(repository, HuggingFacePrefix1, "", 1)
repository = strings.Replace(repository, HuggingFacePrefix2, "", 1)
// convert repository to a full URL.
// e.g. TheBloke/Mixtral-8x7B-v0.1-GGUF/mixtral-8x7b-v0.1.Q2_K.gguf@main -> https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q2_K.gguf
owner := strings.Split(repository, "/")[0]

View File

@@ -34,7 +34,7 @@ type Tool struct {
}
type Tools []Tool
// ToJSONStructure converts a list of functions to a JSON structure that can be parsed to a grammar
// ToJSONNameStructure converts a list of functions to a JSON structure that can be parsed to a grammar
// This allows the LLM to return a response of the type: { "name": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }
func (f Functions) ToJSONStructure(name, args string) JSONFunctionStructure {
nameKey := defaultFunctionNameKey

View File

@@ -46,8 +46,7 @@ type Backend interface {
Status(ctx context.Context) (*pb.StatusResponse, error)
StoresSet(ctx context.Context, in *pb.StoresSetOptions, opts ...grpc.CallOption) (*pb.Result, error)
StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, opts ...grpc.CallOption) (*pb.Result, error)
StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error)
StoresReset(ctx context.Context, in *pb.StoresResetOptions, opts ...grpc.CallOption) (*pb.Result, error)
StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error)
Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.CallOption) (*pb.RerankResult, error)

View File

@@ -80,11 +80,7 @@ func (llm *Base) StoresSet(*pb.StoresSetOptions) error {
return fmt.Errorf("unimplemented")
}
func (llm *Base) StoresGet(*pb.StoresGetOptions) (pb.StoresGetResult, error) {
return pb.StoresGetResult{}, fmt.Errorf("unimplemented")
}
func (llm *Base) StoresDelete(*pb.StoresDeleteOptions) error {
func (llm *Base) StoresReset(*pb.StoresResetOptions) error {
return fmt.Errorf("unimplemented")
}

View File

@@ -303,7 +303,7 @@ func (c *Client) StoresSet(ctx context.Context, in *pb.StoresSetOptions, opts ..
return client.StoresSet(ctx, in, opts...)
}
func (c *Client) StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, opts ...grpc.CallOption) (*pb.Result, error) {
func (c *Client) StoreReset(ctx context.Context, in *pb.StoresResetOptions, opts ...grpc.CallOption) (*pb.Result, error) {
if !c.parallel {
c.opMutex.Lock()
defer c.opMutex.Unlock()
@@ -318,25 +318,7 @@ func (c *Client) StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, o
}
defer conn.Close()
client := pb.NewBackendClient(conn)
return client.StoresDelete(ctx, in, opts...)
}
func (c *Client) StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error) {
if !c.parallel {
c.opMutex.Lock()
defer c.opMutex.Unlock()
}
c.setBusy(true)
defer c.setBusy(false)
c.wdMark()
defer c.wdUnMark()
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
if err != nil {
return nil, err
}
defer conn.Close()
client := pb.NewBackendClient(conn)
return client.StoresGet(ctx, in, opts...)
return client.StoresReset(ctx, in, opts...)
}
func (c *Client) StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error) {

View File

@@ -71,12 +71,8 @@ func (e *embedBackend) StoresSet(ctx context.Context, in *pb.StoresSetOptions, o
return e.s.StoresSet(ctx, in)
}
func (e *embedBackend) StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, opts ...grpc.CallOption) (*pb.Result, error) {
return e.s.StoresDelete(ctx, in)
}
func (e *embedBackend) StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error) {
return e.s.StoresGet(ctx, in)
func (e *embedBackend) StoresReset(ctx context.Context, in *pb.StoresResetOptions, opts ...grpc.CallOption) (*pb.Result, error) {
return e.s.StoresReset(ctx, in)
}
func (e *embedBackend) StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error) {

View File

@@ -21,8 +21,7 @@ type LLM interface {
Status() (pb.StatusResponse, error)
StoresSet(*pb.StoresSetOptions) error
StoresDelete(*pb.StoresDeleteOptions) error
StoresGet(*pb.StoresGetOptions) (pb.StoresGetResult, error)
StoresReset(*pb.StoresResetOptions) error
StoresFind(*pb.StoresFindOptions) (pb.StoresFindResult, error)
VAD(*pb.VADRequest) (pb.VADResponse, error)

View File

@@ -191,28 +191,16 @@ func (s *server) StoresSet(ctx context.Context, in *pb.StoresSetOptions) (*pb.Re
return &pb.Result{Message: "Set key", Success: true}, nil
}
func (s *server) StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions) (*pb.Result, error) {
func (s *server) StoresReset(ctx context.Context, in *pb.StoresResetOptions) (*pb.Result, error) {
if s.llm.Locking() {
s.llm.Lock()
defer s.llm.Unlock()
}
err := s.llm.StoresDelete(in)
err := s.llm.StoresReset(in)
if err != nil {
return &pb.Result{Message: fmt.Sprintf("Error deleting entry: %s", err.Error()), Success: false}, err
}
return &pb.Result{Message: "Deleted key", Success: true}, nil
}
func (s *server) StoresGet(ctx context.Context, in *pb.StoresGetOptions) (*pb.StoresGetResult, error) {
if s.llm.Locking() {
s.llm.Lock()
defer s.llm.Unlock()
}
res, err := s.llm.StoresGet(in)
if err != nil {
return nil, err
}
return &res, nil
return &pb.Result{Message: "Deleted mem db", Success: true}, nil
}
func (s *server) StoresFind(ctx context.Context, in *pb.StoresFindOptions) (*pb.StoresFindResult, error) {

View File

@@ -29,14 +29,11 @@ var Aliases map[string]string = map[string]string{
"langchain-huggingface": LCHuggingFaceBackend,
"transformers-musicgen": TransformersBackend,
"sentencetransformers": TransformersBackend,
"mamba": TransformersBackend,
"stablediffusion": StableDiffusionGGMLBackend,
}
var TypeAlias map[string]string = map[string]string{
"sentencetransformers": "SentenceTransformer",
"huggingface-embeddings": "SentenceTransformer",
"mamba": "Mamba",
"transformers-musicgen": "MusicgenForConditionalGeneration",
}
@@ -48,7 +45,6 @@ const (
LLamaCPP = "llama-cpp"
LLamaCPPAVX2 = "llama-cpp-avx2"
LLamaCPPAVX512 = "llama-cpp-avx512"
LLamaCPPAVX = "llama-cpp-avx"
LLamaCPPFallback = "llama-cpp-fallback"
LLamaCPPCUDA = "llama-cpp-cuda"
@@ -58,27 +54,15 @@ const (
LLamaCPPGRPC = "llama-cpp-grpc"
WhisperBackend = "whisper"
StableDiffusionGGMLBackend = "stablediffusion-ggml"
PiperBackend = "piper"
LCHuggingFaceBackend = "huggingface"
WhisperBackend = "whisper"
StableDiffusionBackend = "stablediffusion"
PiperBackend = "piper"
LCHuggingFaceBackend = "huggingface"
TransformersBackend = "transformers"
LocalStoreBackend = "local-store"
)
var llamaCPPVariants = []string{
LLamaCPPAVX2,
LLamaCPPAVX512,
LLamaCPPAVX,
LLamaCPPFallback,
LLamaCPPCUDA,
LLamaCPPHipblas,
LLamaCPPSycl16,
LLamaCPPSycl32,
LLamaCPPGRPC,
}
func backendPath(assetDir, backend string) string {
return filepath.Join(assetDir, "backend-assets", "grpc", backend)
}
@@ -120,14 +104,40 @@ ENTRY:
if AutoDetect {
// if we find the llama.cpp variants, show them of as a single backend (llama-cpp) as later we are going to pick that up
// when starting the service
foundVariants := map[string]bool{}
foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda, foundLCPPHipblas, foundSycl16, foundSycl32 := false, false, false, false, false, false, false, false
if _, ok := backends[LLamaCPP]; !ok {
for _, e := range entry {
for _, v := range llamaCPPVariants {
if strings.Contains(e.Name(), v) && !foundVariants[v] {
backends[LLamaCPP] = append(backends[LLamaCPP], v)
foundVariants[v] = true
}
if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 {
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPAVX2)
foundLCPPAVX2 = true
}
if strings.Contains(e.Name(), LLamaCPPAVX) && !foundLCPPAVX {
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPAVX)
foundLCPPAVX = true
}
if strings.Contains(e.Name(), LLamaCPPFallback) && !foundLCPPFallback {
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPFallback)
foundLCPPFallback = true
}
if strings.Contains(e.Name(), LLamaCPPGRPC) && !foundLCPPGRPC {
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPGRPC)
foundLCPPGRPC = true
}
if strings.Contains(e.Name(), LLamaCPPCUDA) && !foundLCPPCuda {
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPCUDA)
foundLCPPCuda = true
}
if strings.Contains(e.Name(), LLamaCPPHipblas) && !foundLCPPHipblas {
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPHipblas)
foundLCPPHipblas = true
}
if strings.Contains(e.Name(), LLamaCPPSycl16) && !foundSycl16 {
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPSycl16)
foundSycl16 = true
}
if strings.Contains(e.Name(), LLamaCPPSycl32) && !foundSycl32 {
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPSycl32)
foundSycl32 = true
}
}
}
@@ -270,12 +280,6 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
selectedProcess = p
}
} else if xsysinfo.HasCPUCaps(cpuid.AVX512F) {
p := backendPath(assetDir, LLamaCPPAVX512)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with AVX512 variant", backend)
selectedProcess = p
}
} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
p := backendPath(assetDir, LLamaCPPAVX)
if _, err := os.Stat(p); err == nil {

View File

@@ -0,0 +1,35 @@
//go:build stablediffusion
// +build stablediffusion
package stablediffusion
import (
stableDiffusion "github.com/mudler/go-stable-diffusion"
)
// GenerateImage renders an image for the given prompts and writes it to dst
// by delegating to the go-stable-diffusion bindings.
//
// When both height and width are at most 512 the request is forwarded to
// stableDiffusion.GenerateImage, including mode. If either dimension exceeds
// 512 it is forwarded to stableDiffusion.GenerateImageUpscaled instead; note
// that mode is not passed on that path.
//
// The error returned by the selected binding call is returned unchanged.
func GenerateImage(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst, asset_dir string) error {
	fitsWithoutUpscale := height <= 512 && width <= 512
	if fitsWithoutUpscale {
		// NOTE(review): the empty string is an extra positional argument of the
		// binding that sits between dst and asset_dir — confirm its meaning
		// against the go-stable-diffusion API.
		return stableDiffusion.GenerateImage(
			height,
			width,
			mode,
			step,
			seed,
			positive_prompt,
			negative_prompt,
			dst,
			"",
			asset_dir,
		)
	}

	return stableDiffusion.GenerateImageUpscaled(
		height,
		width,
		step,
		seed,
		positive_prompt,
		negative_prompt,
		dst,
		asset_dir,
	)
}

View File

@@ -0,0 +1,10 @@
//go:build !stablediffusion
// +build !stablediffusion
package stablediffusion
import "fmt"
// GenerateImage is the stand-in used when the binary is compiled without the
// stablediffusion build tag. It ignores every argument and always returns an
// error explaining that image generation is not available in this build.
func GenerateImage(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst, asset_dir string) error {
	const missingTag = "This version of LocalAI was built without the stablediffusion tag"
	return fmt.Errorf(missingTag)
}

View File

@@ -0,0 +1,20 @@
package stablediffusion
import "os"
// StableDiffusion carries the asset directory that is handed to GenerateImage
// on every call made through this value.
type StableDiffusion struct {
	// assetDir is the path that New verified with os.Stat.
	assetDir string
}

// New returns a StableDiffusion bound to assetDir.
//
// The path is probed with os.Stat first; if that fails, the stat error is
// returned as-is together with a nil pointer. Only existence/accessibility is
// checked — the path is not required to be a directory.
func New(assetDir string) (*StableDiffusion, error) {
	_, statErr := os.Stat(assetDir)
	if statErr != nil {
		return nil, statErr
	}

	sd := &StableDiffusion{assetDir: assetDir}
	return sd, nil
}
// GenerateImage forwards the request to the package-level GenerateImage,
// supplying the asset directory stored on s as the final argument, and returns
// whatever error that call produces.
func (s *StableDiffusion) GenerateImage(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst string) error {
	assets := s.assetDir
	err := GenerateImage(height, width, mode, step, seed, positive_prompt, negative_prompt, dst, assets)
	return err
}

View File

@@ -1,155 +0,0 @@
package store
import (
"context"
"fmt"
grpc "github.com/mudler/LocalAI/pkg/grpc"
"github.com/mudler/LocalAI/pkg/grpc/proto"
)
// Wrapper for the GRPC client so that simple use cases are handled without verbosity
// SetCols stores several key/value pairs through the backend's StoresSet call.
//
// The input is columnar: keys[i] is the vector key for values[i]. Each key is
// wrapped in a proto.StoresKey and each value in a proto.StoresValue before a
// single request is sent. No length check is done here; both slices are sent
// to the backend as given.
//
// A transport error is returned unchanged. If the call succeeds but the
// backend reports Success == false, an error carrying the backend message is
// returned.
func SetCols(ctx context.Context, c grpc.Backend, keys [][]float32, values [][]byte) error {
	req := &proto.StoresSetOptions{
		Keys:   make([]*proto.StoresKey, 0, len(keys)),
		Values: make([]*proto.StoresValue, 0, len(values)),
	}
	for _, vec := range keys {
		req.Keys = append(req.Keys, &proto.StoresKey{Floats: vec})
	}
	for _, payload := range values {
		req.Values = append(req.Values, &proto.StoresValue{Bytes: payload})
	}

	resp, err := c.StoresSet(ctx, req)
	if err != nil {
		return err
	}
	if !resp.Success {
		return fmt.Errorf("failed to set keys: %v", resp.Message)
	}

	return nil
}
// SetSingle stores one key/value pair by calling SetCols with one-element
// columns. Each call issues its own request, so prefer SetCols when writing
// many pairs in a loop.
func SetSingle(ctx context.Context, c grpc.Backend, key []float32, value []byte) error {
	keyCol := [][]float32{key}
	valueCol := [][]byte{value}
	return SetCols(ctx, c, keyCol, valueCol)
}
// DeleteCols removes the entries stored under the given keys through the
// backend's StoresDelete call.
//
// Every key is wrapped in a proto.StoresKey and all of them are sent in a
// single request.
//
// A transport error is returned unchanged. If the call succeeds but the
// backend reports Success == false, an error carrying the backend message is
// returned.
func DeleteCols(ctx context.Context, c grpc.Backend, keys [][]float32) error {
	req := &proto.StoresDeleteOptions{
		Keys: make([]*proto.StoresKey, 0, len(keys)),
	}
	for _, vec := range keys {
		req.Keys = append(req.Keys, &proto.StoresKey{Floats: vec})
	}

	resp, err := c.StoresDelete(ctx, req)
	if err != nil {
		return err
	}
	if !resp.Success {
		return fmt.Errorf("failed to delete keys: %v", resp.Message)
	}

	return nil
}
// DeleteSingle removes the entry stored under one key by calling DeleteCols
// with a one-element column. Each call issues its own request, so prefer
// DeleteCols when removing many keys in a loop.
func DeleteSingle(ctx context.Context, c grpc.Backend, key []float32) error {
	keyCol := [][]float32{key}
	return DeleteCols(ctx, c, keyCol)
}
// GetCols looks up several keys through the backend's StoresGet call and
// returns the keys and values contained in the response.
//
// The result is columnar: the i-th returned key belongs to the i-th returned
// value. Be warned that the keys are sorted by the store and come back in a
// different order than they were passed in; there is no guarantee as to how
// they are sorted.
//
// On a transport error both result slices are nil and the error is returned
// unchanged.
func GetCols(ctx context.Context, c grpc.Backend, keys [][]float32) ([][]float32, [][]byte, error) {
	req := &proto.StoresGetOptions{
		Keys: make([]*proto.StoresKey, 0, len(keys)),
	}
	for _, vec := range keys {
		req.Keys = append(req.Keys, &proto.StoresKey{Floats: vec})
	}

	resp, err := c.StoresGet(ctx, req)
	if err != nil {
		return nil, nil, err
	}

	// Unwrap the protobuf messages back into plain slices.
	foundKeys := make([][]float32, 0, len(resp.Keys))
	for _, pk := range resp.Keys {
		foundKeys = append(foundKeys, pk.Floats)
	}
	foundValues := make([][]byte, 0, len(resp.Values))
	for _, pv := range resp.Values {
		foundValues = append(foundValues, pv.Bytes)
	}

	return foundKeys, foundValues, nil
}
// GetSingle fetches the value stored under one key by calling GetCols with a
// one-element column and returning the first value of the response.
//
// An error from GetCols is returned unchanged; if the response contains no
// values at all, a "failed to get key" error is returned. Each call issues its
// own request, so prefer GetCols when reading many keys in a loop.
func GetSingle(ctx context.Context, c grpc.Backend, key []float32) ([]byte, error) {
	_, found, err := GetCols(ctx, c, [][]float32{key})
	switch {
	case err != nil:
		return nil, err
	case len(found) == 0:
		return nil, fmt.Errorf("failed to get key")
	}

	return found[0], nil
}
// Find asks the backend, through StoresFind, for the topk stored keys most
// similar to key.
//
// It returns three parallel results taken from the response: the matching
// keys, their values, and the similarity scores reported by the backend.
// topk is converted to int32 for the request. On a transport error all three
// results are nil and the error is returned unchanged.
func Find(ctx context.Context, c grpc.Backend, key []float32, topk int) ([][]float32, [][]byte, []float32, error) {
	query := &proto.StoresKey{Floats: key}
	req := &proto.StoresFindOptions{
		Key:  query,
		TopK: int32(topk),
	}

	resp, err := c.StoresFind(ctx, req)
	if err != nil {
		return nil, nil, nil, err
	}

	// Unwrap the protobuf messages back into plain slices.
	matchedKeys := make([][]float32, 0, len(resp.Keys))
	for _, pk := range resp.Keys {
		matchedKeys = append(matchedKeys, pk.Floats)
	}
	matchedValues := make([][]byte, 0, len(resp.Values))
	for _, pv := range resp.Values {
		matchedValues = append(matchedValues, pv.Bytes)
	}

	return matchedKeys, matchedValues, resp.Similarities, nil
}

View File

@@ -1645,9 +1645,6 @@ const docTemplate = `{
"prompt": {
"description": "Prompt is read only by completion/image API calls"
},
"quality": {
"type": "string"
},
"repeat_last_n": {
"type": "integer"
},

View File

@@ -1638,9 +1638,6 @@
"prompt": {
"description": "Prompt is read only by completion/image API calls"
},
"quality": {
"type": "string"
},
"repeat_last_n": {
"type": "integer"
},

View File

@@ -570,8 +570,6 @@ definitions:
type: number
prompt:
description: Prompt is read only by completion/image API calls
quality:
type: string
repeat_last_n:
type: integer
repeat_penalty:

View File

@@ -54,7 +54,7 @@ var _ = BeforeSuite(func() {
Eventually(func() error {
_, err := client.ListModels(context.TODO())
return err
}, "50m").ShouldNot(HaveOccurred())
}, "20m").ShouldNot(HaveOccurred())
})
var _ = AfterSuite(func() {

View File

@@ -123,9 +123,8 @@ var _ = Describe("E2E test", func() {
It("correctly", func() {
resp, err := client.CreateImage(context.TODO(),
openai.ImageRequest{
Prompt: "test",
Quality: "1",
Size: openai.CreateImageSize256x256,
Prompt: "test",
Size: openai.CreateImageSize512x512,
},
)
Expect(err).ToNot(HaveOccurred())
@@ -136,8 +135,7 @@ var _ = Describe("E2E test", func() {
resp, err := client.CreateImage(context.TODO(),
openai.ImageRequest{
Prompt: "test",
Size: openai.CreateImageSize256x256,
Quality: "1",
Size: openai.CreateImageSize512x512,
ResponseFormat: openai.CreateImageResponseFormatURL,
},
)
@@ -149,8 +147,7 @@ var _ = Describe("E2E test", func() {
resp, err := client.CreateImage(context.TODO(),
openai.ImageRequest{
Prompt: "test",
Size: openai.CreateImageSize256x256,
Quality: "1",
Size: openai.CreateImageSize512x512,
ResponseFormat: openai.CreateImageResponseFormatB64JSON,
},
)

View File

@@ -4,7 +4,6 @@ import (
"context"
"embed"
"math"
"math/rand"
"os"
"path/filepath"
@@ -23,19 +22,6 @@ import (
//go:embed backend-assets/*
var backendAssets embed.FS
// normalize rescales every vector in vecs, in place, to unit Euclidean length.
//
// The squared components are accumulated in float64 and the square root of
// that sum is the divisor applied to each component. A vector whose norm is
// zero (all components zero, or no components at all) is left untouched:
// dividing by zero would otherwise turn every component into NaN.
func normalize(vecs [][]float32) {
	for _, vec := range vecs {
		sumSquares := float64(0)
		for _, x := range vec {
			sumSquares += float64(x * x)
		}
		if sumSquares == 0 {
			// Nothing to scale, and 0/0 would poison the vector with NaN.
			continue
		}

		// Convert once per vector instead of once per component.
		length := float32(math.Sqrt(sumSquares))
		for j := range vec {
			// vec shares its backing array with vecs[i], so this updates the input.
			vec[j] /= length
		}
	}
}
var _ = Describe("Integration tests for the stores backend(s) and internal APIs", Label("stores"), func() {
Context("Embedded Store get,set and delete", func() {
var sl *model.ModelLoader
@@ -84,6 +70,10 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs"
})
It("should be able to set a key", func() {
sc.StoresSet(context.Background(), &store.StoresSetOptions{
Keys: [][]float32{{0.1, 0.2, 0.3}},
Values: [][]byte{[]byte("test")},
})
err := store.SetSingle(context.Background(), sc, []float32{0.1, 0.2, 0.3}, []byte("test"))
Expect(err).ToNot(HaveOccurred())
})
@@ -206,8 +196,17 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs"
// set 3 vectors that are at varying angles to {0.5, 0.5, 0.5}
keys := [][]float32{{0.1, 0.3, 0.5}, {0.5, 0.5, 0.5}, {0.6, 0.6, -0.6}, {0.7, -0.7, -0.7}}
vals := [][]byte{[]byte("test0"), []byte("test1"), []byte("test2"), []byte("test3")}
normalize(keys)
// normalize the keys
for i, k := range keys {
norm := float64(0)
for _, x := range k {
norm += float64(x * x)
}
norm = math.Sqrt(norm)
for j, x := range k {
keys[i][j] = x / float32(norm)
}
}
err := store.SetCols(context.Background(), sc, keys, vals)
Expect(err).ToNot(HaveOccurred())
@@ -230,121 +229,5 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs"
Expect(ks[1]).To(Equal(keys[1]))
Expect(vals[1]).To(Equal(vals[1]))
})
It("It produces the correct cosine similarities for orthogonal and opposite unit vectors", func() {
keys := [][]float32{{1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0}, {-1.0, 0.0, 0.0}}
vals := [][]byte{[]byte("x"), []byte("y"), []byte("z"), []byte("-z")}
err := store.SetCols(context.Background(), sc, keys, vals);
Expect(err).ToNot(HaveOccurred())
_, _, sims, err := store.Find(context.Background(), sc, keys[0], 4)
Expect(err).ToNot(HaveOccurred())
Expect(sims).To(Equal([]float32{1.0, 0.0, 0.0, -1.0}))
})
It("It produces the correct cosine similarities for orthogonal and opposite vectors", func() {
keys := [][]float32{{1.0, 0.0, 1.0}, {0.0, 2.0, 0.0}, {0.0, 0.0, -1.0}, {-1.0, 0.0, -1.0}}
vals := [][]byte{[]byte("x"), []byte("y"), []byte("z"), []byte("-z")}
err := store.SetCols(context.Background(), sc, keys, vals);
Expect(err).ToNot(HaveOccurred())
_, _, sims, err := store.Find(context.Background(), sc, keys[0], 4)
Expect(err).ToNot(HaveOccurred())
Expect(sims[0]).To(BeNumerically("~", 1, 0.1))
Expect(sims[1]).To(BeNumerically("~", 0, 0.1))
Expect(sims[2]).To(BeNumerically("~", -0.7, 0.1))
Expect(sims[3]).To(BeNumerically("~", -1, 0.1))
})
expectTriangleEq := func(keys [][]float32, vals [][]byte) {
sims := map[string]map[string]float32{}
// compare every key vector pair and store the similarities in a lookup table
// that uses the values as keys
for i, k := range keys {
_, valsk, simsk, err := store.Find(context.Background(), sc, k, 9)
Expect(err).ToNot(HaveOccurred())
for j, v := range valsk {
p := string(vals[i])
q := string(v)
if sims[p] == nil {
sims[p] = map[string]float32{}
}
//log.Debug().Strs("vals", []string{p, q}).Float32("similarity", simsk[j]).Send()
sims[p][q] = simsk[j]
}
}
// Check that the triangle inequality holds for every combination of the triplet
// u, v and w
for _, simsu := range sims {
for w, simw := range simsu {
// acos(u,w) <= ...
uws := math.Acos(float64(simw))
// ... acos(u,v) + acos(v,w)
for v, _ := range simsu {
uvws := math.Acos(float64(simsu[v])) + math.Acos(float64(sims[v][w]))
//log.Debug().Str("u", u).Str("v", v).Str("w", w).Send()
//log.Debug().Float32("uw", simw).Float32("uv", simsu[v]).Float32("vw", sims[v][w]).Send()
Expect(uws).To(BeNumerically("<=", uvws))
}
}
}
}
It("It obeys the triangle inequality for normalized values", func() {
keys := [][]float32{
{1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0},
{-1.0, 0.0, 0.0}, {0.0, -1.0, 0.0}, {0.0, 0.0, -1.0},
{2.0, 3.0, 4.0}, {9.0, 7.0, 1.0}, {0.0, -1.2, 2.3},
}
vals := [][]byte{
[]byte("x"), []byte("y"), []byte("z"),
[]byte("-x"), []byte("-y"), []byte("-z"),
[]byte("u"), []byte("v"), []byte("w"),
}
normalize(keys[6:])
err := store.SetCols(context.Background(), sc, keys, vals);
Expect(err).ToNot(HaveOccurred())
expectTriangleEq(keys, vals)
})
It("It obeys the triangle inequality", func() {
rnd := rand.New(rand.NewSource(151))
keys := make([][]float32, 20)
vals := make([][]byte, 20)
for i := range keys {
k := make([]float32, 768)
for j := range k {
k[j] = rnd.Float32()
}
keys[i] = k
}
c := byte('a')
for i := range vals {
vals[i] = []byte{c}
c += 1
}
err := store.SetCols(context.Background(), sc, keys, vals);
Expect(err).ToNot(HaveOccurred())
expectTriangleEq(keys, vals)
})
})
})