docs(transformers): add docs section about transformers

fix: adapt tts CLI
feat(openai/tts): compat layer with openai tts
2026-05-20 22:58:34 -04:00 · 2024-03-15 18:02:15 +01:00 · 2024-03-14 19:24:50 +01:00 · 2024-03-14 18:15:28 +01:00 · 2024-03-14 18:12:47 +01:00
85 changed files with 671 additions and 4841 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -3,4 +3,4 @@ models
 examples/chatbot-ui/models
 examples/rwkv/models
 examples/**/models
-Dockerfile*
+Dockerfile
--- a/.editorconfig
+++ b/.editorconfig
@@ -1,31 +0,0 @@
-
-root = true
-
-[*]
-indent_style = space
-indent_size = 2
-end_of_line = lf
-charset = utf-8
-trim_trailing_whitespace = true
-insert_final_newline = true
-
-[*.go]
-indent_style = tab
-
-[Makefile]
-indent_style = tab
-
-[*.proto]
-indent_size = 2
-
-[*.py]
-indent_size = 4
-
-[*.js]
-indent_size = 2
-
-[*.yaml]
-indent_size = 2
-
-[*.md]
-trim_trailing_whitespace = false
--- a/.github/release.yml
+++ b/.github/release.yml
@@ -12,23 +12,13 @@ changelog:
    - title: "Bug fixes :bug:"
      labels:
        - bug
-        - regression
    - title: Exciting New Features 🎉
      labels:
        - Semver-Minor
        - enhancement
-        - ux
-        - roadmap
-    - title: 🧠 Models
-      labels:
-        - area/ai-model
-    - title: 📖 Documentation and examples
-      labels:
-        - kind/documentation
-        - examples
    - title: 👒 Dependencies
      labels:
        - dependencies
    - title: Other Changes
      labels:
-        - "*"
+        - "*"
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -22,7 +22,6 @@ jobs:
      platforms: ${{ matrix.platforms }}
      runs-on: ${{ matrix.runs-on }}
      base-image: ${{ matrix.base-image }}
-      makeflags: "-j3"
    secrets:
      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -81,7 +80,6 @@ jobs:
      platforms: ${{ matrix.platforms }}
      runs-on: ${{ matrix.runs-on }}
      base-image: ${{ matrix.base-image }}
-      makeflags: "-j3"
    secrets:
      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -26,8 +26,6 @@ jobs:
      platforms: ${{ matrix.platforms }}
      runs-on: ${{ matrix.runs-on }}
      base-image: ${{ matrix.base-image }}
-      aio: ${{ matrix.aio }}
-      makeflags: "-j3"
    secrets:
      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -51,7 +49,7 @@ jobs:
            base-image: "ubuntu:22.04"
          - build-type: ''
            platforms: 'linux/amd64'
-            tag-latest: 'auto'
+            tag-latest: 'false'
            tag-suffix: '-ffmpeg'
            ffmpeg: 'true'
            image-type: 'extras'
@@ -81,24 +79,22 @@ jobs:
            cuda-major-version: "11"
            cuda-minor-version: "7"
            platforms: 'linux/amd64'
-            tag-latest: 'auto'
+            tag-latest: 'false'
            tag-suffix: '-cublas-cuda11-ffmpeg'
            ffmpeg: 'true'
            image-type: 'extras'
            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
-            aio: "-aio-gpu-nvidia-cuda-11"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "1"
            platforms: 'linux/amd64'
-            tag-latest: 'auto'
+            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12-ffmpeg'
            ffmpeg: 'true'
            image-type: 'extras'
            runs-on: 'arc-runner-set'
            base-image: "ubuntu:22.04"
-            aio: "-aio-gpu-nvidia-cuda-12"
          - build-type: ''
            #platforms: 'linux/amd64,linux/arm64'
            platforms: 'linux/amd64'
@@ -110,11 +106,10 @@ jobs:
            runs-on: 'arc-runner-set'
          - build-type: 'hipblas'
            platforms: 'linux/amd64'
-            tag-latest: 'auto'
+            tag-latest: 'false'
            tag-suffix: '-hipblas-ffmpeg'
            ffmpeg: 'true'
            image-type: 'extras'
-            aio: "-aio-gpu-hipblas"
            base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
            runs-on: 'arc-runner-set'
          - build-type: 'hipblas'
@@ -127,22 +122,20 @@ jobs:
            runs-on: 'arc-runner-set'
          - build-type: 'sycl_f16'
            platforms: 'linux/amd64'
-            tag-latest: 'auto'
+            tag-latest: 'false'
            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
            tag-suffix: '-sycl-f16-ffmpeg'
            ffmpeg: 'true'
            image-type: 'extras'
            runs-on: 'arc-runner-set'
-            aio: "-aio-gpu-intel-f16"
          - build-type: 'sycl_f32'
            platforms: 'linux/amd64'
-            tag-latest: 'auto'
+            tag-latest: 'false'
            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
            tag-suffix: '-sycl-f32-ffmpeg'
            ffmpeg: 'true'
            image-type: 'extras'
            runs-on: 'arc-runner-set'
-            aio: "-aio-gpu-intel-f32"
          # Core images
          - build-type: 'sycl_f16'
            platforms: 'linux/amd64'
@@ -205,9 +198,7 @@ jobs:
      cuda-minor-version: ${{ matrix.cuda-minor-version }}
      platforms: ${{ matrix.platforms }}
      runs-on: ${{ matrix.runs-on }}
-      aio: ${{ matrix.aio }}
      base-image: ${{ matrix.base-image }}
-      makeflags: "-j3"
    secrets:
      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -218,13 +209,12 @@ jobs:
        include:
          - build-type: ''
            platforms: 'linux/amd64'
-            tag-latest: 'auto'
+            tag-latest: 'false'
            tag-suffix: '-ffmpeg-core'
            ffmpeg: 'true'
            image-type: 'core'
            base-image: "ubuntu:22.04"
            runs-on: 'ubuntu-latest'
-            aio: "-aio-cpu"
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -46,16 +46,6 @@ on:
        required: true
        default: ''
        type: string
-      makeflags:
-        description: 'Make Flags'
-        required: false
-        default: ''
-        type: string
-      aio:
-        description: 'AIO Image Name'
-        required: false
-        default: ''
-        type: string
    secrets:
      dockerUsername:
        required: true
@@ -134,32 +124,7 @@ jobs:
          flavor: |
            latest=${{ inputs.tag-latest }}
            suffix=${{ inputs.tag-suffix }}
-      - name: Docker meta AIO (quay.io)
-        if: inputs.aio != ''
-        id: meta_aio
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            quay.io/go-skynet/local-ai
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{raw}}
-          flavor: |
-            latest=${{ inputs.tag-latest }}
-            suffix=${{ inputs.aio }}
-      - name: Docker meta AIO (dockerhub)
-        if: inputs.aio != ''
-        id: meta_aio_dockerhub
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            localai/localai
-          tags: |
-            type=ref,event=branch
-            type=semver,pattern={{raw}}
-          flavor: |
-            latest=${{ inputs.tag-latest }}
-            suffix=${{ inputs.aio }}
+
      - name: Set up QEMU
        uses: docker/setup-qemu-action@master
        with:
@@ -195,51 +160,12 @@ jobs:
            FFMPEG=${{ inputs.ffmpeg }}
            IMAGE_TYPE=${{ inputs.image-type }}
            BASE_IMAGE=${{ inputs.base-image }}
-            MAKEFLAGS=${{ inputs.makeflags }}
          context: .
          file: ./Dockerfile
          platforms: ${{ inputs.platforms }}
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
-      -
-        name: Inspect image
-        if: github.event_name != 'pull_request'
-        run: |
-          docker pull localai/localai:${{ steps.meta.outputs.version }}
-          docker image inspect localai/localai:${{ steps.meta.outputs.version }}
-          docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
-          docker image inspect quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
-      - name: Build and push AIO image
-        if: inputs.aio != ''
-        uses: docker/build-push-action@v5
-        with:
-          builder: ${{ steps.buildx.outputs.name }}
-          build-args: |
-            BASE_IMAGE=quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
-          context: .
-          file: ./Dockerfile.aio
-          platforms: ${{ inputs.platforms }}
-          push: ${{ github.event_name != 'pull_request' }}
-          tags: ${{ steps.meta_aio.outputs.tags }}
-          labels: ${{ steps.meta_aio.outputs.labels }}
-      - name: Build and push AIO image (dockerhub)
-        if: inputs.aio != ''
-        uses: docker/build-push-action@v5
-        with:
-          builder: ${{ steps.buildx.outputs.name }}
-          build-args: |
-            BASE_IMAGE=localai/localai:${{ steps.meta.outputs.version }}
-          context: .
-          file: ./Dockerfile.aio
-          platforms: ${{ inputs.platforms }}
-          push: ${{ github.event_name != 'pull_request' }}
-          tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
-          labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
      - name: job summary
        run: |
          echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
-      - name: job summary(AIO)
-        if: inputs.aio != ''
-        run: |
-          echo "Built image: ${{ steps.meta_aio.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -105,65 +105,9 @@ jobs:
      - name: Test
        run: |
          GO_TAGS="stablediffusion tts" make test
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3
-        timeout-minutes: 5
-
-  tests-aio-container:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Release space from worker
-        run: |
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          df -h
-          echo
-          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
-          sudo apt-get remove --auto-remove android-sdk-platform-tools || true
-          sudo apt-get purge --auto-remove android-sdk-platform-tools || true
-          sudo rm -rf /usr/local/lib/android
-          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
-          sudo rm -rf /usr/share/dotnet
-          sudo apt-get remove -y '^mono-.*' || true
-          sudo apt-get remove -y '^ghc-.*' || true
-          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
-          sudo apt-get remove -y 'php.*' || true
-          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
-          sudo apt-get remove -y '^google-.*' || true
-          sudo apt-get remove -y azure-cli || true
-          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
-          sudo apt-get remove -y '^gfortran-.*' || true
-          sudo apt-get autoremove -y
-          sudo apt-get clean
-          echo
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          sudo rm -rfv build || true
-          df -h
-      - name: Clone
-        uses: actions/checkout@v4
-        with: 
-          submodules: true
-      - name: Build images
-        run: |
-          docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=core -t local-ai:tests -f Dockerfile .
-          BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
-      - name: Test
-        run: |
-          LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
-            make run-e2e-aio
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3
-        timeout-minutes: 5

  tests-apple:
-    runs-on: macOS-14
+    runs-on: macOS-latest
    strategy:
      matrix:
        go-version: ['1.21.x']
@@ -186,8 +130,4 @@ jobs:
        run: |
          export C_INCLUDE_PATH=/usr/local/include
          export CPLUS_INCLUDE_PATH=/usr/local/include
-          BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3
-        timeout-minutes: 5
+          CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test
--- a/Dockerfile
+++ b/Dockerfile
@@ -63,9 +63,7 @@ WORKDIR /build
 RUN test -n "$TARGETARCH" \
    || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')

-###################################
-###################################
-
+# Extras requirements
 FROM requirements-core as requirements-extras

 RUN curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
@@ -95,11 +93,8 @@ FROM requirements-${IMAGE_TYPE} as builder
 ARG GO_TAGS="stablediffusion tts"
 ARG GRPC_BACKENDS
 ARG BUILD_GRPC=true
-ARG MAKEFLAGS
-
 ENV GRPC_BACKENDS=${GRPC_BACKENDS}
 ENV GO_TAGS=${GO_TAGS}
-ENV MAKEFLAGS=${MAKEFLAGS}
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
 ENV NVIDIA_VISIBLE_DEVICES=all
@@ -108,7 +103,6 @@ WORKDIR /build

 COPY . .
 COPY .git .
-RUN echo "GO_TAGS: $GO_TAGS"
 RUN make prepare

 # If we are building with clblas support, we need the libraries for the builds
@@ -122,10 +116,10 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
 RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build

 RUN if [ "${BUILD_GRPC}" = "true" ]; then \
-    git clone --recurse-submodules --jobs 4 -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+    git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
    cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
      -DgRPC_BUILD_TESTS=OFF \
-       ../.. && make install \
+       ../.. && make -j12 install \
    ; fi

 # Rebuild with defaults backends
@@ -145,12 +139,10 @@ ARG FFMPEG
 ARG BUILD_TYPE
 ARG TARGETARCH
 ARG IMAGE_TYPE=extras
-ARG MAKEFLAGS

 ENV BUILD_TYPE=${BUILD_TYPE}
 ENV REBUILD=false
 ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
-ENV MAKEFLAGS=${MAKEFLAGS}

 ARG CUDA_MAJOR_VERSION=11
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
@@ -194,43 +186,43 @@ COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/

 ## Duplicated from Makefile to avoid having a big layer that's hard to push
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/autogptq \
+	 make -C backend/python/autogptq \
    ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/bark \
+	 make -C backend/python/bark \
    ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/diffusers \
+	 make -C backend/python/diffusers \
    ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/vllm \
+	 make -C backend/python/vllm \
    ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/mamba \
+	 make -C backend/python/mamba \
    ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/sentencetransformers \
+	 make -C backend/python/sentencetransformers \
    ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/transformers \
+	 make -C backend/python/transformers \
    ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/vall-e-x \
+	 make -C backend/python/vall-e-x \
    ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/exllama \
+	 make -C backend/python/exllama \
    ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/exllama2 \
+     make -C backend/python/exllama2 \
    ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/petals \
+	 make -C backend/python/petals \
    ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/transformers-musicgen \
+	 make -C backend/python/transformers-musicgen \
    ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-    make -C backend/python/coqui \
+	 make -C backend/python/coqui \
    ; fi

 # Make sure the models directory exists
--- a/Dockerfile.aio
+++ b/Dockerfile.aio
@@ -1,8 +0,0 @@
-ARG BASE_IMAGE=ubuntu:22.04
-
-FROM ${BASE_IMAGE} 
-
-RUN apt-get update && apt-get install -y pciutils && apt-get clean
-
-COPY aio/ /aio
-ENTRYPOINT [ "/aio/entrypoint.sh" ]
--- a/Makefile
+++ b/Makefile
@@ -4,8 +4,11 @@ GOVET=$(GOCMD) vet
 BINARY_NAME=local-ai

 # llama.cpp versions
-GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=56a00f0a2f48a85376f48b5ce77699df781631ae
+GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0
+
+GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
+
+CPPLLAMA_VERSION?=19885d205e768579ab090d1e99281cae58c21b54

 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -16,13 +19,13 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

 # whisper.cpp version
-WHISPER_CPP_VERSION?=fff24a0148fe194df4997a738eeceddd724959c3
+WHISPER_CPP_VERSION?=37a709f6558c6d9783199e2b8cbb136e1c41d346

 # bert.cpp version
 BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d

 # go-piper version
-PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759
+PIPER_VERSION?=d6b6275ba037dabdba4a8b65dfdf6b2a73a67f07

 # stablediffusion version
 STABLEDIFFUSION_VERSION?=362df9da29f882dbf09ade61972d16a1f53c3485
@@ -35,7 +38,6 @@ export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
 export CMAKE_ARGS?=

 CGO_LDFLAGS?=
-CGO_LDFLAGS_WHISPER?=
 CUDA_LIBPATH?=/usr/local/cuda/lib64/
 GO_TAGS?=
 BUILD_ID?=git
@@ -70,7 +72,7 @@ UNAME_S := $(shell uname -s)
 endif

 ifeq ($(OS),Darwin)
-	
+	CGO_LDFLAGS += -lcblas -framework Accelerate
 	ifeq ($(OSX_SIGNING_IDENTITY),)
 		OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
 	endif
@@ -81,12 +83,6 @@ ifeq ($(OS),Darwin)
 	# disable metal if on Darwin and any other value is explicitly passed.
 	else ifneq ($(BUILD_TYPE),metal)
 		CMAKE_ARGS+=-DLLAMA_METAL=OFF
-		export LLAMA_NO_ACCELERATE=1
-	endif
-
-	ifeq ($(BUILD_TYPE),metal)
-#			-lcblas 	removed: it seems to always be listed as a duplicate flag.
-		CGO_LDFLAGS += -framework Accelerate
 	endif
 endif

@@ -95,12 +91,10 @@ ifeq ($(BUILD_TYPE),openblas)
 	export WHISPER_OPENBLAS=1
 endif

-
 ifeq ($(BUILD_TYPE),cublas)
 	CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
 	export LLAMA_CUBLAS=1
 	export WHISPER_CUBLAS=1
-	CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda
 endif

 ifeq ($(BUILD_TYPE),hipblas)
@@ -154,12 +148,12 @@ endif

 ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface
 ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
 ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
 ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
 ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
-ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
 ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)

 GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
@@ -174,41 +168,40 @@ ifeq ($(BUILD_API_ONLY),true)
 	GRPC_BACKENDS=
 endif

-.PHONY: all test build vendor get-sources prepare-sources prepare
+.PHONY: all test build vendor

 all: help

-## BERT embeddings
-sources/go-bert:
-	git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert
-	cd sources/go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
-
-sources/go-bert/libgobert.a: sources/go-bert
-	$(MAKE) -C sources/go-bert libgobert.a
-
-## go-llama-ggml
-sources/go-llama-ggml:
-	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama-ggml
-	cd sources/go-llama-ggml && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
-
-sources/go-llama-ggml/libbinding.a: sources/go-llama-ggml
-	$(MAKE) -C sources/go-llama-ggml BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
-
-## go-piper
-sources/go-piper:
-	git clone --recurse-submodules https://github.com/mudler/go-piper sources/go-piper
-	cd sources/go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
-
-sources/go-piper/libpiper_binding.a: sources/go-piper
-	$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
-
 ## GPT4ALL
 sources/gpt4all:
 	git clone --recurse-submodules $(GPT4ALL_REPO) sources/gpt4all
 	cd sources/gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1

-sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
-	$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
+## go-piper
+sources/go-piper:
+	git clone --recurse-submodules https://github.com/mudler/go-piper sources/go-piper
+	cd sources/go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
+
+## BERT embeddings
+sources/go-bert:
+	git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp sources/go-bert
+	cd sources/go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
+
+## stable diffusion
+sources/go-stable-diffusion:
+	git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion sources/go-stable-diffusion
+	cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
+
+sources/go-stable-diffusion/libstablediffusion.a:
+	$(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
+
+## tiny-dream
+sources/go-tiny-dream:
+	git clone --recurse-submodules https://github.com/M0Rf30/go-tiny-dream sources/go-tiny-dream
+	cd sources/go-tiny-dream && git checkout -b build $(TINYDREAM_VERSION) && git submodule update --init --recursive --depth 1
+
+sources/go-tiny-dream/libtinydream.a:
+	$(MAKE) -C sources/go-tiny-dream libtinydream.a

 ## RWKV
 sources/go-rwkv:
@@ -218,23 +211,23 @@ sources/go-rwkv:
 sources/go-rwkv/librwkv.a: sources/go-rwkv
 	cd sources/go-rwkv && cd rwkv.cpp &&	cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF &&	cmake --build . && 	cp librwkv.a ..

-## stable diffusion
-sources/go-stable-diffusion:
-	git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion sources/go-stable-diffusion
-	cd sources/go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
+sources/go-bert/libgobert.a: sources/go-bert
+	$(MAKE) -C sources/go-bert libgobert.a

-sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
-	$(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
+backend-assets/gpt4all: sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
+	mkdir -p backend-assets/gpt4all
+	@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
+	@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
+	@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true

-## tiny-dream
-sources/go-tiny-dream:
-	git clone --recurse-submodules https://github.com/M0Rf30/go-tiny-dream sources/go-tiny-dream
-	cd sources/go-tiny-dream && git checkout -b build $(TINYDREAM_VERSION) && git submodule update --init --recursive --depth 1
+backend-assets/espeak-ng-data: sources/go-piper
+	mkdir -p backend-assets/espeak-ng-data
+	$(MAKE) -C sources/go-piper piper.o
+	@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data

-sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
-	$(MAKE) -C sources/go-tiny-dream libtinydream.a
+sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
+	$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a

-## whisper
 sources/whisper.cpp:
 	git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp
 	cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
@@ -242,34 +235,47 @@ sources/whisper.cpp:
 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
 	cd sources/whisper.cpp && make libwhisper.a

-get-sources: sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream
+sources/go-llama:
+	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama
+	cd sources/go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
+
+sources/go-llama-ggml:
+	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp sources/go-llama-ggml
+	cd sources/go-llama-ggml && git checkout -b build $(GOLLAMA_STABLE_VERSION) && git submodule update --init --recursive --depth 1
+
+sources/go-llama/libbinding.a: sources/go-llama
+	$(MAKE) -C sources/go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
+
+sources/go-llama-ggml/libbinding.a: sources/go-llama-ggml
+	$(MAKE) -C sources/go-llama-ggml BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
+
+sources/go-piper/libpiper_binding.a: sources/go-piper
+	$(MAKE) -C sources/go-piper libpiper_binding.a example/main
+
+backend/cpp/llama/llama.cpp:
+	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
+
+get-sources: backend/cpp/llama/llama.cpp sources/go-llama sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream
+	touch $@

 replace:
+	$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
 	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert
+	$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
 	$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
 	$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
-	$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
-	$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
-
-dropreplace:
-	$(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp
-	$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
-	$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
-	$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
-	$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
-	$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
-	$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
-	$(GOCMD) mod edit -dropreplace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang

 prepare-sources: get-sources replace
 	$(GOCMD) mod download
+	touch $@

 ## GENERIC
 rebuild: ## Rebuilds the project
 	$(GOCMD) clean -cache
+	$(MAKE) -C sources/go-llama clean
 	$(MAKE) -C sources/go-llama-ggml clean
 	$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
 	$(MAKE) -C sources/go-rwkv clean
@@ -281,6 +287,7 @@ rebuild: ## Rebuilds the project
 	$(MAKE) build

 prepare: prepare-sources $(OPTIONAL_TARGETS)
+	touch $@

 clean: ## Remove build related file
 	$(GOCMD) clean -cache
@@ -291,15 +298,10 @@ clean: ## Remove build related file
 	rm -rf backend-assets
 	$(MAKE) -C backend/cpp/grpc clean
 	$(MAKE) -C backend/cpp/llama clean
-	$(MAKE) dropreplace
-
-clean-tests:
-	rm -rf test-models
-	rm -rf test-dir
-	rm -rf core/http/backend-assets

 ## Build:
-build: prepare backend-assets grpcs ## Build the project
+
+build: backend-assets grpcs prepare ## Build the project
 	$(info ${GREEN}I local-ai build info:${RESET})
 	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
 	$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
@@ -317,10 +319,10 @@ osx-signed: build
 run: prepare ## run local-ai
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./

-test-models/testmodel.ggml:
+test-models/testmodel:
 	mkdir test-models
 	mkdir test-dir
-	wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel.ggml
+	wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel
 	wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
 	wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
 	wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
@@ -332,9 +334,9 @@ prepare-test: grpcs
 	cp -rf backend-assets core/http
 	cp tests/models_fixtures/* test-models

-test: prepare test-models/testmodel.ggml grpcs
+test: prepare test-models/testmodel grpcs
 	@echo 'Running tests'
-	export GO_TAGS="tts stablediffusion debug"
+	export GO_TAGS="tts stablediffusion"
 	$(MAKE) prepare-test
 	HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf"  --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
@@ -354,10 +356,6 @@ run-e2e-image:
 	ls -liah $(abspath ./tests/e2e-fixtures)
 	docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests

-run-e2e-aio:
-	@echo 'Running e2e AIO tests'
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
-
 test-e2e:
 	@echo 'Running e2e tests'
 	BUILD_TYPE=$(BUILD_TYPE) \
@@ -388,11 +386,6 @@ test-stablediffusion: prepare-test
 	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts 1 -v -r $(TEST_PATHS)

-test-stores: backend-assets/grpc/local-store
-	mkdir -p tests/integration/backend-assets/grpc
-	cp -f backend-assets/grpc/local-store tests/integration/backend-assets/grpc/
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stores" --flake-attempts 1 -v -r tests/integration
-
 test-container:
 	docker build --target requirements -t local-ai-test-container .
 	docker run -ti --rm --entrypoint /bin/bash -ti -v $(abspath ./):/build local-ai-test-container
@@ -461,94 +454,92 @@ ifeq ($(BUILD_API_ONLY),true)
 	touch backend-assets/keep
 endif

-backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_binding.a
-	mkdir -p backend-assets/espeak-ng-data
-	@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
-
-backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
-	mkdir -p backend-assets/gpt4all
-	@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
-	@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
-	@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
-
-backend-assets/grpc: replace
+backend-assets/grpc:
 	mkdir -p backend-assets/grpc

-backend-assets/grpc/bert-embeddings: sources/go-bert sources/go-bert/libgobert.a backend-assets/grpc
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert LIBRARY_PATH=$(CURDIR)/sources/go-bert \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
-
-backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
-
-backend-assets/grpc/langchain-huggingface: backend-assets/grpc
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./backend/go/llm/langchain/
-
-backend/cpp/llama/llama.cpp:
-	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
+backend-assets/grpc/llama: backend-assets/grpc sources/go-llama/libbinding.a
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama LIBRARY_PATH=$(CURDIR)/sources/go-llama \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama ./backend/go/llm/llama/
+# TODO: every binary should have its own folder instead, so we can have different metal implementations
+ifeq ($(BUILD_TYPE),metal)
+	cp backend/cpp/llama/llama.cpp/ggml-metal.metal backend-assets/grpc/
+endif

+## BACKEND CPP LLAMA START
+# Sets the variables in case it has to build the gRPC locally.
 INSTALLED_PACKAGES=$(CURDIR)/backend/cpp/grpc/installed_packages
 INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
 ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
-				 -DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
-				 -Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
-				 -DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
-				 -DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
+                 -DProtobuf_DIR=${INSTALLED_LIB_CMAKE}/protobuf \
+                 -Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
+                 -DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
+                 -DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
+
 backend/cpp/llama/grpc-server:
-# Conditionally build grpc for the llama backend to use if needed
 ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
 	$(MAKE) -C backend/cpp/grpc build
-	_PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto \
-	_GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin \
-	PATH="${INSTALLED_PACKAGES}/bin:${PATH}" \
-	CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" \
-	LLAMA_VERSION=$(CPPLLAMA_VERSION) \
-	$(MAKE) -C backend/cpp/llama grpc-server
+	export _PROTOBUF_PROTOC=${INSTALLED_PACKAGES}/bin/proto && \
+	export _GRPC_CPP_PLUGIN_EXECUTABLE=${INSTALLED_PACKAGES}/bin/grpc_cpp_plugin && \
+	export PATH="${INSTALLED_PACKAGES}/bin:${PATH}" && \
+	CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
 else
 	echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
 	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
 endif
+## BACKEND CPP LLAMA END

+##
 backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server
 	cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp
 # TODO: every binary should have its own folder instead, so can have different metal implementations
 ifeq ($(BUILD_TYPE),metal)
-	cp backend/cpp/llama/llama.cpp/build/bin/default.metallib backend-assets/grpc/
+	cp backend/cpp/llama/llama.cpp/build/bin/ggml-metal.metal backend-assets/grpc/
 endif

-backend-assets/grpc/llama-ggml: sources/go-llama-ggml sources/go-llama-ggml/libbinding.a backend-assets/grpc
+backend-assets/grpc/llama-ggml: backend-assets/grpc sources/go-llama-ggml/libbinding.a
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama-ggml
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama-ggml LIBRARY_PATH=$(CURDIR)/sources/go-llama-ggml \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/

-backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
-	CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
+backend-assets/grpc/gpt4all: backend-assets/grpc backend-assets/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/

-backend-assets/grpc/rwkv: sources/go-rwkv sources/go-rwkv/librwkv.a backend-assets/grpc
+backend-assets/grpc/rwkv: backend-assets/grpc sources/go-rwkv/librwkv.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv

-backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-stable-diffusion/ LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
+backend-assets/grpc/bert-embeddings: backend-assets/grpc sources/go-bert/libgobert.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert LIBRARY_PATH=$(CURDIR)/sources/go-bert \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/

-backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
+backend-assets/grpc/langchain-huggingface: backend-assets/grpc
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./backend/go/llm/langchain/
+
+backend-assets/grpc/stablediffusion: backend-assets/grpc
+	if [ ! -f backend-assets/grpc/stablediffusion ]; then \
+		$(MAKE) sources/go-stable-diffusion; \
+		$(MAKE) sources/go-stable-diffusion/libstablediffusion.a; \
+		CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-stable-diffusion/ LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
+		$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion; \
+	fi
+
+backend-assets/grpc/tinydream: backend-assets/grpc sources/go-tiny-dream/libtinydream.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream

-backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
-	CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
+backend-assets/grpc/piper: backend-assets/grpc backend-assets/espeak-ng-data sources/go-piper/libpiper_binding.a
+	CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/

-backend-assets/grpc/local-store: backend-assets/grpc
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
+backend-assets/grpc/whisper: backend-assets/grpc sources/whisper.cpp/libwhisper.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/whisper.cpp LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/

 grpcs: prepare $(GRPC_BACKENDS)

 DOCKER_IMAGE?=local-ai
-DOCKER_AIO_IMAGE?=local-ai-aio
 IMAGE_TYPE?=core
 BASE_IMAGE?=ubuntu:22.04

@@ -559,16 +550,6 @@ docker:
 		--build-arg GO_TAGS=$(GO_TAGS) \
 		--build-arg BUILD_TYPE=$(BUILD_TYPE) \
 		-t $(DOCKER_IMAGE) .
-	
-docker-aio:
-	@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
-	docker build \
-		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
-		-t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio .
-
-docker-aio-all:
-	$(MAKE) docker-aio DOCKER_AIO_SIZE=cpu
-	$(MAKE) docker-aio DOCKER_AIO_SIZE=cpu

 docker-image-intel:
 	docker build \
@@ -582,4 +563,4 @@ docker-image-intel-xpu:
 		--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \
 		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 		--build-arg GO_TAGS="none" \
-		--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
+		--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
--- a/README.md
+++ b/README.md
@@ -43,14 +43,13 @@

 [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)

- Vector store: https://github.com/mudler/LocalAI/pull/1795
- All-in-one container image: https://github.com/mudler/LocalAI/issues/1855
 - Parallel function calling: https://github.com/mudler/LocalAI/pull/1726
 - Upload file API: https://github.com/mudler/LocalAI/pull/1703
 - Tools API support: https://github.com/mudler/LocalAI/pull/1715
 - LLaVa 1.6: https://github.com/mudler/LocalAI/pull/1714
 - ROCm container images: https://github.com/mudler/LocalAI/pull/1595
 - Intel GPU support (sycl, transformers, diffusers): https://github.com/mudler/LocalAI/issues/1653
+- Deprecation of old backends: https://github.com/mudler/LocalAI/issues/1651
 - Mamba support: https://github.com/mudler/LocalAI/pull/1589
 - Start and share models with config file: https://github.com/mudler/LocalAI/pull/1522
 - 🐸 Coqui: https://github.com/mudler/LocalAI/pull/1489
--- a/aio/cpu/README.md
+++ b/aio/cpu/README.md
@@ -1,5 +0,0 @@
-## AIO CPU size
-
-Use this image with CPU-only.
-
-Please keep using only C++ backends so the base image is as small as possible (without CUDA, cuDNN, python, etc).
--- a/aio/cpu/embeddings.yaml
+++ b/aio/cpu/embeddings.yaml
@@ -1,18 +0,0 @@
-backend: bert-embeddings
-embeddings: true
-f16: true
-
-gpu_layers: 90
-mmap: true
-name: text-embedding-ada-002
-
-parameters:
-  model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
-
-usage: |
-    You can test this model with curl like this:
-
-    curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
-      "input": "Your text string goes here",
-      "model": "text-embedding-ada-002"
-    }'
--- a/aio/cpu/image-gen.yaml
+++ b/aio/cpu/image-gen.yaml
@@ -1,53 +0,0 @@
-name: stablediffusion
-backend: stablediffusion
-parameters:
-  model: stablediffusion_assets
-
-license: "BSD-3"
-urls:
- https://github.com/EdVince/Stable-Diffusion-NCNN
- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE
-
-description: |
-     Stable Diffusion in NCNN with c++, supported txt2img and img2img
-
-download_files:
- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
-  sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
-  sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
-  sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
-  sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
-  uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin"
- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin"
-  sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
-  uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin"
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
-  sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
-  uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin"
- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
-  sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
- filename: "stablediffusion_assets/log_sigmas.bin"
-  sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
-  uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
-  sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
-  sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
-  sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
-  sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
-  uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
- filename: "stablediffusion_assets/vocab.txt"
-  sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
--- a/aio/cpu/speech-to-text.yaml
+++ b/aio/cpu/speech-to-text.yaml
@@ -1,18 +0,0 @@
-name: whisper-1
-backend: whisper
-parameters:
-  model: ggml-whisper-base.bin
-
-usage: |
-    ## example audio file
-    wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
-
-    ## Send the example audio file to the transcriptions endpoint
-    curl http://localhost:8080/v1/audio/transcriptions \
-         -H "Content-Type: multipart/form-data" \
-         -F file="@$PWD/gb1.ogg" -F model="whisper-1"
-
-download_files:
- filename: "ggml-whisper-base.bin"
-  sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
-  uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
--- a/aio/cpu/text-to-speech.yaml
+++ b/aio/cpu/text-to-speech.yaml
@@ -1,15 +0,0 @@
-name: tts-1
-download_files:
-  - filename: voice-en-us-amy-low.tar.gz
-    uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
-
-parameters:
-  model: en-us-amy-low.onnx
-
-usage: |
-    To test if this model works as expected, you can use the following curl command:
-
-    curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
-      "model":"voice-en-us-amy-low",
-      "input": "Hi, this is a test."
-    }'
--- a/aio/cpu/text-to-text.yaml
+++ b/aio/cpu/text-to-text.yaml
@@ -1,25 +0,0 @@
-name: gpt-4
-mmap: true
-parameters:
-  model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf
-
-template:
-  chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
-    {{if .Content}}{{.Content}}{{end}}
-    <|im_end|>
-  chat: |
-    {{.Input}}
-    <|im_start|>assistant
-  completion: |
-    {{.Input}}
-context_size: 2048
-f16: true
-stopwords:
- <|im_end|>
- <dummy32000>
-usage: |
-      curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-          "model": "phi-2-chat",
-          "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
-      }'
--- a/aio/cpu/vision.yaml
+++ b/aio/cpu/vision.yaml
@@ -1,40 +0,0 @@
-backend: llama-cpp
-context_size: 4096
-f16: true
-
-gpu_layers: 90
-mmap: true
-name: gpt-4-vision-preview
-
-roles:
-  user: "USER:"
-  assistant: "ASSISTANT:"
-  system: "SYSTEM:"
-
-mmproj: bakllava-mmproj.gguf
-parameters:
-  model: bakllava.gguf
-  temperature: 0.2
-  top_k: 40
-  top_p: 0.95
-  seed: -1
-mirostat: 2
-mirostat_eta: 1.0
-mirostat_tau: 1.0
-
-template:
-  chat: |
-    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
-    {{.Input}}
-    ASSISTANT:
-
-download_files:
- filename: bakllava.gguf
-  uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
- filename: bakllava-mmproj.gguf
-  uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
-
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "gpt-4-vision-preview",
-        "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
--- a/aio/entrypoint.sh
+++ b/aio/entrypoint.sh
@@ -1,98 +0,0 @@
-#!/bin/bash
-
-echo "===> LocalAI All-in-One (AIO) container starting..."
-
-GPU_ACCELERATION=false
-GPU_VENDOR=""
-
-function detect_gpu() {
-    case "$(uname -s)" in
-        Linux)
-            if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then
-                echo "NVIDIA GPU detected"
-                # nvidia-smi should be installed in the container
-                if nvidia-smi; then
-                    GPU_ACCELERATION=true
-                    GPU_VENDOR=nvidia
-                else
-                    echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available."
-                fi
-            elif lspci | grep -E 'VGA|3D' | grep -iq amd; then
-                echo "AMD GPU detected"
-                # Check if ROCm is installed
-                if [ -d /opt/rocm ]; then
-                    GPU_ACCELERATION=true
-                    GPU_VENDOR=amd
-                else
-                    echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available."
-                fi
-            elif lspci | grep -E 'VGA|3D' | grep -iq intel; then
-                echo "Intel GPU detected"
-                if [ -d /opt/intel ]; then
-                    GPU_ACCELERATION=true
-                else
-                    echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
-                fi
-            fi
-            ;;
-        Darwin)
-            if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then
-                echo "Apple Metal supported GPU detected"
-                GPU_ACCELERATION=true
-                GPU_VENDOR=apple
-            fi
-            ;;
-    esac
-}
-
-function detect_gpu_size() {
-    if [ "$GPU_ACCELERATION" = true ]; then
-        GPU_SIZE=gpu-8g
-    fi
-
-    # Attempting to find GPU memory size for NVIDIA GPUs
-    if echo "$gpu_model" | grep -iq nvidia; then
-        echo "NVIDIA GPU detected. Attempting to find memory size..."
-        nvidia_sm=($(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits))
-        if [ ! -z "$nvidia_sm" ]; then
-            echo "Total GPU Memory: ${nvidia_sm[0]} MiB"
-        else
-            echo "Unable to determine NVIDIA GPU memory size."
-        fi
-        # if bigger than 8GB, use 16GB
-        #if [ "$nvidia_sm" -gt 8192 ]; then
-        #    GPU_SIZE=gpu-16g
-        #fi
-    else
-        echo "Non-NVIDIA GPU detected. GPU memory size detection for non-NVIDIA GPUs is not supported in this script."
-    fi
-
-    # default to cpu if GPU_SIZE is not set
-    if [ -z "$GPU_SIZE" ]; then
-        GPU_SIZE=cpu
-    fi
-}
-
-function check_vars() {
-    if [ -z "$MODELS" ]; then
-        echo "MODELS environment variable is not set. Please set it to a comma-separated list of model YAML files to load."
-        exit 1
-    fi
-
-    if [ -z "$SIZE" ]; then
-        echo "SIZE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple"
-        exit 1
-    fi
-}
-
-detect_gpu
-detect_gpu_size
-
-SIZE="${SIZE:-$GPU_SIZE}" # default to cpu
-export MODELS="${MODELS:-/aio/${SIZE}/embeddings.yaml,/aio/${SIZE}/text-to-speech.yaml,/aio/${SIZE}/image-gen.yaml,/aio/${SIZE}/text-to-text.yaml,/aio/${SIZE}/speech-to-text.yaml,/aio/${SIZE}/vision.yaml}"
-
-check_vars
-
-echo "Starting LocalAI with the following models: $MODELS"
-
-/build/entrypoint.sh "$@"
--- a/aio/gpu-8g/embeddings.yaml
+++ b/aio/gpu-8g/embeddings.yaml
@@ -1,13 +0,0 @@
-name: text-embedding-ada-002
-backend: sentencetransformers
-embeddings: true
-parameters:
-  model: all-MiniLM-L6-v2
-
-usage: |
-    You can test this model with curl like this:
-
-    curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
-      "input": "Your text string goes here",
-      "model": "text-embedding-ada-002"
-    }'
--- a/aio/gpu-8g/image-gen.yaml
+++ b/aio/gpu-8g/image-gen.yaml
@@ -1,22 +0,0 @@
-name: stablediffusion
-parameters:
-  model: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
-backend: diffusers
-step: 25
-f16: true
-
-diffusers:
-  pipeline_type: StableDiffusionPipeline
-  cuda: true
-  enable_parameters: "negative_prompt,num_inference_steps"
-  scheduler_type: "k_dpmpp_2m"
-
-usage: |
-        curl http://localhost:8080/v1/images/generations \
-          -H "Content-Type: application/json" \
-          -d '{
-            "prompt": "<positive prompt>|<negative prompt>",
-            "model": "dreamshaper",
-            "step": 25,
-            "size": "512x512"
-          }'
--- a/aio/gpu-8g/speech-to-text.yaml
+++ b/aio/gpu-8g/speech-to-text.yaml
@@ -1,18 +0,0 @@
-name: whisper-1
-backend: whisper
-parameters:
-  model: ggml-whisper-base.bin
-
-usage: |
-    ## example audio file
-    wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
-
-    ## Send the example audio file to the transcriptions endpoint
-    curl http://localhost:8080/v1/audio/transcriptions \
-         -H "Content-Type: multipart/form-data" \
-         -F file="@$PWD/gb1.ogg" -F model="whisper-1"
-
-download_files:
- filename: "ggml-whisper-base.bin"
-  sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
-  uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
--- a/aio/gpu-8g/text-to-speech.yaml
+++ b/aio/gpu-8g/text-to-speech.yaml
@@ -1,15 +0,0 @@
-name: tts-1
-download_files:
-  - filename: voice-en-us-amy-low.tar.gz
-    uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
-
-parameters:
-  model: en-us-amy-low.onnx
-
-usage: |
-    To test if this model works as expected, you can use the following curl command:
-
-    curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
-      "model":"tts-1",
-      "input": "Hi, this is a test."
-    }'
--- a/aio/gpu-8g/text-to-text.yaml
+++ b/aio/gpu-8g/text-to-text.yaml
@@ -1,51 +0,0 @@
-name: gpt-4
-mmap: true
-parameters:
-  model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
-
-roles:
-  assistant_function_call: assistant
-  function: tool
-template:
-  chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}}
-    {{ if eq .RoleName "assistant_function_call" }}<tool_call>{{end}}
-    {{ if eq .RoleName "function" }}<tool_result>{{end}}
-    {{if .Content}}{{.Content}}{{end}}
-    {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
-    {{ if eq .RoleName "assistant_function_call" }}</tool_call>{{end}}
-    {{ if eq .RoleName "function" }}</tool_result>{{end}}
-    <|im_end|>
-  # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
-  function: |
-    <|im_start|>system
-    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: 
-    <tools>
-    {{range .Functions}}
-    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
-    {{end}}
-    </tools> 
-    Use the following pydantic model json schema for each tool call you will make: 
-    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} 
-    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
-    <tool_call>
-    {'arguments': <args-dict>, 'name': <function-name>}
-    </tool_call><|im_end|>
-    {{.Input}}
-    <|im_start|>assistant
-    <tool_call>
-  chat: |
-    {{.Input}}
-    <|im_start|>assistant
-  completion: |
-    {{.Input}}
-context_size: 4096
-f16: true
-stopwords:
- <|im_end|>
- <dummy32000>
-usage: |
-      curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-          "model": "gpt-4",
-          "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
-      }'
--- a/aio/gpu-8g/vision.yaml
+++ b/aio/gpu-8g/vision.yaml
@@ -1,37 +0,0 @@
-backend: llama-cpp
-context_size: 4096
-f16: true
-
-gpu_layers: 90
-mmap: true
-name: gpt-4-vision-preview
-
-roles:
-  user: "USER:"
-  assistant: "ASSISTANT:"
-  system: "SYSTEM:"
-
-mmproj: llava-v1.6-7b-mmproj-f16.gguf
-parameters:
-  model: llava-v1.6-mistral-7b.Q5_K_M.gguf
-  temperature: 0.2
-  top_k: 40
-  top_p: 0.95
-  seed: -1
-
-template:
-  chat: |
-    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
-    {{.Input}}
-    ASSISTANT:
-
-download_files:
- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf
-  uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf
- filename: llava-v1.6-7b-mmproj-f16.gguf
-  uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
-
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "gpt-4-vision-preview",
-        "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -18,48 +18,6 @@ service Backend {
  rpc TTS(TTSRequest) returns (Result) {}
  rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
  rpc Status(HealthMessage) returns (StatusResponse) {}
-
-  rpc StoresSet(StoresSetOptions) returns (Result) {}
-  rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
-  rpc StoresGet(StoresGetOptions) returns (StoresGetResult) {}
-  rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
-}
-
-message StoresKey {
-  repeated float Floats = 1;
-}
-
-message StoresValue {
-  bytes Bytes = 1;
-}
-
-message StoresSetOptions {
-  repeated StoresKey Keys = 1;
-  repeated StoresValue Values = 2;
-}
-
-message StoresDeleteOptions {
-  repeated StoresKey Keys = 1;
-}
-
-message StoresGetOptions {
-  repeated StoresKey Keys = 1;
-}
-
-message StoresGetResult {
-  repeated StoresKey Keys = 1;
-  repeated StoresValue Values = 2;
-}
-
-message StoresFindOptions {
-  StoresKey Key = 1;
-  int32 TopK = 2;
-}
-
-message StoresFindResult {
-  repeated StoresKey Keys = 1;
-  repeated StoresValue Values = 2;
-  repeated float Similarities = 3;
 }

 message HealthMessage {}
@@ -163,7 +121,7 @@ message ModelOptions {

  bool NoMulMatQ = 37;
  string DraftModel = 39;
-
+  
  string AudioPath = 38;

  // vllm
@@ -255,4 +213,4 @@ message StatusResponse {
  }
  State state = 1;
  MemoryUsageData memory = 2;
-}
+}
--- a/backend/cpp/grpc/Makefile
+++ b/backend/cpp/grpc/Makefile
@@ -48,7 +48,7 @@ $(INSTALLED_PACKAGES): grpc_build

 $(GRPC_REPO):
 	git clone --depth $(GIT_CLONE_DEPTH) -b $(TAG_LIB_GRPC) $(GIT_REPO_LIB_GRPC) $(GRPC_REPO)/grpc
-	cd $(GRPC_REPO)/grpc && git submodule update --jobs 2 --init --recursive --depth $(GIT_CLONE_DEPTH)
+	cd $(GRPC_REPO)/grpc && git submodule update --init --recursive --depth $(GIT_CLONE_DEPTH)

 $(GRPC_BUILD): $(GRPC_REPO)
 	mkdir -p $(GRPC_BUILD)
--- a/backend/cpp/llama/Makefile
+++ b/backend/cpp/llama/Makefile
@@ -18,12 +18,6 @@ else ifeq ($(BUILD_TYPE),clblas)
 # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ 
 else ifeq ($(BUILD_TYPE),hipblas)
 	CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON
-# If it's OSX, DO NOT embed the metal library - -DLLAMA_METAL_EMBED_LIBRARY=ON requires further investigation
-# But if it's OSX without metal, disable it here
-else ifeq ($(OS),darwin)
-	ifneq ($(BUILD_TYPE),metal)
-		CMAKE_ARGS+=-DLLAMA_METAL=OFF
-	endif
 endif

 ifeq ($(BUILD_TYPE),sycl_f16)
@@ -41,7 +35,7 @@ llama.cpp:
 	fi
 	cd llama.cpp && git checkout -b build $(LLAMA_VERSION) && git submodule update --init --recursive --depth 1

-llama.cpp/examples/grpc-server: llama.cpp
+llama.cpp/examples/grpc-server:
 	mkdir -p llama.cpp/examples/grpc-server
 	cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
 	cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -1084,7 +1084,7 @@ struct llama_server_context
            slot.has_next_token = false;
        }

-        if (result.tok == llama_token_eos(model))
+        if (!slot.cache_tokens.empty() && result.tok == llama_token_eos(model))
        {
            slot.stopped_eos = true;
            slot.has_next_token = false;
--- a/backend/go/stores/debug.go
+++ b/backend/go/stores/debug.go
@@ -1,14 +0,0 @@
-//go:build debug
-// +build debug
-
-package main
-
-import (
-	"github.com/rs/zerolog/log"
-)
-
-func assert(cond bool, msg string) {
-	if !cond {
-		log.Fatal().Stack().Msg(msg)
-	}
-}
--- a/backend/go/stores/main.go
+++ b/backend/go/stores/main.go
@@ -1,26 +0,0 @@
-package main
-
-// Note: this is started internally by LocalAI and a server is allocated for each store
-
-import (
-	"flag"
-	"os"
-
-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
-	"github.com/rs/zerolog"
-	"github.com/rs/zerolog/log"
-)
-
-var (
-	addr = flag.String("addr", "localhost:50051", "the address to connect to")
-)
-
-func main() {
-	log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
-
-	flag.Parse()
-
-	if err := grpc.StartServer(*addr, NewStore()); err != nil {
-		panic(err)
-	}
-}
--- a/backend/go/stores/production.go
+++ b/backend/go/stores/production.go
@@ -1,7 +0,0 @@
-//go:build !debug
-// +build !debug
-
-package main
-
-func assert(cond bool, msg string) {
-}
--- a/backend/go/stores/store.go
+++ b/backend/go/stores/store.go
@@ -1,507 +0,0 @@
-package main
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"container/heap"
-	"fmt"
-	"math"
-	"slices"
-
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-
-	"github.com/rs/zerolog/log"
-)
-
-type Store struct {
-	base.SingleThread
-
-	// The sorted keys
-	keys [][]float32
-	// The sorted values
-	values [][]byte
-
-	// If for every K it holds that ||k||^2 = 1, then we can use the normalized distance functions
-	// TODO: Should we normalize incoming keys if they are not instead?
-	keysAreNormalized bool
-	// The first key decides the length of the keys
-	keyLen int
-}
-
-// TODO: Only used for sorting using Go's builtin implementation. The interfaces are columnar because
-// that's theoretically best for memory layout and cache locality, but this isn't optimized yet.
-type Pair struct {
-	Key   []float32
-	Value []byte
-}
-
-func NewStore() *Store {
-	return &Store{
-		keys:              make([][]float32, 0),
-		values:            make([][]byte, 0),
-		keysAreNormalized: true,
-		keyLen:            -1,
-	}
-}
-
-func compareSlices(k1, k2 []float32) int {
-	assert(len(k1) == len(k2), fmt.Sprintf("compareSlices: len(k1) = %d, len(k2) = %d", len(k1), len(k2)))
-
-	return slices.Compare(k1, k2)
-}
-
-func hasKey(unsortedSlice [][]float32, target []float32) bool {
-	return slices.ContainsFunc(unsortedSlice, func(k []float32) bool {
-		return compareSlices(k, target) == 0
-	})
-}
-
-func findInSortedSlice(sortedSlice [][]float32, target []float32) (int, bool) {
-	return slices.BinarySearchFunc(sortedSlice, target, func(k, t []float32) int {
-		return compareSlices(k, t)
-	})
-}
-
-func isSortedPairs(kvs []Pair) bool {
-	for i := 1; i < len(kvs); i++ {
-		if compareSlices(kvs[i-1].Key, kvs[i].Key) > 0 {
-			return false
-		}
-	}
-
-	return true
-}
-
-func isSortedKeys(keys [][]float32) bool {
-	for i := 1; i < len(keys); i++ {
-		if compareSlices(keys[i-1], keys[i]) > 0 {
-			return false
-		}
-	}
-
-	return true
-}
-
-func sortIntoKeySlicese(keys []*pb.StoresKey) [][]float32 {
-	ks := make([][]float32, len(keys))
-
-	for i, k := range keys {
-		ks[i] = k.Floats
-	}
-
-	slices.SortFunc(ks, compareSlices)
-
-	assert(len(ks) == len(keys), fmt.Sprintf("len(ks) = %d, len(keys) = %d", len(ks), len(keys)))
-	assert(isSortedKeys(ks), "keys are not sorted")
-
-	return ks
-}
-
-func (s *Store) Load(opts *pb.ModelOptions) error {
-	return nil
-}
-
-// Sort the incoming kvs and merge them with the existing sorted kvs
-func (s *Store) StoresSet(opts *pb.StoresSetOptions) error {
-	if len(opts.Keys) == 0 {
-		return fmt.Errorf("no keys to add")
-	}
-
-	if len(opts.Keys) != len(opts.Values) {
-		return fmt.Errorf("len(keys) = %d, len(values) = %d", len(opts.Keys), len(opts.Values))
-	}
-
-	if s.keyLen == -1 {
-		s.keyLen = len(opts.Keys[0].Floats)
-	} else {
-		if len(opts.Keys[0].Floats) != s.keyLen {
-			return fmt.Errorf("Try to add key with length %d when existing length is %d", len(opts.Keys[0].Floats), s.keyLen)
-		}
-	}
-
-	kvs := make([]Pair, len(opts.Keys))
-
-	for i, k := range opts.Keys {
-		if s.keysAreNormalized && !isNormalized(k.Floats) {
-			s.keysAreNormalized = false
-			var sample []float32
-			if len(s.keys) > 5 {
-				sample = k.Floats[:5]
-			} else {
-				sample = k.Floats
-			}
-			log.Debug().Msgf("Key is not normalized: %v", sample)
-		}
-
-		kvs[i] = Pair{
-			Key:   k.Floats,
-			Value: opts.Values[i].Bytes,
-		}
-	}
-
-	slices.SortFunc(kvs, func(a, b Pair) int {
-		return compareSlices(a.Key, b.Key)
-	})
-
-	assert(len(kvs) == len(opts.Keys), fmt.Sprintf("len(kvs) = %d, len(opts.Keys) = %d", len(kvs), len(opts.Keys)))
-	assert(isSortedPairs(kvs), "keys are not sorted")
-
-	l := len(kvs) + len(s.keys)
-	merge_ks := make([][]float32, 0, l)
-	merge_vs := make([][]byte, 0, l)
-
-	i, j := 0, 0
-	for {
-		if i+j >= l {
-			break
-		}
-
-		if i >= len(kvs) {
-			merge_ks = append(merge_ks, s.keys[j])
-			merge_vs = append(merge_vs, s.values[j])
-			j++
-			continue
-		}
-
-		if j >= len(s.keys) {
-			merge_ks = append(merge_ks, kvs[i].Key)
-			merge_vs = append(merge_vs, kvs[i].Value)
-			i++
-			continue
-		}
-
-		c := compareSlices(kvs[i].Key, s.keys[j])
-		if c < 0 {
-			merge_ks = append(merge_ks, kvs[i].Key)
-			merge_vs = append(merge_vs, kvs[i].Value)
-			i++
-		} else if c > 0 {
-			merge_ks = append(merge_ks, s.keys[j])
-			merge_vs = append(merge_vs, s.values[j])
-			j++
-		} else {
-			merge_ks = append(merge_ks, kvs[i].Key)
-			merge_vs = append(merge_vs, kvs[i].Value)
-			i++
-			j++
-		}
-	}
-
-	assert(len(merge_ks) == l, fmt.Sprintf("len(merge_ks) = %d, l = %d", len(merge_ks), l))
-	assert(isSortedKeys(merge_ks), "merge keys are not sorted")
-
-	s.keys = merge_ks
-	s.values = merge_vs
-
-	return nil
-}
-
-func (s *Store) StoresDelete(opts *pb.StoresDeleteOptions) error {
-	if len(opts.Keys) == 0 {
-		return fmt.Errorf("no keys to delete")
-	}
-
-	if len(opts.Keys) == 0 {
-		return fmt.Errorf("no keys to add")
-	}
-
-	if s.keyLen == -1 {
-		s.keyLen = len(opts.Keys[0].Floats)
-	} else {
-		if len(opts.Keys[0].Floats) != s.keyLen {
-			return fmt.Errorf("Trying to delete key with length %d when existing length is %d", len(opts.Keys[0].Floats), s.keyLen)
-		}
-	}
-
-	ks := sortIntoKeySlicese(opts.Keys)
-
-	l := len(s.keys) - len(ks)
-	merge_ks := make([][]float32, 0, l)
-	merge_vs := make([][]byte, 0, l)
-
-	tail_ks := s.keys
-	tail_vs := s.values
-	for _, k := range ks {
-		j, found := findInSortedSlice(tail_ks, k)
-
-		if found {
-			merge_ks = append(merge_ks, tail_ks[:j]...)
-			merge_vs = append(merge_vs, tail_vs[:j]...)
-			tail_ks = tail_ks[j+1:]
-			tail_vs = tail_vs[j+1:]
-		} else {
-			assert(!hasKey(s.keys, k), fmt.Sprintf("Key exists, but was not found: t=%d, %v", len(tail_ks), k))
-		}
-
-		log.Debug().Msgf("Delete: found = %v, t = %d, j = %d, len(merge_ks) = %d, len(merge_vs) = %d", found, len(tail_ks), j, len(merge_ks), len(merge_vs))
-	}
-
-	merge_ks = append(merge_ks, tail_ks...)
-	merge_vs = append(merge_vs, tail_vs...)
-
-	assert(len(merge_ks) <= len(s.keys), fmt.Sprintf("len(merge_ks) = %d, len(s.keys) = %d", len(merge_ks), len(s.keys)))
-
-	s.keys = merge_ks
-	s.values = merge_vs
-
-	assert(len(s.keys) >= l, fmt.Sprintf("len(s.keys) = %d, l = %d", len(s.keys), l))
-	assert(isSortedKeys(s.keys), "keys are not sorted")
-	assert(func() bool {
-		for _, k := range ks {
-			if _, found := findInSortedSlice(s.keys, k); found {
-				return false
-			}
-		}
-		return true
-	}(), "Keys to delete still present")
-
-	if len(s.keys) != l {
-		log.Debug().Msgf("Delete: Some keys not found: len(s.keys) = %d, l = %d", len(s.keys), l)
-	}
-
-	return nil
-}
-
-func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error) {
-	pbKeys := make([]*pb.StoresKey, 0, len(opts.Keys))
-	pbValues := make([]*pb.StoresValue, 0, len(opts.Keys))
-	ks := sortIntoKeySlicese(opts.Keys)
-
-	if len(s.keys) == 0 {
-		log.Debug().Msgf("Get: No keys in store")
-	}
-
-	if s.keyLen == -1 {
-		s.keyLen = len(opts.Keys[0].Floats)
-	} else {
-		if len(opts.Keys[0].Floats) != s.keyLen {
-			return pb.StoresGetResult{}, fmt.Errorf("Try to get a key with length %d when existing length is %d", len(opts.Keys[0].Floats), s.keyLen)
-		}
-	}
-
-	tail_k := s.keys
-	tail_v := s.values
-	for i, k := range ks {
-		j, found := findInSortedSlice(tail_k, k)
-
-		if found {
-			pbKeys = append(pbKeys, &pb.StoresKey{
-				Floats: k,
-			})
-			pbValues = append(pbValues, &pb.StoresValue{
-				Bytes: tail_v[j],
-			})
-
-			tail_k = tail_k[j+1:]
-			tail_v = tail_v[j+1:]
-		} else {
-			assert(!hasKey(s.keys, k), fmt.Sprintf("Key exists, but was not found: i=%d, %v", i, k))
-		}
-	}
-
-	if len(pbKeys) != len(opts.Keys) {
-		log.Debug().Msgf("Get: Some keys not found: len(pbKeys) = %d, len(opts.Keys) = %d, len(s.Keys) = %d", len(pbKeys), len(opts.Keys), len(s.keys))
-	}
-
-	return pb.StoresGetResult{
-		Keys:   pbKeys,
-		Values: pbValues,
-	}, nil
-}
-
-func isNormalized(k []float32) bool {
-	var sum float32
-	for _, v := range k {
-		sum += v
-	}
-
-	return sum == 1.0
-}
-
-// TODO: This we could replace with handwritten SIMD code
-func normalizedCosineSimilarity(k1, k2 []float32) float32 {
-	assert(len(k1) == len(k2), fmt.Sprintf("normalizedCosineSimilarity: len(k1) = %d, len(k2) = %d", len(k1), len(k2)))
-
-	var dot float32
-	for i := 0; i < len(k1); i++ {
-		dot += k1[i] * k2[i]
-	}
-
-	assert(dot >= -1 && dot <= 1, fmt.Sprintf("dot = %f", dot))
-
-	// 2.0 * (1.0 - dot) would be the Euclidean distance
-	return dot
-}
-
-type PriorityItem struct {
-	Similarity float32
-	Key        []float32
-	Value      []byte
-}
-
-type PriorityQueue []*PriorityItem
-
-func (pq PriorityQueue) Len() int { return len(pq) }
-
-func (pq PriorityQueue) Less(i, j int) bool {
-	// Inverted because the most similar should be at the top
-	return pq[i].Similarity < pq[j].Similarity
-}
-
-func (pq PriorityQueue) Swap(i, j int) {
-	pq[i], pq[j] = pq[j], pq[i]
-}
-
-func (pq *PriorityQueue) Push(x any) {
-	item := x.(*PriorityItem)
-	*pq = append(*pq, item)
-}
-
-func (pq *PriorityQueue) Pop() any {
-	old := *pq
-	n := len(old)
-	item := old[n-1]
-	*pq = old[0 : n-1]
-	return item
-}
-
-func (s *Store) StoresFindNormalized(opts *pb.StoresFindOptions) (pb.StoresFindResult, error) {
-	tk := opts.Key.Floats
-	top_ks := make(PriorityQueue, 0, int(opts.TopK))
-	heap.Init(&top_ks)
-
-	for i, k := range s.keys {
-		sim := normalizedCosineSimilarity(tk, k)
-		heap.Push(&top_ks, &PriorityItem{
-			Similarity: sim,
-			Key:        k,
-			Value:      s.values[i],
-		})
-
-		if top_ks.Len() > int(opts.TopK) {
-			heap.Pop(&top_ks)
-		}
-	}
-
-	similarities := make([]float32, top_ks.Len())
-	pbKeys := make([]*pb.StoresKey, top_ks.Len())
-	pbValues := make([]*pb.StoresValue, top_ks.Len())
-
-	for i := top_ks.Len() - 1; i >= 0; i-- {
-		item := heap.Pop(&top_ks).(*PriorityItem)
-
-		similarities[i] = item.Similarity
-		pbKeys[i] = &pb.StoresKey{
-			Floats: item.Key,
-		}
-		pbValues[i] = &pb.StoresValue{
-			Bytes: item.Value,
-		}
-	}
-
-	return pb.StoresFindResult{
-		Keys:         pbKeys,
-		Values:       pbValues,
-		Similarities: similarities,
-	}, nil
-}
-
-func cosineSimilarity(k1, k2 []float32, mag1 float64) float32 {
-	assert(len(k1) == len(k2), fmt.Sprintf("cosineSimilarity: len(k1) = %d, len(k2) = %d", len(k1), len(k2)))
-
-	var dot, mag2 float64
-	for i := 0; i < len(k1); i++ {
-		dot += float64(k1[i] * k2[i])
-		mag2 += float64(k2[i] * k2[i])
-	}
-
-	sim := float32(dot / (mag1 * math.Sqrt(mag2)))
-
-	assert(sim >= -1 && sim <= 1, fmt.Sprintf("sim = %f", sim))
-
-	return sim
-}
-
-func (s *Store) StoresFindFallback(opts *pb.StoresFindOptions) (pb.StoresFindResult, error) {
-	tk := opts.Key.Floats
-	top_ks := make(PriorityQueue, 0, int(opts.TopK))
-	heap.Init(&top_ks)
-
-	var mag1 float64
-	for _, v := range tk {
-		mag1 += float64(v * v)
-	}
-	mag1 = math.Sqrt(mag1)
-
-	for i, k := range s.keys {
-		dist := cosineSimilarity(tk, k, mag1)
-		heap.Push(&top_ks, &PriorityItem{
-			Similarity: dist,
-			Key:        k,
-			Value:      s.values[i],
-		})
-
-		if top_ks.Len() > int(opts.TopK) {
-			heap.Pop(&top_ks)
-		}
-	}
-
-	similarities := make([]float32, top_ks.Len())
-	pbKeys := make([]*pb.StoresKey, top_ks.Len())
-	pbValues := make([]*pb.StoresValue, top_ks.Len())
-
-	for i := top_ks.Len() - 1; i >= 0; i-- {
-		item := heap.Pop(&top_ks).(*PriorityItem)
-
-		similarities[i] = item.Similarity
-		pbKeys[i] = &pb.StoresKey{
-			Floats: item.Key,
-		}
-		pbValues[i] = &pb.StoresValue{
-			Bytes: item.Value,
-		}
-	}
-
-	return pb.StoresFindResult{
-		Keys:         pbKeys,
-		Values:       pbValues,
-		Similarities: similarities,
-	}, nil
-}
-
-func (s *Store) StoresFind(opts *pb.StoresFindOptions) (pb.StoresFindResult, error) {
-	tk := opts.Key.Floats
-
-	if len(tk) != s.keyLen {
-		return pb.StoresFindResult{}, fmt.Errorf("Try to find key with length %d when existing length is %d", len(tk), s.keyLen)
-	}
-
-	if opts.TopK < 1 {
-		return pb.StoresFindResult{}, fmt.Errorf("opts.TopK = %d, must be >= 1", opts.TopK)
-	}
-
-	if s.keyLen == -1 {
-		s.keyLen = len(opts.Key.Floats)
-	} else {
-		if len(opts.Key.Floats) != s.keyLen {
-			return pb.StoresFindResult{}, fmt.Errorf("Try to add key with length %d when existing length is %d", len(opts.Key.Floats), s.keyLen)
-		}
-	}
-
-	if s.keysAreNormalized && isNormalized(tk) {
-		return s.StoresFindNormalized(opts)
-	} else {
-		if s.keysAreNormalized {
-			var sample []float32
-			if len(s.keys) > 5 {
-				sample = tk[:5]
-			} else {
-				sample = tk
-			}
-			log.Debug().Msgf("Trying to compare non-normalized key with normalized keys: %v", sample)
-		}
-
-		return s.StoresFindFallback(opts)
-	}
-}
--- a/backend/python/common-env/transformers/transformers-nvidia.yml
+++ b/backend/python/common-env/transformers/transformers-nvidia.yml
@@ -30,7 +30,6 @@ dependencies:
      - async-timeout==4.0.3
      - attrs==23.1.0
      - bark==0.1.5
-      - bitsandbytes==0.43.0
      - boto3==1.28.61
      - botocore==1.31.61
      - certifi==2023.7.22
--- a/backend/python/transformers/transformers_server.py
+++ b/backend/python/transformers/transformers_server.py
@@ -23,7 +23,7 @@ if XPU:
    from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM
    from transformers import AutoTokenizer, AutoModel, set_seed
 else:
-    from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig
+    from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed


 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
@@ -75,50 +75,18 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
            A Result object that contains the result of the LoadModel operation.
        """
        model_name = request.Model
-
-        compute = "auto"
-        if request.F16Memory == True:
-            compute=torch.bfloat16
-
-        self.CUDA = request.CUDA
-
-        device_map="cpu"
-
-        quantization = None
-
-        if self.CUDA:
-            if request.Device:
-                device_map=request.Device
-            else:
-                device_map="cuda:0"
-            if request.Quantization == "bnb_4bit":
-                quantization = BitsAndBytesConfig(
-                    load_in_4bit = True,
-                    bnb_4bit_compute_dtype = compute,
-                    bnb_4bit_quant_type = "nf4",
-                    bnb_4bit_use_double_quant = True,
-                    load_in_8bit = False,
-                )
-            elif request.Quantization == "bnb_8bit":
-                quantization = BitsAndBytesConfig(
-                    load_in_4bit=False,
-                    bnb_4bit_compute_dtype = None,
-                    load_in_8bit=True,                                   
-                )
-                                                   
-    
        try:
            if request.Type == "AutoModelForCausalLM":
                if XPU:
-                    if quantization == "xpu_4bit":
-                        xpu_4bit = True
                    self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode,
-                                              device_map="xpu", load_in_4bit=xpu_4bit)
+                                              device_map="xpu", load_in_4bit=True)
                else:
-                    self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode, use_safetensors=True, quantization_config=quantization, device_map=device_map, torch_dtype=compute)
+                    self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode)
            else:
-                self.model = AutoModel.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode,  use_safetensors=True,  quantization_config=quantization, device_map=device_map, torch_dtype=compute)
-            self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
+                self.model = AutoModel.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode)
+
+            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+            self.CUDA = False
            self.XPU = False

            if XPU:
@@ -129,6 +97,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                except Exception as err:
                    print("Not using XPU:", err, file=sys.stderr)

+            if request.CUDA or torch.cuda.is_available():
+                try:
+                    print("Loading model", model_name, "to CUDA.", file=sys.stderr)
+                    self.model = self.model.to("cuda")
+                    self.CUDA = True
+                except Exception as err:
+                    print("Not using CUDA:", err, file=sys.stderr)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        # Implement your logic here for the LoadModel service
@@ -155,17 +130,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
        encoded_input = self.tokenizer(request.Embeddings, padding=True, truncation=True, max_length=max_length, return_tensors="pt")    

        # Create word embeddings
-        if self.CUDA:
-            encoded_input = encoded_input.to("cuda")
-
-        with torch.no_grad():    
-            model_output = self.model(**encoded_input)
+        model_output = self.model(**encoded_input)

        # Pool to get sentence embeddings; i.e. generate one 1024 vector for the entire sentence
-        sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
+        sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask']).detach().numpy()
        print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr)
        print("Embeddings:", sentence_embeddings, file=sys.stderr)
-        return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings[0])
+        return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings)

    def Predict(self, request, context):
        """
@@ -192,8 +163,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
        if XPU:
            inputs = inputs.to("xpu")

-        outputs = self.model.generate(inputs,max_new_tokens=max_tokens, temperature=request.Temperature, top_p=request.TopP, do_sample=True, pad_token_id=self.tokenizer.eos_token_id)
-        generated_text = self.tokenizer.batch_decode(outputs[:, inputs.shape[1]:], skip_special_tokens=True)[0]
+        outputs = self.model.generate(inputs,max_new_tokens=max_tokens, temperature=request.Temperature, top_p=request.TopP)
+
+        generated_text = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
+        # Remove prompt from response if present
+        if request.Prompt in generated_text:
+            generated_text = generated_text.replace(request.Prompt, "")

        return backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))

--- a/core/backend/embeddings.go
+++ b/core/backend/embeddings.go
@@ -10,6 +10,10 @@ import (
 )

 func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
+	if !backendConfig.Embeddings {
+		return nil, fmt.Errorf("endpoint disabled for this model by API configuration")
+	}
+
 	modelFile := backendConfig.Model

 	grpcOpts := gRPCModelOpts(backendConfig)
--- a/core/backend/stores.go
+++ b/core/backend/stores.go
@@ -1,23 +0,0 @@
-package backend
-
-import (
-	"github.com/go-skynet/LocalAI/core/config"
-
-	"github.com/go-skynet/LocalAI/pkg/grpc"
-	"github.com/go-skynet/LocalAI/pkg/model"
-)
-
-func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string) (grpc.Backend, error) {
-    if storeName == "" {
-      storeName = "default"
-    }
-
-    sc := []model.Option{
-      model.WithBackendString(model.LocalStoreBackend),
-      model.WithAssetDir(appConfig.AssetsDestination),
-      model.WithModel(storeName),
-    }
-
-    return sl.BackendLoader(sc...)
-}
-
--- a/core/config/application_config.go
+++ b/core/config/application_config.go
@@ -258,21 +258,6 @@ func WithApiKeys(apiKeys []string) AppOption {
 	}
 }

-// ToConfigLoaderOptions returns a slice of ConfigLoader Option.
-// Some options defined at the application level are going to be passed as defaults for
-// all the configuration for the models.
-// This includes for instance the context size or the number of threads.
-// If a model doesn't set configs directly to the config model file
-// it will use the defaults defined here.
-func (o *ApplicationConfig) ToConfigLoaderOptions() []ConfigLoaderOption {
-	return []ConfigLoaderOption{
-		LoadOptionContextSize(o.ContextSize),
-		LoadOptionDebug(o.Debug),
-		LoadOptionF16(o.F16),
-		LoadOptionThreads(o.Threads),
-	}
-}
-
 // func WithMetrics(meter *metrics.Metrics) AppOption {
 // 	return func(o *StartupOptions) {
 // 		o.Metrics = meter
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -188,14 +188,7 @@ func (c *BackendConfig) FunctionToCall() string {
 	return c.functionCallNameString
 }

-func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
-	lo := &LoadOptions{}
-	lo.Apply(opts...)
-
-	ctx := lo.ctxSize
-	threads := lo.threads
-	f16 := lo.f16
-	debug := lo.debug
+func (cfg *BackendConfig) SetDefaults(debug bool, threads, ctx int, f16 bool) {
 	defaultTopP := 0.7
 	defaultTopK := 80
 	defaultTemp := 0.9
@@ -283,12 +276,8 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 		cfg.F16 = &f16
 	}

-	if cfg.Debug == nil {
-		cfg.Debug = &falseV
-	}
-
 	if debug {
-		cfg.Debug = &trueV
+		cfg.Debug = &debug
 	}
 }

@@ -340,6 +329,9 @@ func (lo *LoadOptions) Apply(options ...ConfigLoaderOption) {
 // Load a config file for a model
 func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) {

+	lo := &LoadOptions{}
+	lo.Apply(opts...)
+
 	// Load a config file if present after the model name
 	cfg := &BackendConfig{
 		PredictionOptions: schema.PredictionOptions{
@@ -354,9 +346,7 @@ func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
 		// Try loading a model config file
 		modelConfig := filepath.Join(modelPath, modelName+".yaml")
 		if _, err := os.Stat(modelConfig); err == nil {
-			if err := cl.LoadBackendConfig(
-				modelConfig, opts...,
-			); err != nil {
+			if err := cl.LoadBackendConfig(modelConfig); err != nil {
 				return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
 			}
 			cfgExisting, exists = cl.GetBackendConfig(modelName)
@@ -366,7 +356,7 @@ func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
 		}
 	}

-	cfg.SetDefaults(opts...)
+	cfg.SetDefaults(lo.debug, lo.threads, lo.ctxSize, lo.f16)

 	return cfg, nil
 }
@@ -377,6 +367,9 @@ func NewBackendConfigLoader() *BackendConfigLoader {
 	}
 }
 func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) {
+	lo := &LoadOptions{}
+	lo.Apply(opts...)
+
 	c := &[]*BackendConfig{}
 	f, err := os.ReadFile(file)
 	if err != nil {
@@ -387,7 +380,7 @@ func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendC
 	}

 	for _, cc := range *c {
-		cc.SetDefaults(opts...)
+		cc.SetDefaults(lo.debug, lo.threads, lo.ctxSize, lo.f16)
 	}

 	return *c, nil
@@ -406,7 +399,7 @@ func ReadBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig,
 		return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
 	}

-	c.SetDefaults(opts...)
+	c.SetDefaults(lo.debug, lo.threads, lo.ctxSize, lo.f16)
 	return c, nil
 }

--- a/core/http/api.go
+++ b/core/http/api.go
@@ -172,13 +172,6 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
 	// Elevenlabs
 	app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig))

-	// Stores
-	sl := model.NewModelLoader("")
-	app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig))
-	app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig))
-	app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig))
-	app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig))
-
 	// openAI compatible API endpoint

 	// chat
--- a/core/http/api_test.go
+++ b/core/http/api_test.go
@@ -15,7 +15,6 @@ import (

 	"github.com/go-skynet/LocalAI/core/config"
 	. "github.com/go-skynet/LocalAI/core/http"
-	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/go-skynet/LocalAI/core/startup"

 	"github.com/go-skynet/LocalAI/pkg/downloader"
@@ -123,75 +122,6 @@ func postModelApplyRequest(url string, request modelApplyRequest) (response map[
 	return
 }

-func postRequestJSON[B any](url string, bodyJson *B) error {
-	payload, err := json.Marshal(bodyJson)
-	if err != nil {
-		return err
-	}
-
-	GinkgoWriter.Printf("POST %s: %s\n", url, string(payload))
-
-	req, err := http.NewRequest("POST", url, bytes.NewBuffer(payload))
-	if err != nil {
-		return err
-	}
-
-	req.Header.Set("Content-Type", "application/json")
-
-	client := &http.Client{}
-	resp, err := client.Do(req)
-	if err != nil {
-		return err
-	}
-
-	defer resp.Body.Close()
-
-	body, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return err
-	}
-
-	if resp.StatusCode < 200 || resp.StatusCode >= 400 {
-		return fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
-	}
-
-	return nil
-}
-
-func postRequestResponseJSON[B1 any, B2 any](url string, reqJson *B1, respJson *B2) error {
-	payload, err := json.Marshal(reqJson)
-	if err != nil {
-		return err
-	}
-
-	GinkgoWriter.Printf("POST %s: %s\n", url, string(payload))
-
-	req, err := http.NewRequest("POST", url, bytes.NewBuffer(payload))
-	if err != nil {
-		return err
-	}
-
-	req.Header.Set("Content-Type", "application/json")
-
-	client := &http.Client{}
-	resp, err := client.Do(req)
-	if err != nil {
-		return err
-	}
-	defer resp.Body.Close()
-
-	body, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return err
-	}
-
-	if resp.StatusCode < 200 || resp.StatusCode >= 400 {
-		return fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body))
-	}
-
-	return json.Unmarshal(body, respJson)
-}
-
 //go:embed backend-assets/*
 var backendAssets embed.FS

@@ -736,15 +666,15 @@ var _ = Describe("API test", func() {
 			Expect(err).ToNot(HaveOccurred())
 			Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8?
 		})
-		It("can generate completions via ggml", func() {
-			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt})
+		It("can generate completions", func() {
+			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: testPrompt})
 			Expect(err).ToNot(HaveOccurred())
 			Expect(len(resp.Choices)).To(Equal(1))
 			Expect(resp.Choices[0].Text).ToNot(BeEmpty())
 		})

-		It("can generate chat completions via ggml", func() {
-			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel.ggml", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}})
+		It("can generate chat completions ", func() {
+			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}})
 			Expect(err).ToNot(HaveOccurred())
 			Expect(len(resp.Choices)).To(Equal(1))
 			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
@@ -906,78 +836,6 @@ var _ = Describe("API test", func() {
 				Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
 			})
 		})
-
-		// See tests/integration/stores_test
-		Context("Stores", Label("stores"), func() {
-
-			It("sets, gets, finds and deletes entries", func() {
-				ks := [][]float32{
-					{0.1, 0.2, 0.3},
-					{0.4, 0.5, 0.6},
-					{0.7, 0.8, 0.9},
-				}
-				vs := []string{
-					"test1",
-					"test2",
-					"test3",
-				}
-				setBody := schema.StoresSet{
-					Keys:   ks,
-					Values: vs,
-				}
-
-				url := "http://127.0.0.1:9090/stores/"
-				err := postRequestJSON(url+"set", &setBody)
-				Expect(err).ToNot(HaveOccurred())
-
-				getBody := schema.StoresGet{
-					Keys: ks,
-				}
-				var getRespBody schema.StoresGetResponse
-				err = postRequestResponseJSON(url+"get", &getBody, &getRespBody)
-				Expect(err).ToNot(HaveOccurred())
-				Expect(len(getRespBody.Keys)).To(Equal(len(ks)))
-
-				for i, v := range getRespBody.Keys {
-					if v[0] == 0.1 {
-						Expect(getRespBody.Values[i]).To(Equal("test1"))
-					} else if v[0] == 0.4 {
-						Expect(getRespBody.Values[i]).To(Equal("test2"))
-					} else {
-						Expect(getRespBody.Values[i]).To(Equal("test3"))
-					}
-				}
-
-				deleteBody := schema.StoresDelete{
-					Keys: [][]float32{
-						{0.1, 0.2, 0.3},
-					},
-				}
-				err = postRequestJSON(url+"delete", &deleteBody)
-				Expect(err).ToNot(HaveOccurred())
-
-				findBody := schema.StoresFind{
-					Key:  []float32{0.1, 0.3, 0.7},
-					Topk: 10,
-				}
-
-				var findRespBody schema.StoresFindResponse
-				err = postRequestResponseJSON(url+"find", &findBody, &findRespBody)
-				Expect(err).ToNot(HaveOccurred())
-				Expect(len(findRespBody.Keys)).To(Equal(2))
-
-				for i, v := range findRespBody.Keys {
-					if v[0] == 0.4 {
-						Expect(findRespBody.Values[i]).To(Equal("test2"))
-					} else {
-						Expect(findRespBody.Values[i]).To(Equal("test3"))
-					}
-
-					Expect(findRespBody.Similarities[i]).To(BeNumerically(">=", -1))
-					Expect(findRespBody.Similarities[i]).To(BeNumerically("<=", 1))
-				}
-			})
-		})
 	})

 	Context("Config file", func() {
--- a/core/http/endpoints/localai/stores.go
+++ b/core/http/endpoints/localai/stores.go
@@ -1,121 +0,0 @@
-package localai
-
-import (
-	"github.com/go-skynet/LocalAI/core/backend"
-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/core/schema"
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/store"
-	"github.com/gofiber/fiber/v2"
-)
-
-func StoresSetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		input := new(schema.StoresSet)
-
-		if err := c.BodyParser(input); err != nil {
-			return err
-		}
-
-		sb, err := backend.StoreBackend(sl, appConfig, input.Store)
-		if err != nil {
-			return err
-		}
-
-		vals := make([][]byte, len(input.Values))
-		for i, v := range input.Values {
-			vals[i] = []byte(v)
-		}
-
-		err = store.SetCols(c.Context(), sb, input.Keys, vals)
-		if err != nil {
-			return err
-		}
-
-		return c.Send(nil)
-	}
-}
-
-func StoresDeleteEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		input := new(schema.StoresDelete)
-
-		if err := c.BodyParser(input); err != nil {
-			return err
-		}
-
-		sb, err := backend.StoreBackend(sl, appConfig, input.Store)
-		if err != nil {
-			return err
-		}
-
-		if err := store.DeleteCols(c.Context(), sb, input.Keys); err != nil {
-			return err
-		}
-
-		return c.Send(nil)
-	}
-}
-
-func StoresGetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		input := new(schema.StoresGet)
-
-		if err := c.BodyParser(input); err != nil {
-			return err
-		}
-
-		sb, err := backend.StoreBackend(sl, appConfig, input.Store)
-		if err != nil {
-			return err
-		}
-
-		keys, vals, err := store.GetCols(c.Context(), sb, input.Keys)
-		if err != nil {
-			return err
-		}
-
-		res := schema.StoresGetResponse{
-			Keys:   keys,
-			Values: make([]string, len(vals)),
-		}
-
-		for i, v := range vals {
-			res.Values[i] = string(v)
-		}
-
-		return c.JSON(res)
-	}
-}
-
-func StoresFindEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		input := new(schema.StoresFind)
-
-		if err := c.BodyParser(input); err != nil {
-			return err
-		}
-
-		sb, err := backend.StoreBackend(sl, appConfig, input.Store)
-		if err != nil {
-			return err
-		}
-
-		keys, vals, similarities, err := store.Find(c.Context(), sb, input.Key, input.Topk)
-		if err != nil {
-			return err
-		}
-
-		res := schema.StoresFindResponse{
-			Keys:         keys,
-			Values:       make([]string, len(vals)),
-			Similarities: similarities,
-		}
-
-		for i, v := range vals {
-			res.Values[i] = string(v)
-		}
-
-		return c.JSON(res)
-	}
-}
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -248,10 +248,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 					Role:         r,
 					RoleName:     role,
 					Content:      i.StringContent,
-					FunctionCall: i.FunctionCall,
 					FunctionName: i.Name,
-					LastMessage:  messageIndex == (len(input.Messages) - 1),
-					Function:     config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)),
 					MessageIndex: messageIndex,
 				}
 				templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
--- a/core/http/endpoints/openai/request.go
+++ b/core/http/endpoints/openai/request.go
@@ -185,14 +185,6 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
 		config.RepeatPenalty = input.RepeatPenalty
 	}

-	if input.FrequencyPenalty!= 0 {
-		config.FrequencyPenalty = input.FrequencyPenalty
-	}
-
-	if input.PresencePenalty!= 0 {
-		config.PresencePenalty = input.PresencePenalty
-	}
-
 	if input.Keep != 0 {
 		config.Keep = input.Keep
 	}
--- a/core/schema/localai.go
+++ b/core/schema/localai.go
@@ -20,40 +20,3 @@ type TTSRequest struct {
 	Voice   string `json:"voice" yaml:"voice"`
 	Backend string `json:"backend" yaml:"backend"`
 }
-
-type StoresSet struct {
-	Store string `json:"store,omitempty" yaml:"store,omitempty"`
-
-	Keys   [][]float32 `json:"keys" yaml:"keys"`
-	Values []string    `json:"values" yaml:"values"`
-}
-
-type StoresDelete struct {
-	Store string `json:"store,omitempty" yaml:"store,omitempty"`
-
-	Keys [][]float32 `json:"keys"`
-}
-
-type StoresGet struct {
-	Store string `json:"store,omitempty" yaml:"store,omitempty"`
-
-	Keys [][]float32 `json:"keys" yaml:"keys"`
-}
-
-type StoresGetResponse struct {
-	Keys   [][]float32 `json:"keys" yaml:"keys"`
-	Values []string    `json:"values" yaml:"values"`
-}
-
-type StoresFind struct {
-	Store string `json:"store,omitempty" yaml:"store,omitempty"`
-
-	Key  []float32 `json:"key" yaml:"key"`
-	Topk int       `json:"topk" yaml:"topk"`
-}
-
-type StoresFindResponse struct {
-	Keys         [][]float32 `json:"keys" yaml:"keys"`
-	Values       []string    `json:"values" yaml:"values"`
-	Similarities []float32   `json:"similarities" yaml:"similarities"`
-}
--- a/core/schema/openai.go
+++ b/core/schema/openai.go
@@ -108,7 +108,7 @@ type ChatCompletionResponseFormat struct {
 type OpenAIRequest struct {
 	PredictionOptions

-	Context context.Context  `json:"-"`
+	Context context.Context    `json:"-"`
 	Cancel  context.CancelFunc `json:"-"`

 	// whisper
--- a/core/schema/prediction.go
+++ b/core/schema/prediction.go
@@ -25,7 +25,6 @@ type PredictionOptions struct {
 	Keep          int     `json:"n_keep" yaml:"n_keep"`

 	FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
-	PresencePenalty  float64 `json:"presence_penalty" yaml:"presence_penalty"`
 	TFZ              float64 `json:"tfz" yaml:"tfz"`

 	TypicalP float64 `json:"typical_p" yaml:"typical_p"`
--- a/core/startup/startup.go
+++ b/core/startup/startup.go
@@ -58,14 +58,12 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
 	cl := config.NewBackendConfigLoader()
 	ml := model.NewModelLoader(options.ModelPath)

-	configLoaderOpts := options.ToConfigLoaderOptions()
-
-	if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil {
+	if err := cl.LoadBackendConfigsFromPath(options.ModelPath); err != nil {
 		log.Error().Msgf("error loading config files: %s", err.Error())
 	}

 	if options.ConfigFile != "" {
-		if err := cl.LoadBackendConfigFile(options.ConfigFile, configLoaderOpts...); err != nil {
+		if err := cl.LoadBackendConfigFile(options.ConfigFile); err != nil {
 			log.Error().Msgf("error loading config file: %s", err.Error())
 		}
 	}
--- a/docs/content/docs/features/stores.md
+++ b/docs/content/docs/features/stores.md
@@ -1,97 +0,0 @@
-
-+++
-disableToc = false
-title = "💾 Stores"
-
-weight = 18
-url = '/stores'
-+++
-
-Stores are an experimental feature to help with querying data using similarity search. It is
-a low level API that consists of only `get`, `set`, `delete` and `find`.
-
-For example if you have an embedding of some text and want to find text with similar embeddings.
-You can create embeddings for chunks of all your text then compare them against the embedding of the text you
-are searching on.
-
-An embedding here meaning a vector of numbers that represent some information about the text. The
-embeddings are created from an A.I. model such as BERT or a more traditional method such as word
-frequency.
-
-Previously you would have to integrate with an external vector database or library directly.
-With the stores feature you can now do it through the LocalAI API. 
-
-Note however that doing a similarity search on embeddings is just one way to do retrieval. A higher level
-API can take this into account, so this may not be the best place to start.
-
-## API overview
-
-There is an internal gRPC API and an external facing HTTP JSON API. We'll just discuss the external HTTP API,
-however the HTTP API mirrors the gRPC API. Consult `pkg/store/client` for internal usage.
-
-Everything is in columnar format meaning that instead of getting an array of objects with a key and a value each. 
-You instead get two separate arrays of keys and values.
-
-Keys are arrays of floating point numbers with a maximum width of 32bits. Values are strings (in gRPC they are bytes).
-
-The key vectors must all be the same length and it's best for search performance if they are normalized. When
-addings keys it will be detected if they are not normalized and what length they are.
-
-All endpoints accept a `store` field which specifies which store to operate on. Presently they are created
-on the fly and there is only one store backend so no configuration is required.
-
-## Set
-
-To set some keys you can do
-
-```
-curl -X POST http://localhost:8080/stores/set \
-     -H "Content-Type: application/json" \
-     -d '{"keys": [[0.1, 0.2], [0.3, 0.4]], "values": ["foo", "bar"]}'
-```
-
-Setting the same keys again will update their values.
-
-On success 200 OK is returned with no body.
-
-## Get
-
-To get some keys you can do
-
-```
-curl -X POST http://localhost:8080/stores/get \
-     -H "Content-Type: application/json" \
-     -d '{"keys": [[0.1, 0.2]]}'
-```
-
-Both the keys and values are returned, e.g: `{"keys":[[0.1,0.2]],"values":["foo"]}`
-
-The order of the keys is not preserved! If a key does not exist then nothing is returned.
-
-## Delete
-
-To delete keys and values you can do
-
-```
-curl -X POST http://localhost:8080/stores/delete \
-     -H "Content-Type: application/json" \
-     -d '{"keys": [[0.1, 0.2]]}'
-```
-
-If a key doesn't exist then it is ignored.
-
-On success 200 OK is returned with no body.
-
-## Find
-
-To do a similarity search you can do
-
-```
-curl -X POST http://localhost:8080/stores/find 
-     -H "Content-Type: application/json" \
-     -d '{"topk": 2, "key": [0.2, 0.1]}'
-```
-
-`topk` limits the number of results returned. The result value is the same as `get`,
-except that it also includes an array of `similarities`. Where `1.0` is the maximum similarity.
-They are returned in the order of most similar to least.
--- a/docs/content/docs/getting-started/build.md
+++ b/docs/content/docs/getting-started/build.md
@@ -45,8 +45,6 @@ To install the dependencies follow the instructions below:
 {{< tabs tabTotal="3"  >}}
 {{% tab tabName="Apple" %}}

-Install `xcode` from the App Store
-
 ```bash
 brew install abseil cmake go grpc protobuf wget
 ```
@@ -113,12 +111,10 @@ docker run --rm -ti -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS

 ### Example: Build on mac

-Building on Mac (M1, M2 or M3) works, but you may need to install some prerequisites using `brew`. 
+Building on Mac (M1 or M2) works, but you may need to install some prerequisites using `brew`. 

 The below has been tested by one mac user and found to work. Note that this doesn't use Docker to run the server:

-Install `xcode` from the Apps Store (needed for metalkit)
-
 ```
 # install build dependencies
 brew install abseil cmake go grpc protobuf wget
@@ -150,20 +146,8 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
   }'
 ```

-#### Troublshooting mac
+### Build with Image generation support

-If you encounter errors regarding a missing utility metal, install `Xcode` from the App Store.
-If completions are slow, ensure that `gpu-layers` in your model yaml matches the number of layers from the model in use (or simply use a high number such as 256).
-If you a get a compile error: `error: only virtual member functions can be marked 'final'`, reinstall all the necessary brew packages, clean the build, and try again.
-
-```
-# reinstall build dependencies
-brew reinstall abseil cmake go grpc protobuf wget
-
-make clean
-
-make build
-```

 **Requirements**: OpenCV, Gomp

@@ -255,12 +239,13 @@ make BUILD_TYPE=sycl_f32 build # for float32
 #### Metal (Apple Silicon)

 ```
-make build
+make BUILD_TYPE=metal build

-# correct build type is automatically used on mac (BUILD_TYPE=metal)
-# Set `gpu_layers: 256` (or equal to the number of model layers) to your YAML model config file and `f16: true`
+# Set `gpu_layers: 1` to your YAML model config file and `f16: true`
+# Note: only models quantized with q4_0 are supported!
 ```

+
 ### Windows compatibility

 Make sure to give enough resources to the running container. See https://github.com/go-skynet/LocalAI/issues/2
--- a/docs/content/docs/overview.md
+++ b/docs/content/docs/overview.md
@@ -73,7 +73,6 @@ Note that this started just as a fun weekend project by [mudler](https://github.
 - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
 - 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
 - 🆕 [Vision API](https://localai.io/features/gpt-vision/)
- 💾 [Stores](https://localai.io/features/stores)

 ## Contribute and help

--- a/docs/data/version.json
+++ b/docs/data/version.json
@@ -1,3 +1,3 @@
 {
-  "version": "v2.10.1"
+  "version": "v2.9.0"
 }
--- a/embedded/models/bakllava.yaml
+++ b/embedded/models/bakllava.yaml
@@ -1,40 +0,0 @@
-backend: llama-cpp
-context_size: 4096
-f16: true
-
-gpu_layers: 90
-mmap: true
-name: bakllava
-
-roles:
-  user: "USER:"
-  assistant: "ASSISTANT:"
-  system: "SYSTEM:"
-
-mmproj: bakllava-mmproj.gguf
-parameters:
-  model: bakllava.gguf
-  temperature: 0.2
-  top_k: 40
-  top_p: 0.95
-  seed: -1
-mirostat: 2
-mirostat_eta: 1.0
-mirostat_tau: 1.0
-
-template:
-  chat: |
-    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
-    {{.Input}}
-    ASSISTANT:
-
-download_files:
- filename: bakllava.gguf
-  uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
- filename: bakllava-mmproj.gguf
-  uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
-
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "bakllava",
-        "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
--- a/embedded/models/cerbero.yaml
+++ b/embedded/models/cerbero.yaml
@@ -1,24 +0,0 @@
-backend: llama
-context_size: 8192
-f16: false
-gpu_layers: 90
-name: cerbero
-mmap: false
-parameters:
-  model: huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q8_0.gguf
-  top_k: 80
-  temperature: 0.2
-  top_p: 0.7
-template:
-  completion: "{{.Input}}"
-  chat: "Questa è una conversazione tra un umano ed un assistente AI.\n{{.Input}}\n[|Assistente|]  "
-roles:
-  user: "[|Umano|] "
-  system: "[|Umano|] "
-  assistant: "[|Assistente|] "
-
-stopwords:
- "[|Umano|]"
-
-trimsuffix: 
- "\n"
--- a/embedded/models/hermes-2-pro-mistral.yaml
+++ b/embedded/models/hermes-2-pro-mistral.yaml
@@ -1,51 +0,0 @@
-name: hermes-2-pro-mistral
-mmap: true
-parameters:
-  model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
-
-roles:
-  assistant_function_call: assistant
-  function: tool
-template:
-  chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}}
-    {{ if eq .RoleName "assistant_function_call" }}<tool_call>{{end}}
-    {{ if eq .RoleName "function" }}<tool_result>{{end}}
-    {{if .Content}}{{.Content}}{{end}}
-    {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
-    {{ if eq .RoleName "assistant_function_call" }}</tool_call>{{end}}
-    {{ if eq .RoleName "function" }}</tool_result>{{end}}
-    <|im_end|>
-  # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
-  function: |
-    <|im_start|>system
-    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: 
-    <tools>
-    {{range .Functions}}
-    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
-    {{end}}
-    </tools> 
-    Use the following pydantic model json schema for each tool call you will make: 
-    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} 
-    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
-    <tool_call>
-    {'arguments': <args-dict>, 'name': <function-name>}
-    </tool_call><|im_end|>
-    {{.Input}}
-    <|im_start|>assistant
-    <tool_call>
-  chat: |
-    {{.Input}}
-    <|im_start|>assistant
-  completion: |
-    {{.Input}}
-context_size: 4096
-f16: true
-stopwords:
- <|im_end|>
- <dummy32000>
-usage: |
-      curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-          "model": "hermes-2-pro-mistral",
-          "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
-      }'
--- a/embedded/models/llava-1.5.yaml
+++ b/embedded/models/llava-1.5.yaml
@@ -1,33 +0,0 @@
-backend: llama-cpp
-context_size: 4096
-f16: true
-
-gpu_layers: 90
-mmap: true
-name: llava-1.5
-
-roles:
-  user: "USER:"
-  assistant: "ASSISTANT:"
-  system: "SYSTEM:"
-
-mmproj: llava-v1.5-7b-mmproj-Q8_0.gguf
-parameters:
-  model: llava-v1.5-7b-Q4_K.gguf
-
-template:
-  chat: |
-    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
-    {{.Input}}
-    ASSISTANT:
-
-download_files:
- filename: llava-v1.5-7b-Q4_K.gguf
-  uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-Q4_K.gguf
- filename: llava-v1.5-7b-mmproj-Q8_0.gguf
-  uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf
-
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "llava-1.5",
-        "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
--- a/embedded/models/llava-1.6-mistral.yaml
+++ b/embedded/models/llava-1.6-mistral.yaml
@@ -1,33 +0,0 @@
-backend: llama-cpp
-context_size: 4096
-f16: true
-
-gpu_layers: 90
-mmap: true
-name: llava-1.6-mistral
-
-roles:
-  user: "USER:"
-  assistant: "ASSISTANT:"
-  system: "SYSTEM:"
-
-mmproj: llava-v1.6-7b-mmproj-f16.gguf
-parameters:
-  model: llava-v1.6-mistral-7b.gguf
-
-template:
-  chat: |
-    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
-    {{.Input}}
-    ASSISTANT:
-
-download_files:
- filename: llava-v1.6-mistral-7b.gguf
-  uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q6_K.gguf
- filename: llava-v1.6-7b-mmproj-f16.gguf
-  uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
-
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "llava-1.6-mistral",
-        "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
--- a/embedded/models/llava-1.6-vicuna.yaml
+++ b/embedded/models/llava-1.6-vicuna.yaml
@@ -1,37 +0,0 @@
-backend: llama-cpp
-context_size: 4096
-f16: true
-
-gpu_layers: 90
-mmap: true
-name: llava-1.6-vicuna
-
-roles:
-  user: "USER:"
-  assistant: "ASSISTANT:"
-  system: "SYSTEM:"
-
-mmproj: mmproj-vicuna7b-f16.gguf
-parameters:
-  model: vicuna-7b-q5_k.gguf
-  temperature: 0.2
-  top_k: 40
-  top_p: 0.95
-  seed: -1
-
-template:
-  chat: |
-    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
-    {{.Input}}
-    ASSISTANT:
-
-download_files:
- filename: vicuna-7b-q5_k.gguf
-  uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/vicuna-7b-q5_k.gguf
- filename: mmproj-vicuna7b-f16.gguf
-  uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf
-
-usage: |
-    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-        "model": "llava-1.6-vicuna",
-        "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
--- a/embedded/models/phi-2-chat.yaml
+++ b/embedded/models/phi-2-chat.yaml
@@ -1,25 +0,0 @@
-name: phi-2-chat
-mmap: true
-parameters:
-  model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf
-
-template:
-  chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
-    {{if .Content}}{{.Content}}{{end}}
-    <|im_end|>
-  chat: |
-    {{.Input}}
-    <|im_start|>assistant
-  completion: |
-    {{.Input}}
-context_size: 4096
-f16: true
-stopwords:
- <|im_end|>
- <dummy32000>
-usage: |
-      curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-          "model": "phi-2-chat",
-          "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
-      }'
--- a/examples/semantic-todo/README.md
+++ b/examples/semantic-todo/README.md
@@ -1,15 +0,0 @@
-This demonstrates the vector store backend in its simplest form. 
-You can add tasks and then search/sort them using the TUI. 
-
-To build and run do
-
-```bash
-$ go get .
-$ go run .
-```
-
-A seperate LocaAI instance is required of course. For e.g.
-
-```bash
-$ docker run -e DEBUG=true --rm -it -p 8080:8080 <LocalAI-image> bert-cpp
-```
--- a/examples/semantic-todo/go.mod
+++ b/examples/semantic-todo/go.mod
@@ -1,18 +0,0 @@
-module semantic-todo
-
-go 1.21.6
-
-require (
-	github.com/gdamore/tcell/v2 v2.7.1
-	github.com/rivo/tview v0.0.0-20240307173318-e804876934a1
-)
-
-require (
-	github.com/gdamore/encoding v1.0.0 // indirect
-	github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
-	github.com/mattn/go-runewidth v0.0.15 // indirect
-	github.com/rivo/uniseg v0.4.7 // indirect
-	golang.org/x/sys v0.17.0 // indirect
-	golang.org/x/term v0.17.0 // indirect
-	golang.org/x/text v0.14.0 // indirect
-)
--- a/examples/semantic-todo/go.sum
+++ b/examples/semantic-todo/go.sum
@@ -1,50 +0,0 @@
-github.com/gdamore/encoding v1.0.0 h1:+7OoQ1Bc6eTm5niUzBa0Ctsh6JbMW6Ra+YNuAtDBdko=
-github.com/gdamore/encoding v1.0.0/go.mod h1:alR0ol34c49FCSBLjhosxzcPHQbf2trDkoo5dl+VrEg=
-github.com/gdamore/tcell/v2 v2.7.1 h1:TiCcmpWHiAU7F0rA2I3S2Y4mmLmO9KHxJ7E1QhYzQbc=
-github.com/gdamore/tcell/v2 v2.7.1/go.mod h1:dSXtXTSK0VsW1biw65DZLZ2NKr7j0qP/0J7ONmsraWg=
-github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
-github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
-github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
-github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
-github.com/rivo/tview v0.0.0-20240307173318-e804876934a1 h1:bWLHTRekAy497pE7+nXSuzXwwFHI0XauRzz6roUvY+s=
-github.com/rivo/tview v0.0.0-20240307173318-e804876934a1/go.mod h1:02iFIz7K/A9jGCvrizLPvoqr4cEIx7q54RH5Qudkrss=
-github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
-github.com/rivo/uniseg v0.4.3/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
-github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
-github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
-github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
-golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
-golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
-golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
-golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
-golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
-golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
-golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
-golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
-golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
-golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
-golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
-golang.org/x/term v0.17.0 h1:mkTF7LCd6WGJNL3K1Ad7kwxNfYAW6a8a8QqtMblp/4U=
-golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
-golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
-golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
-golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
-golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
-golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
-golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
-golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
-golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
-golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
-golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
--- a/examples/semantic-todo/main.go
+++ b/examples/semantic-todo/main.go
@@ -1,352 +0,0 @@
-package main
-
-import (
-	"bytes"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"io"
-	"net/http"
-
-	"github.com/gdamore/tcell/v2"
-	"github.com/rivo/tview"
-)
-
-const (
-	localAI     string = "http://localhost:8080"
-	rootStatus  string = "[::b]<space>[::-]: Add Task  [::b]/[::-]: Search Task  [::b]<C-c>[::-]: Exit"
-	inputStatus string = "Press [::b]<enter>[::-] to submit the task, [::b]<esc>[::-] to cancel"
-)
-
-type Task struct {
-	Description string
-	Similarity  float32
-}
-
-type AppState int
-
-const (
-	StateRoot AppState = iota
-	StateInput
-	StateSearch
-)
-
-type App struct {
-	state AppState
-	tasks []Task
-	app   *tview.Application
-	flex  *tview.Flex
-	table *tview.Table
-}
-
-func NewApp() *App {
-	return &App{
-		state: StateRoot,
-		tasks: []Task{
-			{Description: "Take the dog for a walk (after I get a dog)"},
-			{Description: "Go to the toilet"},
-			{Description: "Allow TODOs to be marked completed or removed"},
-		},
-	}
-}
-
-func getEmbeddings(description string) ([]float32, error) {
-	// Define the request payload
-	payload := map[string]interface{}{
-		"model": "bert-cpp-minilm-v6",
-		"input": description,
-	}
-
-	// Marshal the payload into JSON
-	jsonPayload, err := json.Marshal(payload)
-	if err != nil {
-		return nil, err
-	}
-
-	// Make the HTTP request to the local OpenAI embeddings API
-	resp, err := http.Post(localAI+"/embeddings", "application/json", bytes.NewBuffer(jsonPayload))
-	if err != nil {
-		return nil, err
-	}
-	defer resp.Body.Close()
-
-	// Check if the request was successful
-	if resp.StatusCode != http.StatusOK {
-		return nil, fmt.Errorf("request to embeddings API failed with status code: %d", resp.StatusCode)
-	}
-
-	// Parse the response body
-	var result struct {
-		Data []struct {
-			Embedding []float32 `json:"embedding"`
-		} `json:"data"`
-	}
-	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
-		return nil, err
-	}
-
-	// Return the embedding
-	if len(result.Data) > 0 {
-		return result.Data[0].Embedding, nil
-	}
-	return nil, errors.New("no embedding received from API")
-}
-
-type StoresSet struct {
-	Store string `json:"store,omitempty" yaml:"store,omitempty"`
-
-	Keys   [][]float32 `json:"keys" yaml:"keys"`
-	Values []string    `json:"values" yaml:"values"`
-}
-
-func postTasksToExternalService(tasks []Task) error {
-	keys := make([][]float32, 0, len(tasks))
-	// Get the embeddings for the task description
-	for _, task := range tasks {
-		embedding, err := getEmbeddings(task.Description)
-		if err != nil {
-			return err
-		}
-		keys = append(keys, embedding)
-	}
-
-	values := make([]string, 0, len(tasks))
-	for _, task := range tasks {
-		values = append(values, task.Description)
-	}
-
-	// Construct the StoresSet object
-	storesSet := StoresSet{
-		Store:  "tasks_store", // Assuming you have a specific store name
-		Keys:   keys,
-		Values: values,
-	}
-
-	// Marshal the StoresSet object into JSON
-	jsonData, err := json.Marshal(storesSet)
-	if err != nil {
-		return err
-	}
-
-	// Make the HTTP POST request to the external service
-	resp, err := http.Post(localAI+"/stores/set", "application/json", bytes.NewBuffer(jsonData))
-	if err != nil {
-		return err
-	}
-	defer resp.Body.Close()
-
-	// Check if the request was successful
-	if resp.StatusCode != http.StatusOK {
-		// read resp body into string
-		body, err := io.ReadAll(resp.Body)
-		if err != nil {
-			return err
-		}
-		return fmt.Errorf("store request failed with status code: %d: %s", resp.StatusCode, body)
-	}
-
-	return nil
-}
-
-type StoresFind struct {
-	Store string `json:"store,omitempty" yaml:"store,omitempty"`
-
-	Key  []float32 `json:"key" yaml:"key"`
-	Topk int       `json:"topk" yaml:"topk"`
-}
-
-type StoresFindResponse struct {
-	Keys         [][]float32 `json:"keys" yaml:"keys"`
-	Values       []string    `json:"values" yaml:"values"`
-	Similarities []float32   `json:"similarities" yaml:"similarities"`
-}
-
-func findSimilarTexts(inputText string, topk int) (StoresFindResponse, error) {
-	// Initialize an empty response object
-	response := StoresFindResponse{}
-
-	// Get the embedding for the input text
-	embedding, err := getEmbeddings(inputText)
-	if err != nil {
-		return response, err
-	}
-
-	// Construct the StoresFind object
-	storesFind := StoresFind{
-		Store: "tasks_store", // Assuming you have a specific store name
-		Key:   embedding,
-		Topk:  topk,
-	}
-
-	// Marshal the StoresFind object into JSON
-	jsonData, err := json.Marshal(storesFind)
-	if err != nil {
-		return response, err
-	}
-
-	// Make the HTTP POST request to the external service's /stores/find endpoint
-	resp, err := http.Post(localAI+"/stores/find", "application/json", bytes.NewBuffer(jsonData))
-	if err != nil {
-		return response, err
-	}
-	defer resp.Body.Close()
-
-	// Check if the request was successful
-	if resp.StatusCode != http.StatusOK {
-		return response, fmt.Errorf("request to /stores/find failed with status code: %d", resp.StatusCode)
-	}
-
-	// Parse the response body to retrieve similar texts and similarities
-	if err := json.NewDecoder(resp.Body).Decode(&response); err != nil {
-		return response, err
-	}
-
-	return response, nil
-}
-
-func (app *App) updateUI() {
-	// Clear the flex layout
-	app.flex.Clear()
-	app.flex.SetDirection(tview.FlexColumn)
-	app.flex.AddItem(nil, 0, 1, false)
-
-	midCol := tview.NewFlex()
-	midCol.SetDirection(tview.FlexRow)
-	midCol.AddItem(nil, 0, 1, false)
-
-	// Create a new table.
-	app.table.Clear()
-	app.table.SetBorders(true)
-
-	// Set table headers
-	app.table.SetCell(0, 0, tview.NewTableCell("Description").SetAlign(tview.AlignLeft).SetExpansion(1).SetAttributes(tcell.AttrBold))
-	app.table.SetCell(0, 1, tview.NewTableCell("Similarity").SetAlign(tview.AlignCenter).SetExpansion(0).SetAttributes(tcell.AttrBold))
-
-	// Add the tasks to the table.
-	for i, task := range app.tasks {
-		row := i + 1
-		app.table.SetCell(row, 0, tview.NewTableCell(task.Description))
-		app.table.SetCell(row, 1, tview.NewTableCell(fmt.Sprintf("%.2f", task.Similarity)))
-	}
-
-	if app.state == StateInput {
-		inputField := tview.NewInputField()
-		inputField.
-			SetLabel("New Task: ").
-			SetFieldWidth(0).
-			SetDoneFunc(func(key tcell.Key) {
-				if key == tcell.KeyEnter {
-					task := Task{Description: inputField.GetText()}
-					app.tasks = append(app.tasks, task)
-					app.state = StateRoot
-					postTasksToExternalService([]Task{task})
-				}
-				app.updateUI()
-			})
-		midCol.AddItem(inputField, 3, 2, true)
-		app.app.SetFocus(inputField)
-	} else if app.state == StateSearch {
-		searchField := tview.NewInputField()
-		searchField.SetLabel("Search: ").
-			SetFieldWidth(0).
-			SetDoneFunc(func(key tcell.Key) {
-				if key == tcell.KeyEnter {
-					similar, err := findSimilarTexts(searchField.GetText(), 100)
-					if err != nil {
-						panic(err)
-					}
-					app.tasks = make([]Task, len(similar.Keys))
-					for i, v := range similar.Values {
-						app.tasks[i] = Task{Description: v, Similarity: similar.Similarities[i]}
-					}
-				}
-				app.updateUI()
-			})
-		midCol.AddItem(searchField, 3, 2, true)
-		app.app.SetFocus(searchField)
-	} else {
-		midCol.AddItem(nil, 3, 1, false)
-	}
-
-	midCol.AddItem(app.table, 0, 2, true)
-
-	// Add the status bar to the flex layout
-	statusBar := tview.NewTextView().
-		SetText(rootStatus).
-		SetDynamicColors(true).
-		SetTextAlign(tview.AlignCenter)
-	if app.state == StateInput {
-		statusBar.SetText(inputStatus)
-	}
-	midCol.AddItem(statusBar, 1, 1, false)
-	midCol.AddItem(nil, 0, 1, false)
-
-	app.flex.AddItem(midCol, 0, 10, true)
-	app.flex.AddItem(nil, 0, 1, false)
-
-	// Set the flex as the root element
-	app.app.SetRoot(app.flex, true)
-}
-
-func main() {
-	app := NewApp()
-	tApp := tview.NewApplication()
-	flex := tview.NewFlex().SetDirection(tview.FlexRow)
-	table := tview.NewTable()
-
-	app.app = tApp
-	app.flex = flex
-	app.table = table
-
-	app.updateUI() // Initial UI setup
-
-	app.app.SetInputCapture(func(event *tcell.EventKey) *tcell.EventKey {
-		switch app.state {
-		case StateRoot:
-			// Handle key events when in the root state
-			switch event.Key() {
-			case tcell.KeyRune:
-				switch event.Rune() {
-				case ' ':
-					app.state = StateInput
-					app.updateUI()
-					return nil // Event is handled
-				case '/':
-					app.state = StateSearch
-					app.updateUI()
-					return nil // Event is handled
-				}
-			}
-
-		case StateInput:
-			// Handle key events when in the input state
-			if event.Key() == tcell.KeyEsc {
-				// Exit input state without adding a task
-				app.state = StateRoot
-				app.updateUI()
-				return nil // Event is handled
-			}
-
-		case StateSearch:
-			// Handle key events when in the search state
-			if event.Key() == tcell.KeyEsc {
-				// Exit search state
-				app.state = StateRoot
-				app.updateUI()
-				return nil // Event is handled
-			}
-		}
-
-		// Return the event for further processing by tview
-		return event
-	})
-
-	if err := postTasksToExternalService(app.tasks); err != nil {
-		panic(err)
-	}
-
-	// Start the application
-	if err := app.app.Run(); err != nil {
-		panic(err)
-	}
-}
--- a/go.mod
+++ b/go.mod
@@ -25,7 +25,7 @@ require (
 	github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5
 	github.com/prometheus/client_golang v1.17.0
 	github.com/rs/zerolog v1.31.0
-	github.com/sashabaranov/go-openai v1.20.4
+	github.com/sashabaranov/go-openai v1.16.0
 	github.com/schollz/progressbar/v3 v3.13.1
 	github.com/stretchr/testify v1.8.4
 	github.com/tmc/langchaingo v0.0.0-20231019140956-c636b3da7701
@@ -53,72 +53,40 @@ require (
 )

 require (
-	github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 // indirect
-	github.com/Masterminds/goutils v1.1.1 // indirect
-	github.com/Masterminds/semver/v3 v3.2.0 // indirect
-	github.com/Masterminds/sprig/v3 v3.2.3 // indirect
-	github.com/Microsoft/go-winio v0.6.0 // indirect
-	github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect
 	github.com/alecthomas/chroma v0.10.0 // indirect
 	github.com/aymanbagabas/go-osc52 v1.0.3 // indirect
 	github.com/aymerick/douceur v0.2.0 // indirect
 	github.com/beorn7/perks v1.0.1 // indirect
-	github.com/cenkalti/backoff/v4 v4.1.3 // indirect
 	github.com/cespare/xxhash/v2 v2.2.0 // indirect
 	github.com/charmbracelet/glamour v0.6.0 // indirect
-	github.com/containerd/continuity v0.3.0 // indirect
 	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/dlclark/regexp2 v1.8.1 // indirect
-	github.com/docker/cli v20.10.17+incompatible // indirect
-	github.com/docker/docker v20.10.7+incompatible // indirect
-	github.com/docker/go-connections v0.4.0 // indirect
-	github.com/docker/go-units v0.4.0 // indirect
 	github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
-	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang/protobuf v1.5.3 // indirect
 	github.com/golang/snappy v0.0.2 // indirect
-	github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
 	github.com/gorilla/css v1.0.0 // indirect
-	github.com/huandu/xstrings v1.3.3 // indirect
 	github.com/klauspost/pgzip v1.2.5 // indirect
 	github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
 	github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
 	github.com/microcosm-cc/bluemonday v1.0.26 // indirect
 	github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
-	github.com/mitchellh/copystructure v1.0.0 // indirect
-	github.com/mitchellh/mapstructure v1.5.0 // indirect
-	github.com/mitchellh/reflectwalk v1.0.0 // indirect
-	github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 // indirect
 	github.com/muesli/reflow v0.3.0 // indirect
 	github.com/muesli/termenv v0.13.0 // indirect
 	github.com/nwaples/rardecode v1.1.0 // indirect
 	github.com/olekukonko/tablewriter v0.0.5 // indirect
-	github.com/opencontainers/go-digest v1.0.0 // indirect
-	github.com/opencontainers/image-spec v1.0.2 // indirect
-	github.com/opencontainers/runc v1.1.5 // indirect
-	github.com/ory/dockertest/v3 v3.10.0 // indirect
 	github.com/pierrec/lz4/v4 v4.1.2 // indirect
-	github.com/pkg/errors v0.9.1 // indirect
 	github.com/pkoukk/tiktoken-go v0.1.2 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 // indirect
 	github.com/prometheus/common v0.44.0 // indirect
 	github.com/prometheus/procfs v0.11.1 // indirect
-	github.com/shopspring/decimal v1.2.0 // indirect
-	github.com/sirupsen/logrus v1.8.1 // indirect
-	github.com/spf13/cast v1.3.1 // indirect
 	github.com/ulikunitz/xz v0.5.9 // indirect
-	github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect
-	github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
-	github.com/xeipuuv/gojsonschema v1.2.0 // indirect
 	github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
 	github.com/yuin/goldmark v1.5.2 // indirect
 	github.com/yuin/goldmark-emoji v1.0.1 // indirect
 	go.opentelemetry.io/otel/sdk v1.19.0 // indirect
 	go.opentelemetry.io/otel/trace v1.19.0 // indirect
-	golang.org/x/crypto v0.14.0 // indirect
-	golang.org/x/mod v0.12.0 // indirect
 	golang.org/x/term v0.13.0 // indirect
 	google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect
 	gopkg.in/fsnotify.v1 v1.4.7 // indirect
--- a/go.sum
+++ b/go.sum
@@ -1,18 +1,5 @@
-github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 h1:w+iIsaOQNcT7OZ575w+acHgRric5iCyQh+xv+KJ4HB8=
-github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8=
-github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
 github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf h1:UgjXLcE9I+VaVz7uBIlzAnyZIXwiDlIiTWqCh159aUI=
 github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf/go.mod h1:UOf2Mb/deUri5agct5OJ4SLWjhI+kZKbsUVUeRb24I0=
-github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
-github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
-github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g=
-github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ=
-github.com/Masterminds/sprig/v3 v3.2.3 h1:eL2fZNezLomi0uOLqjQoN6BfsDD+fyLtgbJMAj9n6YA=
-github.com/Masterminds/sprig/v3 v3.2.3/go.mod h1:rXcFaZ2zZbLRJv/xSysmlgIM1u11eBaRMhvYXJNkGuM=
-github.com/Microsoft/go-winio v0.6.0 h1:slsWYD/zyx7lCXoZVlvQrj0hPTM1HI4+v1sIda2yDvg=
-github.com/Microsoft/go-winio v0.6.0/go.mod h1:cTAf44im0RAYeL23bpB+fzCyDH2MJiz2BO69KH/soAE=
-github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 h1:TngWCqHvy9oXAN6lEVMRuU21PR1EtLVZJmdB18Gu3Rw=
-github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5/go.mod h1:lmUJ/7eu/Q8D7ML55dXQrVaamCz2vxCfdQBasLZfHKk=
 github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek=
 github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s=
 github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
@@ -24,47 +11,27 @@ github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuP
 github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
 github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
 github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
-github.com/cenkalti/backoff/v4 v4.1.3 h1:cFAlzYUlVYDysBEH2T5hyJZMh3+5+WCBvSnK6Q8UtC4=
-github.com/cenkalti/backoff/v4 v4.1.3/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw=
 github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
 github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/charmbracelet/glamour v0.6.0 h1:wi8fse3Y7nfcabbbDuwolqTqMQPMnVPeZhDM273bISc=
 github.com/charmbracelet/glamour v0.6.0/go.mod h1:taqWV4swIMMbWALc0m7AfE9JkPSU8om2538k9ITBxOc=
-github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E=
 github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
 github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
 github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
-github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA=
-github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
-github.com/containerd/continuity v0.3.0 h1:nisirsYROK15TAMVukJOUyGJjz4BNQJBVsNvAXZJ/eg=
-github.com/containerd/continuity v0.3.0/go.mod h1:wJEAIwKOm/pBZuBd0JmeTvnLquTB1Ag8espWhkykbPM=
-github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
 github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
-github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
 github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
 github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
-github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
-github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
 github.com/dlclark/regexp2 v1.8.1 h1:6Lcdwya6GjPUNsBct8Lg/yRPwMhABj269AAzdGSiR+0=
 github.com/dlclark/regexp2 v1.8.1/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
-github.com/docker/cli v20.10.17+incompatible h1:eO2KS7ZFeov5UJeaDmIs1NFEDRf32PaqRpvoEkKBy5M=
-github.com/docker/cli v20.10.17+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
-github.com/docker/docker v20.10.7+incompatible h1:Z6O9Nhsjv+ayUEeI1IojKbYcsGdgYSNqxe1s2MYzUhQ=
-github.com/docker/docker v20.10.7+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
-github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ=
-github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec=
-github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw=
-github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
 github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df h1:qVcBEZlvp5A1gGWNJj02xyDtbsUI2hohlQMSB1fgER4=
 github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
 github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY=
 github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s=
 github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
-github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k=
 github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
 github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
 github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
@@ -93,11 +60,8 @@ github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg78
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
 github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
-github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
 github.com/gofiber/fiber/v2 v2.50.0 h1:ia0JaB+uw3GpNSCR5nvC5dsaxXjRU5OEu36aytx+zGw=
 github.com/gofiber/fiber/v2 v2.50.0/go.mod h1:21eytvay9Is7S6z+OgPi7c7n4++tnClWmhpimVHMimw=
-github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
-github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
 github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
 github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
 github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
@@ -114,7 +78,6 @@ github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEW
 github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
 github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
 github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
@@ -122,9 +85,6 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
 github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
 github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
-github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4=
-github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ=
-github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4=
 github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY=
@@ -135,15 +95,10 @@ github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+l
 github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
 github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
 github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
-github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4=
-github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
 github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
-github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
 github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
 github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
 github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
-github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
-github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
 github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
 github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I=
@@ -151,11 +106,8 @@ github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQs
 github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE=
 github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
-github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
 github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
 github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
-github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
-github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
 github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
@@ -184,16 +136,6 @@ github.com/microcosm-cc/bluemonday v1.0.26 h1:xbqSvqzQMeEHCqMi64VAs4d8uy6Mequs3r
 github.com/microcosm-cc/bluemonday v1.0.26/go.mod h1:JyzOCs9gkyQyjs+6h10UEVSe02CGwkhd72Xdqh78TWs=
 github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
 github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
-github.com/mitchellh/copystructure v1.0.0 h1:Laisrj+bAB6b/yJwB5Bt3ITZhGJdqmxquMKeZ+mmkFQ=
-github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw=
-github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
-github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
-github.com/mitchellh/reflectwalk v1.0.0 h1:9D+8oIskB4VJBN5SFlmc27fSlIBZaov1Wpk/IfikLNY=
-github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
-github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU=
-github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 h1:rzf0wL0CHVc8CEsgyygG0Mn9CNCCPZqOPaz8RiiHYQk=
-github.com/moby/term v0.0.0-20201216013528-df9cb8a40635/go.mod h1:FBS0z0QWA44HXygs7VXDUOGoN/1TV3RuWkLO04am3wc=
-github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
 github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 h1:OFVkSxR7CRSRSNm5dvpMRZwmSwWa8EMMnHbc84fW5tU=
 github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
 github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c h1:CI5uGwqBpN8N7BrSKC+nmdfw+9nPQIDyjHHlaIiitZI=
@@ -224,16 +166,6 @@ github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1y
 github.com/onsi/gomega v1.16.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY=
 github.com/onsi/gomega v1.28.1 h1:MijcGUbfYuznzK/5R4CPNoUP/9Xvuo20sXfEm6XxoTA=
 github.com/onsi/gomega v1.28.1/go.mod h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ=
-github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
-github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
-github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrBg0D7ufOcFM=
-github.com/opencontainers/image-spec v1.0.2/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0=
-github.com/opencontainers/runc v1.1.5 h1:L44KXEpKmfWDcS02aeGm8QNTFXTo2D+8MYGDIJ/GDEs=
-github.com/opencontainers/runc v1.1.5/go.mod h1:1J5XiS+vdZ3wCyZybsuxXZWGrgSr8fFJHLXuG2PsnNg=
-github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
-github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
-github.com/ory/dockertest/v3 v3.10.0 h1:4K3z2VMe8Woe++invjaTB7VRyQXQy5UY+loujO4aNE4=
-github.com/ory/dockertest/v3 v3.10.0/go.mod h1:nr57ZbRWMqfsdGdFNLHz5jjNdDb7VVFnzAeW1n5N1Lg=
 github.com/otiai10/mint v1.6.1 h1:kgbTJmOpp/0ce7hk3H8jiSuR0MXmpwWRfqUdKww17qg=
 github.com/otiai10/mint v1.6.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM=
 github.com/otiai10/openaigo v1.6.0 h1:YTQEbtDSvawETOB/Kmb/6JvuHdHH/eIpSQfHVufiwY8=
@@ -242,8 +174,6 @@ github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 h1:Ii+DKncOVM8Cu1H
 github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5/go.mod h1:iIss55rKnNBTvrwdmkUpLnDpZoAHvWaiq5+iMmen4AE=
 github.com/pierrec/lz4/v4 v4.1.2 h1:qvY3YFXRQE/XB8MlLzJH7mSzBs74eA2gg52YTk6jUPM=
 github.com/pierrec/lz4/v4 v4.1.2/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
-github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
-github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pkoukk/tiktoken-go v0.1.2 h1:u7PCSBiWJ3nJYoTGShyM9iHXz4dNyYkurwwp+GHtyHY=
 github.com/pkoukk/tiktoken-go v0.1.2/go.mod h1:boMWvk9pQCOTx11pgu0DrIdrAKgQzzJKUP6vLXaz7Rw=
@@ -267,16 +197,12 @@ github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUz
 github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
 github.com/rs/zerolog v1.31.0 h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A=
 github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
-github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/sashabaranov/go-openai v1.16.0 h1:34W6WV84ey6OpW0p2UewZkdMu82AxGC+BzpU6iiauRw=
 github.com/sashabaranov/go-openai v1.16.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
-github.com/sashabaranov/go-openai v1.20.4 h1:095xQ/fAtRa0+Rj21sezVJABgKfGPNbyx/sAN/hJUmg=
-github.com/sashabaranov/go-openai v1.20.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
 github.com/schollz/progressbar/v3 v3.13.1 h1:o8rySDYiQ59Mwzy2FELeHY5ZARXZTVJC7iHD6PEFUiE=
 github.com/schollz/progressbar/v3 v3.13.1/go.mod h1:xvrbki8kfT1fzWzBT/UZd9L6GA+jdL7HAgq2RFnO6fQ=
-github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg=
 github.com/shirou/gopsutil/v3 v3.23.7/go.mod h1:c4gnmoRC0hQuaLqvxnx1//VXQ0Ms/X9UnJF8pddY5z4=
 github.com/shirou/gopsutil/v3 v3.23.9 h1:ZI5bWVeu2ep4/DIxB4U9okeYJ7zp/QLTO4auRb/ty/E=
 github.com/shirou/gopsutil/v3 v3.23.9/go.mod h1:x/NWSb71eMcjFIO0vhyGW5nZ7oSIgVjrCnADckb85GA=
@@ -284,18 +210,9 @@ github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFt
 github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ=
 github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU=
 github.com/shoenig/test v0.6.4/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k=
-github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ=
-github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
-github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
-github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
-github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
-github.com/spf13/cast v1.3.1 h1:nFm6S0SMdyzrzcmThSipiEubIDy8WEXKNZ0UOgiRpng=
-github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
-github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
-github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
 github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
@@ -304,7 +221,6 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
 github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
 github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
 github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
-github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
 github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI=
 github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU=
 github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI=
@@ -316,7 +232,6 @@ github.com/tmc/langchaingo v0.0.0-20231019140956-c636b3da7701/go.mod h1:SiwyRS7s
 github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
 github.com/ulikunitz/xz v0.5.9 h1:RsKRIA2MO8x56wkkcd3LbtcE/uMszhb6DpRf+3uwa3I=
 github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
-github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
 github.com/urfave/cli/v2 v2.25.7 h1:VAzn5oq403l5pHjc4OhD54+XGO9cdKVL/7lDjF+iKUs=
 github.com/urfave/cli/v2 v2.25.7/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ=
 github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
@@ -325,21 +240,11 @@ github.com/valyala/fasthttp v1.50.0 h1:H7fweIlBm0rXLs2q0XbalvJ6r0CUPFWK3/bB4N13e
 github.com/valyala/fasthttp v1.50.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA=
 github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
 github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
-github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
-github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU=
-github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f h1:J9EGpcZtP0E/raorCMxlFGSTBrsSlaDGf3jU/qvAE2c=
-github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
-github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0=
-github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
-github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74=
-github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
 github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=
 github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
 github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
 github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
-github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
-github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
 github.com/yuin/goldmark v1.5.2 h1:ALmeCk/px5FSm1MAcFBAsVKZjDuMVj8Tm7FFIlMJnqU=
 github.com/yuin/goldmark v1.5.2/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
 github.com/yuin/goldmark-emoji v1.0.1 h1:ctuWEyzGBwiucEqxzwe0SOYDXPAucOrE9NQC18Wa1os=
@@ -361,61 +266,37 @@ go.opentelemetry.io/otel/trace v1.19.0/go.mod h1:mfaSyvGyEJEI0nyV2I4qhNQnbBOUUmY
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
-golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
-golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4=
-golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc=
-golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
-golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
-golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
 golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc=
 golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
 golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
-golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
 golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
-golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
-golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
 golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk=
-golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
 golang.org/x/net v0.0.0-20221002022538-bcab6841153b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
-golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY=
 golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
 golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
 golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200831180312-196b9ba8737a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@@ -430,7 +311,6 @@ golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
 golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
-golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
 golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
 golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
 golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
@@ -438,16 +318,11 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
-golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
 golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
 golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
-golang.org/x/tools v0.0.0-20190624222133-a101b041ded4/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
-golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
 golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
-golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
-golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
 golang.org/x/tools v0.12.0 h1:YW6HUoUmYBpwSgyaGaZq1fHjrBjX1rlpZ54T6mu2kss=
 golang.org/x/tools v0.12.0/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -466,7 +341,6 @@ google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzi
 google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
-google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
 google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
 google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
@@ -485,4 +359,3 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
-gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk=
--- a/main.go
+++ b/main.go
@@ -306,16 +306,11 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit
 				return fmt.Errorf("failed basic startup tasks with error %s", err.Error())
 			}

-			configdir := ctx.String("localai-config-dir")
-			// Watch the configuration directory
-			// If the directory does not exist, we don't watch it
-			if _, err := os.Stat(configdir); err == nil {
-				closeConfigWatcherFn, err := startup.WatchConfigDirectory(ctx.String("localai-config-dir"), options)
-				defer closeConfigWatcherFn()
+			closeConfigWatcherFn, err := startup.WatchConfigDirectory(ctx.String("localai-config-dir"), options)
+			defer closeConfigWatcherFn()

-				if err != nil {
-					return fmt.Errorf("failed while watching configuration directory %s", ctx.String("localai-config-dir"))
-				}
+			if err != nil {
+				return fmt.Errorf("failed while watching configuration directory %s", ctx.String("localai-config-dir"))
 			}

 			appHTTP, err := http.App(cl, ml, options)
--- a/pkg/grpc/backend.go
+++ b/pkg/grpc/backend.go
@@ -44,9 +44,4 @@ type Backend interface {
 	AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error)
 	TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error)
 	Status(ctx context.Context) (*pb.StatusResponse, error)
-
-	StoresSet(ctx context.Context, in *pb.StoresSetOptions, opts ...grpc.CallOption) (*pb.Result, error)
-	StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, opts ...grpc.CallOption) (*pb.Result, error)
-	StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error)
-	StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error)
 }
--- a/pkg/grpc/base/base.go
+++ b/pkg/grpc/base/base.go
@@ -72,22 +72,6 @@ func (llm *Base) Status() (pb.StatusResponse, error) {
 	}, nil
 }

-func (llm *Base) StoresSet(*pb.StoresSetOptions) error {
-	return fmt.Errorf("unimplemented")
-}
-
-func (llm *Base) StoresGet(*pb.StoresGetOptions) (pb.StoresGetResult, error) {
-	return pb.StoresGetResult{}, fmt.Errorf("unimplemented")
-}
-
-func (llm *Base) StoresDelete(*pb.StoresDeleteOptions) error {
-	return fmt.Errorf("unimplemented")
-}
-
-func (llm *Base) StoresFind(*pb.StoresFindOptions) (pb.StoresFindResult, error) {
-	return pb.StoresFindResult{}, fmt.Errorf("unimplemented")
-}
-
 func memoryUsage() *pb.MemoryUsageData {
 	mud := pb.MemoryUsageData{
 		Breakdown: make(map[string]uint64),
--- a/pkg/grpc/client.go
+++ b/pkg/grpc/client.go
@@ -291,67 +291,3 @@ func (c *Client) Status(ctx context.Context) (*pb.StatusResponse, error) {
 	client := pb.NewBackendClient(conn)
 	return client.Status(ctx, &pb.HealthMessage{})
 }
-
-func (c *Client) StoresSet(ctx context.Context, in *pb.StoresSetOptions, opts ...grpc.CallOption) (*pb.Result, error) {
-	if !c.parallel {
-		c.opMutex.Lock()
-		defer c.opMutex.Unlock()
-	}
-	c.setBusy(true)
-	defer c.setBusy(false)
-	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
-	if err != nil {
-		return nil, err
-	}
-	defer conn.Close()
-	client := pb.NewBackendClient(conn)
-	return client.StoresSet(ctx, in, opts...)
-}
-
-func (c *Client) StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, opts ...grpc.CallOption) (*pb.Result, error) {
-	if !c.parallel {
-		c.opMutex.Lock()
-		defer c.opMutex.Unlock()
-	}
-	c.setBusy(true)
-	defer c.setBusy(false)
-	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
-	if err != nil {
-		return nil, err
-	}
-	defer conn.Close()
-	client := pb.NewBackendClient(conn)
-	return client.StoresDelete(ctx, in, opts...)
-}
-
-func (c *Client) StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error) {
-	if !c.parallel {
-		c.opMutex.Lock()
-		defer c.opMutex.Unlock()
-	}
-	c.setBusy(true)
-	defer c.setBusy(false)
-	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
-	if err != nil {
-		return nil, err
-	}
-	defer conn.Close()
-	client := pb.NewBackendClient(conn)
-	return client.StoresGet(ctx, in, opts...)
-}
-
-func (c *Client) StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error) {
-	if !c.parallel {
-		c.opMutex.Lock()
-		defer c.opMutex.Unlock()
-	}
-	c.setBusy(true)
-	defer c.setBusy(false)
-	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
-	if err != nil {
-		return nil, err
-	}
-	defer conn.Close()
-	client := pb.NewBackendClient(conn)
-	return client.StoresFind(ctx, in, opts...)
-}
--- a/pkg/grpc/embed.go
+++ b/pkg/grpc/embed.go
@@ -85,22 +85,6 @@ func (e *embedBackend) Status(ctx context.Context) (*pb.StatusResponse, error) {
 	return e.s.Status(ctx, &pb.HealthMessage{})
 }

-func (e *embedBackend) StoresSet(ctx context.Context, in *pb.StoresSetOptions, opts ...grpc.CallOption) (*pb.Result, error) {
-	return e.s.StoresSet(ctx, in)
-}
-
-func (e *embedBackend) StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, opts ...grpc.CallOption) (*pb.Result, error) {
-	return e.s.StoresDelete(ctx, in)
-}
-
-func (e *embedBackend) StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error) {
-	return e.s.StoresGet(ctx, in)
-}
-
-func (e *embedBackend) StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error) {
-	return e.s.StoresFind(ctx, in)
-}
-
 type embedBackendServerStream struct {
 	ctx context.Context
 	fn  func(s []byte)
--- a/pkg/grpc/interface.go
+++ b/pkg/grpc/interface.go
@@ -19,11 +19,6 @@ type LLM interface {
 	TTS(*pb.TTSRequest) error
 	TokenizeString(*pb.PredictOptions) (pb.TokenizationResponse, error)
 	Status() (pb.StatusResponse, error)
-
-	StoresSet(*pb.StoresSetOptions) error
-	StoresDelete(*pb.StoresDeleteOptions) error
-	StoresGet(*pb.StoresGetOptions) (pb.StoresGetResult, error)
-	StoresFind(*pb.StoresFindOptions) (pb.StoresFindResult, error)
 }

 func newReply(s string) *pb.Reply {
--- a/pkg/grpc/proto/backend.pb.go
+++ b/pkg/grpc/proto/backend.pb.go
--- a/pkg/grpc/proto/backend_grpc.pb.go
+++ b/pkg/grpc/proto/backend_grpc.pb.go
@@ -1,6 +1,6 @@
 // Code generated by protoc-gen-go-grpc. DO NOT EDIT.
 // versions:
-// - protoc-gen-go-grpc v1.3.0
+// - protoc-gen-go-grpc v1.2.0
 // - protoc             v4.23.4
 // source: backend.proto

@@ -18,23 +18,6 @@ import (
 // Requires gRPC-Go v1.32.0 or later.
 const _ = grpc.SupportPackageIsVersion7

-const (
-	Backend_Health_FullMethodName             = "/backend.Backend/Health"
-	Backend_Predict_FullMethodName            = "/backend.Backend/Predict"
-	Backend_LoadModel_FullMethodName          = "/backend.Backend/LoadModel"
-	Backend_PredictStream_FullMethodName      = "/backend.Backend/PredictStream"
-	Backend_Embedding_FullMethodName          = "/backend.Backend/Embedding"
-	Backend_GenerateImage_FullMethodName      = "/backend.Backend/GenerateImage"
-	Backend_AudioTranscription_FullMethodName = "/backend.Backend/AudioTranscription"
-	Backend_TTS_FullMethodName                = "/backend.Backend/TTS"
-	Backend_TokenizeString_FullMethodName     = "/backend.Backend/TokenizeString"
-	Backend_Status_FullMethodName             = "/backend.Backend/Status"
-	Backend_StoresSet_FullMethodName          = "/backend.Backend/StoresSet"
-	Backend_StoresDelete_FullMethodName       = "/backend.Backend/StoresDelete"
-	Backend_StoresGet_FullMethodName          = "/backend.Backend/StoresGet"
-	Backend_StoresFind_FullMethodName         = "/backend.Backend/StoresFind"
-)
-
 // BackendClient is the client API for Backend service.
 //
 // For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
@@ -49,10 +32,6 @@ type BackendClient interface {
 	TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error)
 	TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error)
 	Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error)
-	StoresSet(ctx context.Context, in *StoresSetOptions, opts ...grpc.CallOption) (*Result, error)
-	StoresDelete(ctx context.Context, in *StoresDeleteOptions, opts ...grpc.CallOption) (*Result, error)
-	StoresGet(ctx context.Context, in *StoresGetOptions, opts ...grpc.CallOption) (*StoresGetResult, error)
-	StoresFind(ctx context.Context, in *StoresFindOptions, opts ...grpc.CallOption) (*StoresFindResult, error)
 }

 type backendClient struct {
@@ -65,7 +44,7 @@ func NewBackendClient(cc grpc.ClientConnInterface) BackendClient {

 func (c *backendClient) Health(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*Reply, error) {
 	out := new(Reply)
-	err := c.cc.Invoke(ctx, Backend_Health_FullMethodName, in, out, opts...)
+	err := c.cc.Invoke(ctx, "/backend.Backend/Health", in, out, opts...)
 	if err != nil {
 		return nil, err
 	}
@@ -74,7 +53,7 @@ func (c *backendClient) Health(ctx context.Context, in *HealthMessage, opts ...g

 func (c *backendClient) Predict(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*Reply, error) {
 	out := new(Reply)
-	err := c.cc.Invoke(ctx, Backend_Predict_FullMethodName, in, out, opts...)
+	err := c.cc.Invoke(ctx, "/backend.Backend/Predict", in, out, opts...)
 	if err != nil {
 		return nil, err
 	}
@@ -83,7 +62,7 @@ func (c *backendClient) Predict(ctx context.Context, in *PredictOptions, opts ..

 func (c *backendClient) LoadModel(ctx context.Context, in *ModelOptions, opts ...grpc.CallOption) (*Result, error) {
 	out := new(Result)
-	err := c.cc.Invoke(ctx, Backend_LoadModel_FullMethodName, in, out, opts...)
+	err := c.cc.Invoke(ctx, "/backend.Backend/LoadModel", in, out, opts...)
 	if err != nil {
 		return nil, err
 	}
@@ -91,7 +70,7 @@ func (c *backendClient) LoadModel(ctx context.Context, in *ModelOptions, opts ..
 }

 func (c *backendClient) PredictStream(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (Backend_PredictStreamClient, error) {
-	stream, err := c.cc.NewStream(ctx, &Backend_ServiceDesc.Streams[0], Backend_PredictStream_FullMethodName, opts...)
+	stream, err := c.cc.NewStream(ctx, &Backend_ServiceDesc.Streams[0], "/backend.Backend/PredictStream", opts...)
 	if err != nil {
 		return nil, err
 	}
@@ -124,7 +103,7 @@ func (x *backendPredictStreamClient) Recv() (*Reply, error) {

 func (c *backendClient) Embedding(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*EmbeddingResult, error) {
 	out := new(EmbeddingResult)
-	err := c.cc.Invoke(ctx, Backend_Embedding_FullMethodName, in, out, opts...)
+	err := c.cc.Invoke(ctx, "/backend.Backend/Embedding", in, out, opts...)
 	if err != nil {
 		return nil, err
 	}
@@ -133,7 +112,7 @@ func (c *backendClient) Embedding(ctx context.Context, in *PredictOptions, opts

 func (c *backendClient) GenerateImage(ctx context.Context, in *GenerateImageRequest, opts ...grpc.CallOption) (*Result, error) {
 	out := new(Result)
-	err := c.cc.Invoke(ctx, Backend_GenerateImage_FullMethodName, in, out, opts...)
+	err := c.cc.Invoke(ctx, "/backend.Backend/GenerateImage", in, out, opts...)
 	if err != nil {
 		return nil, err
 	}
@@ -142,7 +121,7 @@ func (c *backendClient) GenerateImage(ctx context.Context, in *GenerateImageRequ

 func (c *backendClient) AudioTranscription(ctx context.Context, in *TranscriptRequest, opts ...grpc.CallOption) (*TranscriptResult, error) {
 	out := new(TranscriptResult)
-	err := c.cc.Invoke(ctx, Backend_AudioTranscription_FullMethodName, in, out, opts...)
+	err := c.cc.Invoke(ctx, "/backend.Backend/AudioTranscription", in, out, opts...)
 	if err != nil {
 		return nil, err
 	}
@@ -151,7 +130,7 @@ func (c *backendClient) AudioTranscription(ctx context.Context, in *TranscriptRe

 func (c *backendClient) TTS(ctx context.Context, in *TTSRequest, opts ...grpc.CallOption) (*Result, error) {
 	out := new(Result)
-	err := c.cc.Invoke(ctx, Backend_TTS_FullMethodName, in, out, opts...)
+	err := c.cc.Invoke(ctx, "/backend.Backend/TTS", in, out, opts...)
 	if err != nil {
 		return nil, err
 	}
@@ -160,7 +139,7 @@ func (c *backendClient) TTS(ctx context.Context, in *TTSRequest, opts ...grpc.Ca

 func (c *backendClient) TokenizeString(ctx context.Context, in *PredictOptions, opts ...grpc.CallOption) (*TokenizationResponse, error) {
 	out := new(TokenizationResponse)
-	err := c.cc.Invoke(ctx, Backend_TokenizeString_FullMethodName, in, out, opts...)
+	err := c.cc.Invoke(ctx, "/backend.Backend/TokenizeString", in, out, opts...)
 	if err != nil {
 		return nil, err
 	}
@@ -169,43 +148,7 @@ func (c *backendClient) TokenizeString(ctx context.Context, in *PredictOptions,

 func (c *backendClient) Status(ctx context.Context, in *HealthMessage, opts ...grpc.CallOption) (*StatusResponse, error) {
 	out := new(StatusResponse)
-	err := c.cc.Invoke(ctx, Backend_Status_FullMethodName, in, out, opts...)
-	if err != nil {
-		return nil, err
-	}
-	return out, nil
-}
-
-func (c *backendClient) StoresSet(ctx context.Context, in *StoresSetOptions, opts ...grpc.CallOption) (*Result, error) {
-	out := new(Result)
-	err := c.cc.Invoke(ctx, Backend_StoresSet_FullMethodName, in, out, opts...)
-	if err != nil {
-		return nil, err
-	}
-	return out, nil
-}
-
-func (c *backendClient) StoresDelete(ctx context.Context, in *StoresDeleteOptions, opts ...grpc.CallOption) (*Result, error) {
-	out := new(Result)
-	err := c.cc.Invoke(ctx, Backend_StoresDelete_FullMethodName, in, out, opts...)
-	if err != nil {
-		return nil, err
-	}
-	return out, nil
-}
-
-func (c *backendClient) StoresGet(ctx context.Context, in *StoresGetOptions, opts ...grpc.CallOption) (*StoresGetResult, error) {
-	out := new(StoresGetResult)
-	err := c.cc.Invoke(ctx, Backend_StoresGet_FullMethodName, in, out, opts...)
-	if err != nil {
-		return nil, err
-	}
-	return out, nil
-}
-
-func (c *backendClient) StoresFind(ctx context.Context, in *StoresFindOptions, opts ...grpc.CallOption) (*StoresFindResult, error) {
-	out := new(StoresFindResult)
-	err := c.cc.Invoke(ctx, Backend_StoresFind_FullMethodName, in, out, opts...)
+	err := c.cc.Invoke(ctx, "/backend.Backend/Status", in, out, opts...)
 	if err != nil {
 		return nil, err
 	}
@@ -226,10 +169,6 @@ type BackendServer interface {
 	TTS(context.Context, *TTSRequest) (*Result, error)
 	TokenizeString(context.Context, *PredictOptions) (*TokenizationResponse, error)
 	Status(context.Context, *HealthMessage) (*StatusResponse, error)
-	StoresSet(context.Context, *StoresSetOptions) (*Result, error)
-	StoresDelete(context.Context, *StoresDeleteOptions) (*Result, error)
-	StoresGet(context.Context, *StoresGetOptions) (*StoresGetResult, error)
-	StoresFind(context.Context, *StoresFindOptions) (*StoresFindResult, error)
 	mustEmbedUnimplementedBackendServer()
 }

@@ -267,18 +206,6 @@ func (UnimplementedBackendServer) TokenizeString(context.Context, *PredictOption
 func (UnimplementedBackendServer) Status(context.Context, *HealthMessage) (*StatusResponse, error) {
 	return nil, status.Errorf(codes.Unimplemented, "method Status not implemented")
 }
-func (UnimplementedBackendServer) StoresSet(context.Context, *StoresSetOptions) (*Result, error) {
-	return nil, status.Errorf(codes.Unimplemented, "method StoresSet not implemented")
-}
-func (UnimplementedBackendServer) StoresDelete(context.Context, *StoresDeleteOptions) (*Result, error) {
-	return nil, status.Errorf(codes.Unimplemented, "method StoresDelete not implemented")
-}
-func (UnimplementedBackendServer) StoresGet(context.Context, *StoresGetOptions) (*StoresGetResult, error) {
-	return nil, status.Errorf(codes.Unimplemented, "method StoresGet not implemented")
-}
-func (UnimplementedBackendServer) StoresFind(context.Context, *StoresFindOptions) (*StoresFindResult, error) {
-	return nil, status.Errorf(codes.Unimplemented, "method StoresFind not implemented")
-}
 func (UnimplementedBackendServer) mustEmbedUnimplementedBackendServer() {}

 // UnsafeBackendServer may be embedded to opt out of forward compatibility for this service.
@@ -302,7 +229,7 @@ func _Backend_Health_Handler(srv interface{}, ctx context.Context, dec func(inte
 	}
 	info := &grpc.UnaryServerInfo{
 		Server:     srv,
-		FullMethod: Backend_Health_FullMethodName,
+		FullMethod: "/backend.Backend/Health",
 	}
 	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
 		return srv.(BackendServer).Health(ctx, req.(*HealthMessage))
@@ -320,7 +247,7 @@ func _Backend_Predict_Handler(srv interface{}, ctx context.Context, dec func(int
 	}
 	info := &grpc.UnaryServerInfo{
 		Server:     srv,
-		FullMethod: Backend_Predict_FullMethodName,
+		FullMethod: "/backend.Backend/Predict",
 	}
 	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
 		return srv.(BackendServer).Predict(ctx, req.(*PredictOptions))
@@ -338,7 +265,7 @@ func _Backend_LoadModel_Handler(srv interface{}, ctx context.Context, dec func(i
 	}
 	info := &grpc.UnaryServerInfo{
 		Server:     srv,
-		FullMethod: Backend_LoadModel_FullMethodName,
+		FullMethod: "/backend.Backend/LoadModel",
 	}
 	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
 		return srv.(BackendServer).LoadModel(ctx, req.(*ModelOptions))
@@ -377,7 +304,7 @@ func _Backend_Embedding_Handler(srv interface{}, ctx context.Context, dec func(i
 	}
 	info := &grpc.UnaryServerInfo{
 		Server:     srv,
-		FullMethod: Backend_Embedding_FullMethodName,
+		FullMethod: "/backend.Backend/Embedding",
 	}
 	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
 		return srv.(BackendServer).Embedding(ctx, req.(*PredictOptions))
@@ -395,7 +322,7 @@ func _Backend_GenerateImage_Handler(srv interface{}, ctx context.Context, dec fu
 	}
 	info := &grpc.UnaryServerInfo{
 		Server:     srv,
-		FullMethod: Backend_GenerateImage_FullMethodName,
+		FullMethod: "/backend.Backend/GenerateImage",
 	}
 	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
 		return srv.(BackendServer).GenerateImage(ctx, req.(*GenerateImageRequest))
@@ -413,7 +340,7 @@ func _Backend_AudioTranscription_Handler(srv interface{}, ctx context.Context, d
 	}
 	info := &grpc.UnaryServerInfo{
 		Server:     srv,
-		FullMethod: Backend_AudioTranscription_FullMethodName,
+		FullMethod: "/backend.Backend/AudioTranscription",
 	}
 	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
 		return srv.(BackendServer).AudioTranscription(ctx, req.(*TranscriptRequest))
@@ -431,7 +358,7 @@ func _Backend_TTS_Handler(srv interface{}, ctx context.Context, dec func(interfa
 	}
 	info := &grpc.UnaryServerInfo{
 		Server:     srv,
-		FullMethod: Backend_TTS_FullMethodName,
+		FullMethod: "/backend.Backend/TTS",
 	}
 	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
 		return srv.(BackendServer).TTS(ctx, req.(*TTSRequest))
@@ -449,7 +376,7 @@ func _Backend_TokenizeString_Handler(srv interface{}, ctx context.Context, dec f
 	}
 	info := &grpc.UnaryServerInfo{
 		Server:     srv,
-		FullMethod: Backend_TokenizeString_FullMethodName,
+		FullMethod: "/backend.Backend/TokenizeString",
 	}
 	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
 		return srv.(BackendServer).TokenizeString(ctx, req.(*PredictOptions))
@@ -467,7 +394,7 @@ func _Backend_Status_Handler(srv interface{}, ctx context.Context, dec func(inte
 	}
 	info := &grpc.UnaryServerInfo{
 		Server:     srv,
-		FullMethod: Backend_Status_FullMethodName,
+		FullMethod: "/backend.Backend/Status",
 	}
 	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
 		return srv.(BackendServer).Status(ctx, req.(*HealthMessage))
@@ -475,78 +402,6 @@ func _Backend_Status_Handler(srv interface{}, ctx context.Context, dec func(inte
 	return interceptor(ctx, in, info, handler)
 }

-func _Backend_StoresSet_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
-	in := new(StoresSetOptions)
-	if err := dec(in); err != nil {
-		return nil, err
-	}
-	if interceptor == nil {
-		return srv.(BackendServer).StoresSet(ctx, in)
-	}
-	info := &grpc.UnaryServerInfo{
-		Server:     srv,
-		FullMethod: Backend_StoresSet_FullMethodName,
-	}
-	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
-		return srv.(BackendServer).StoresSet(ctx, req.(*StoresSetOptions))
-	}
-	return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_StoresDelete_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
-	in := new(StoresDeleteOptions)
-	if err := dec(in); err != nil {
-		return nil, err
-	}
-	if interceptor == nil {
-		return srv.(BackendServer).StoresDelete(ctx, in)
-	}
-	info := &grpc.UnaryServerInfo{
-		Server:     srv,
-		FullMethod: Backend_StoresDelete_FullMethodName,
-	}
-	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
-		return srv.(BackendServer).StoresDelete(ctx, req.(*StoresDeleteOptions))
-	}
-	return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_StoresGet_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
-	in := new(StoresGetOptions)
-	if err := dec(in); err != nil {
-		return nil, err
-	}
-	if interceptor == nil {
-		return srv.(BackendServer).StoresGet(ctx, in)
-	}
-	info := &grpc.UnaryServerInfo{
-		Server:     srv,
-		FullMethod: Backend_StoresGet_FullMethodName,
-	}
-	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
-		return srv.(BackendServer).StoresGet(ctx, req.(*StoresGetOptions))
-	}
-	return interceptor(ctx, in, info, handler)
-}
-
-func _Backend_StoresFind_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
-	in := new(StoresFindOptions)
-	if err := dec(in); err != nil {
-		return nil, err
-	}
-	if interceptor == nil {
-		return srv.(BackendServer).StoresFind(ctx, in)
-	}
-	info := &grpc.UnaryServerInfo{
-		Server:     srv,
-		FullMethod: Backend_StoresFind_FullMethodName,
-	}
-	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
-		return srv.(BackendServer).StoresFind(ctx, req.(*StoresFindOptions))
-	}
-	return interceptor(ctx, in, info, handler)
-}
-
 // Backend_ServiceDesc is the grpc.ServiceDesc for Backend service.
 // It's only intended for direct use with grpc.RegisterService,
 // and not to be introspected or modified (even as a copy)
@@ -590,22 +445,6 @@ var Backend_ServiceDesc = grpc.ServiceDesc{
 			MethodName: "Status",
 			Handler:    _Backend_Status_Handler,
 		},
-		{
-			MethodName: "StoresSet",
-			Handler:    _Backend_StoresSet_Handler,
-		},
-		{
-			MethodName: "StoresDelete",
-			Handler:    _Backend_StoresDelete_Handler,
-		},
-		{
-			MethodName: "StoresGet",
-			Handler:    _Backend_StoresGet_Handler,
-		},
-		{
-			MethodName: "StoresFind",
-			Handler:    _Backend_StoresFind_Handler,
-		},
 	},
 	Streams: []grpc.StreamDesc{
 		{
--- a/pkg/grpc/server.go
+++ b/pkg/grpc/server.go
@@ -167,54 +167,6 @@ func (s *server) Status(ctx context.Context, in *pb.HealthMessage) (*pb.StatusRe
 	return &res, nil
 }

-func (s *server) StoresSet(ctx context.Context, in *pb.StoresSetOptions) (*pb.Result, error) {
-	if s.llm.Locking() {
-		s.llm.Lock()
-		defer s.llm.Unlock()
-	}
-	err := s.llm.StoresSet(in)
-	if err != nil {
-		return &pb.Result{Message: fmt.Sprintf("Error setting entry: %s", err.Error()), Success: false}, err
-	}
-	return &pb.Result{Message: "Set key", Success: true}, nil
-}
-
-func (s *server) StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions) (*pb.Result, error) {
-	if s.llm.Locking() {
-		s.llm.Lock()
-		defer s.llm.Unlock()
-	}
-	err := s.llm.StoresDelete(in)
-	if err != nil {
-		return &pb.Result{Message: fmt.Sprintf("Error deleting entry: %s", err.Error()), Success: false}, err
-	}
-	return &pb.Result{Message: "Deleted key", Success: true}, nil
-}
-
-func (s *server) StoresGet(ctx context.Context, in *pb.StoresGetOptions) (*pb.StoresGetResult, error) {
-	if s.llm.Locking() {
-		s.llm.Lock()
-		defer s.llm.Unlock()
-	}
-	res, err := s.llm.StoresGet(in)
-	if err != nil {
-		return nil, err
-	}
-	return &res, nil
-}
-
-func (s *server) StoresFind(ctx context.Context, in *pb.StoresFindOptions) (*pb.StoresFindResult, error) {
-	if s.llm.Locking() {
-		s.llm.Lock()
-		defer s.llm.Unlock()
-	}
-	res, err := s.llm.StoresFind(in)
-	if err != nil {
-		return nil, err
-	}
-	return &res, nil
-}
-
 func StartServer(address string, model LLM) error {
 	lis, err := net.Listen("tcp", address)
 	if err != nil {
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -15,12 +15,12 @@ import (
 )

 var Aliases map[string]string = map[string]string{
-	"go-llama": LLamaCPP,
+	"go-llama": GoLlamaBackend,
 	"llama":    LLamaCPP,
-	"embedded-store": LocalStoreBackend,
 }

 const (
+	GoLlamaBackend      = "llama"
 	LlamaGGML           = "llama-ggml"
 	LLamaCPP            = "llama-cpp"
 	Gpt4AllLlamaBackend = "gpt4all-llama"
@@ -36,12 +36,14 @@ const (
 	PiperBackend           = "piper"
 	LCHuggingFaceBackend   = "langchain-huggingface"

-	LocalStoreBackend = "local-store"
+	// External Backends that need special handling within LocalAI:
+	TransformersMusicGen = "transformers-musicgen"
 )

 var AutoLoadBackends []string = []string{
 	LLamaCPP,
 	LlamaGGML,
+	GoLlamaBackend,
 	Gpt4All,
 	BertEmbeddingsBackend,
 	RwkvBackend,
--- a/pkg/model/loader.go
+++ b/pkg/model/loader.go
@@ -10,7 +10,6 @@ import (
 	"sync"
 	"text/template"

-	"github.com/Masterminds/sprig/v3"
 	grammar "github.com/go-skynet/LocalAI/pkg/grammar"
 	"github.com/go-skynet/LocalAI/pkg/grpc"
 	process "github.com/mudler/go-processmanager"
@@ -37,9 +36,6 @@ type ChatMessageTemplateData struct {
 	FunctionName string
 	Content      string
 	MessageIndex int
-	Function     bool
-	FunctionCall interface{}
-	LastMessage  bool
 }

 // Keep this in sync with config.TemplateConfig. Is there a more idiomatic way to accomplish this in go?
@@ -265,7 +261,7 @@ func (ml *ModelLoader) loadTemplateIfExists(templateType TemplateType, templateN
 	}

 	// Parse the template
-	tmpl, err := template.New("prompt").Funcs(sprig.FuncMap()).Parse(dat)
+	tmpl, err := template.New("prompt").Parse(dat)
 	if err != nil {
 		return err
 	}
--- a/pkg/startup/model_preload.go
+++ b/pkg/startup/model_preload.go
@@ -60,23 +60,7 @@ func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, model
 				}
 			}
 		default:
-			if _, err := os.Stat(url); err == nil {
-				log.Debug().Msgf("[startup] resolved local model: %s", url)
-				// copy to modelPath
-				md5Name := utils.MD5(url)
-
-				modelYAML, err := os.ReadFile(url)
-				if err != nil {
-					log.Error().Msgf("error loading model: %s", err.Error())
-					continue
-				}
-
-				if err := os.WriteFile(filepath.Join(modelPath, md5Name)+".yaml", modelYAML, os.ModePerm); err != nil {
-					log.Error().Msgf("error loading model: %s", err.Error())
-				}
-			} else {
-				log.Warn().Msgf("[startup] failed resolving model '%s'", url)
-			}
+			log.Warn().Msgf("[startup] failed resolving model '%s'", url)
 		}
 	}
 }
--- a/pkg/store/client.go
+++ b/pkg/store/client.go
@@ -1,155 +0,0 @@
-package store
-
-import (
-	"context"
-	"fmt"
-
-	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
-	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
-)
-
-// Wrapper for the GRPC client so that simple use cases are handled without verbosity
-
-// SetCols sets multiple key-value pairs in the store
-// It's in columnar format so that keys[i] is associated with values[i]
-func SetCols(ctx context.Context, c grpc.Backend, keys [][]float32, values [][]byte) error {
-	protoKeys := make([]*proto.StoresKey, len(keys))
-	for i, k := range keys {
-		protoKeys[i] = &proto.StoresKey{
-			Floats: k,
-		}
-	}
-	protoValues := make([]*proto.StoresValue, len(values))
-	for i, v := range values {
-		protoValues[i] = &proto.StoresValue{
-			Bytes: v,
-		}
-	}
-	setOpts := &proto.StoresSetOptions{
-		Keys:   protoKeys,
-		Values: protoValues,
-	}
-
-	res, err := c.StoresSet(ctx, setOpts)
-	if err != nil {
-		return err
-	}
-
-	if res.Success {
-		return nil
-	}
-
-	return fmt.Errorf("failed to set keys: %v", res.Message)
-}
-
-// SetSingle sets a single key-value pair in the store
-// Don't call this in a tight loop, instead use SetCols
-func SetSingle(ctx context.Context, c grpc.Backend, key []float32, value []byte) error {
-	return SetCols(ctx, c, [][]float32{key}, [][]byte{value})
-}
-
-// DeleteCols deletes multiple key-value pairs from the store
-// It's in columnar format so that keys[i] is associated with values[i]
-func DeleteCols(ctx context.Context, c grpc.Backend, keys [][]float32) error {
-	protoKeys := make([]*proto.StoresKey, len(keys))
-	for i, k := range keys {
-		protoKeys[i] = &proto.StoresKey{
-			Floats: k,
-		}
-	}
-	deleteOpts := &proto.StoresDeleteOptions{
-		Keys: protoKeys,
-	}
-
-	res, err := c.StoresDelete(ctx, deleteOpts)
-	if err != nil {
-		return err
-	}
-
-	if res.Success {
-		return nil
-	}
-
-	return fmt.Errorf("failed to delete keys: %v", res.Message)
-}
-
-// DeleteSingle deletes a single key-value pair from the store
-// Don't call this in a tight loop, instead use DeleteCols
-func DeleteSingle(ctx context.Context, c grpc.Backend, key []float32) error {
-	return DeleteCols(ctx, c, [][]float32{key})
-}
-
-// GetCols gets multiple key-value pairs from the store
-// It's in columnar format so that keys[i] is associated with values[i]
-// Be warned the keys are sorted and will be returned in a different order than they were input
-// There is no guarantee as to how the keys are sorted
-func GetCols(ctx context.Context, c grpc.Backend, keys [][]float32) ([][]float32, [][]byte, error) {
-	protoKeys := make([]*proto.StoresKey, len(keys))
-	for i, k := range keys {
-		protoKeys[i] = &proto.StoresKey{
-			Floats: k,
-		}
-	}
-	getOpts := &proto.StoresGetOptions{
-		Keys: protoKeys,
-	}
-
-	res, err := c.StoresGet(ctx, getOpts)
-	if err != nil {
-		return nil, nil, err
-	}
-
-	ks := make([][]float32, len(res.Keys))
-	for i, k := range res.Keys {
-		ks[i] = k.Floats
-	}
-	vs := make([][]byte, len(res.Values))
-	for i, v := range res.Values {
-		vs[i] = v.Bytes
-	}
-
-	return ks, vs, nil
-}
-
-// GetSingle gets a single key-value pair from the store
-// Don't call this in a tight loop, instead use GetCols
-func GetSingle(ctx context.Context, c grpc.Backend, key []float32) ([]byte, error) {
-	_, values, err := GetCols(ctx, c, [][]float32{key})
-	if err != nil {
-		return nil, err
-	}
-
-	if len(values) > 0 {
-		return values[0], nil
-	}
-
-	return nil, fmt.Errorf("failed to get key")
-}
-
-// Find similar keys to the given key. Returns the keys, values, and similarities
-func Find(ctx context.Context, c grpc.Backend, key []float32, topk int) ([][]float32, [][]byte, []float32, error) {
-	findOpts := &proto.StoresFindOptions{
-		Key: &proto.StoresKey{
-			Floats: key,
-		},
-		TopK: int32(topk),
-	}
-
-	res, err := c.StoresFind(ctx, findOpts)
-	if err != nil {
-		return nil, nil, nil, err
-	}
-
-	ks := make([][]float32, len(res.Keys))
-	vs := make([][]byte, len(res.Values))
-
-	for i, k := range res.Keys {
-		ks[i] = k.Floats
-	}
-
-	for i, v := range res.Values {
-		vs[i] = v.Bytes
-	}
-
-	return ks, vs, res.Similarities, nil
-}
--- a/tests/e2e-aio/e2e_suite_test.go
+++ b/tests/e2e-aio/e2e_suite_test.go
@@ -1,97 +0,0 @@
-package e2e_test
-
-import (
-	"context"
-	"fmt"
-	"os"
-	"runtime"
-	"testing"
-
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-	"github.com/ory/dockertest/v3"
-	"github.com/ory/dockertest/v3/docker"
-	"github.com/sashabaranov/go-openai"
-)
-
-var pool *dockertest.Pool
-var resource *dockertest.Resource
-var client *openai.Client
-
-var containerImage = os.Getenv("LOCALAI_IMAGE")
-var containerImageTag = os.Getenv("LOCALAI_IMAGE_TAG")
-var modelsDir = os.Getenv("LOCALAI_MODELS_DIR")
-var apiPort = os.Getenv("LOCALAI_API_PORT")
-
-func TestLocalAI(t *testing.T) {
-	RegisterFailHandler(Fail)
-	RunSpecs(t, "LocalAI E2E test suite")
-}
-
-var _ = BeforeSuite(func() {
-
-	if containerImage == "" {
-		Fail("LOCALAI_IMAGE is not set")
-	}
-	if containerImageTag == "" {
-		Fail("LOCALAI_IMAGE_TAG is not set")
-	}
-	if apiPort == "" {
-		apiPort = "8080"
-	}
-
-	p, err := dockertest.NewPool("")
-	Expect(err).To(Not(HaveOccurred()))
-	Expect(p.Client.Ping()).To(Succeed())
-
-	pool = p
-
-	// get cwd
-	cwd, err := os.Getwd()
-	Expect(err).To(Not(HaveOccurred()))
-	md := cwd + "/models"
-
-	if modelsDir != "" {
-		md = modelsDir
-	}
-
-	proc := runtime.NumCPU()
-	options := &dockertest.RunOptions{
-		Repository: containerImage,
-		Tag:        containerImageTag,
-		//	Cmd:        []string{"server", "/data"},
-		PortBindings: map[docker.Port][]docker.PortBinding{
-			"8080/tcp": []docker.PortBinding{{HostPort: apiPort}},
-		},
-		Env:    []string{"MODELS_PATH=/models", "DEBUG=true", "THREADS=" + fmt.Sprint(proc)},
-		Mounts: []string{md + ":/models"},
-	}
-
-	r, err := pool.RunWithOptions(options)
-	Expect(err).To(Not(HaveOccurred()))
-
-	resource = r
-
-	defaultConfig := openai.DefaultConfig("")
-	defaultConfig.BaseURL = "http://localhost:" + apiPort + "/v1"
-
-	// Wait for API to be ready
-	client = openai.NewClientWithConfig(defaultConfig)
-
-	Eventually(func() error {
-		_, err := client.ListModels(context.TODO())
-		return err
-	}, "20m").ShouldNot(HaveOccurred())
-})
-
-var _ = AfterSuite(func() {
-	Expect(pool.Purge(resource)).To(Succeed())
-	//dat, err := os.ReadFile(resource.Container.LogPath)
-	//Expect(err).To(Not(HaveOccurred()))
-	//Expect(string(dat)).To(ContainSubstring("GRPC Service Ready"))
-	//fmt.Println(string(dat))
-})
-
-var _ = AfterEach(func() {
-	//Expect(dbClient.Clear()).To(Succeed())
-})
--- a/tests/e2e-aio/e2e_test.go
+++ b/tests/e2e-aio/e2e_test.go
@@ -1,152 +0,0 @@
-package e2e_test
-
-import (
-	"context"
-	"fmt"
-	"io"
-	"net/http"
-	"os"
-
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-
-	"github.com/sashabaranov/go-openai"
-)
-
-var _ = Describe("E2E test", func() {
-	Context("Generating", func() {
-		BeforeEach(func() {
-			//
-		})
-
-		// Check that the GPU was used
-		AfterEach(func() {
-			//
-		})
-
-		Context("text", func() {
-			It("correctly", func() {
-				model := "gpt-4"
-				resp, err := client.CreateChatCompletion(context.TODO(),
-					openai.ChatCompletionRequest{
-						Model: model, Messages: []openai.ChatCompletionMessage{
-							{
-								Role:    "user",
-								Content: "How much is 2+2?",
-							},
-						}})
-				Expect(err).ToNot(HaveOccurred())
-				Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp))
-				Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")), fmt.Sprint(resp.Choices[0].Message.Content))
-			})
-		})
-		Context("images", func() {
-			It("correctly", func() {
-				resp, err := client.CreateImage(context.TODO(),
-					openai.ImageRequest{
-						Prompt: "test",
-						Size:   openai.CreateImageSize512x512,
-						//ResponseFormat: openai.CreateImageResponseFormatURL,
-					},
-				)
-				Expect(err).ToNot(HaveOccurred())
-				Expect(len(resp.Data)).To(Equal(1), fmt.Sprint(resp))
-				Expect(resp.Data[0].URL).To(ContainSubstring("http://localhost:8080"), fmt.Sprint(resp.Data[0].URL))
-			})
-		})
-		Context("embeddings", func() {
-			It("correctly", func() {
-				resp, err := client.CreateEmbeddings(context.TODO(),
-					openai.EmbeddingRequestStrings{
-						Input: []string{"doc"},
-						Model: openai.AdaEmbeddingV2,
-					},
-				)
-				Expect(err).ToNot(HaveOccurred())
-				Expect(len(resp.Data)).To(Equal(1), fmt.Sprint(resp))
-				Expect(resp.Data[0].Embedding).ToNot(BeEmpty())
-			})
-		})
-		Context("vision", func() {
-			It("correctly", func() {
-				model := "gpt-4-vision-preview"
-				resp, err := client.CreateChatCompletion(context.TODO(),
-					openai.ChatCompletionRequest{
-						Model: model, Messages: []openai.ChatCompletionMessage{
-							{
-
-								Role: "user",
-								MultiContent: []openai.ChatMessagePart{
-									{
-										Type: openai.ChatMessagePartTypeText,
-										Text: "What is in the image?",
-									},
-									{
-										Type: openai.ChatMessagePartTypeImageURL,
-										ImageURL: &openai.ChatMessageImageURL{
-											URL:    "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
-											Detail: openai.ImageURLDetailLow,
-										},
-									},
-								},
-							},
-						}})
-				Expect(err).ToNot(HaveOccurred())
-				Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp))
-				Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("wooden"), ContainSubstring("grass")), fmt.Sprint(resp.Choices[0].Message.Content))
-			})
-		})
-		Context("text to audio", func() {
-			It("correctly", func() {
-				res, err := client.CreateSpeech(context.Background(), openai.CreateSpeechRequest{
-					Model: openai.TTSModel1,
-					Input: "Hello!",
-					Voice: openai.VoiceAlloy,
-				})
-				Expect(err).ToNot(HaveOccurred())
-				defer res.Close()
-
-				_, err = io.ReadAll(res)
-				Expect(err).ToNot(HaveOccurred())
-
-			})
-		})
-		Context("audio to text", func() {
-			It("correctly", func() {
-
-				downloadURL := "https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav"
-				file, err := downloadHttpFile(downloadURL)
-				Expect(err).ToNot(HaveOccurred())
-
-				req := openai.AudioRequest{
-					Model:    openai.Whisper1,
-					FilePath: file,
-				}
-				resp, err := client.CreateTranscription(context.Background(), req)
-				Expect(err).ToNot(HaveOccurred())
-				Expect(resp.Text).To(ContainSubstring("This is the"), fmt.Sprint(resp.Text))
-			})
-		})
-	})
-})
-
-func downloadHttpFile(url string) (string, error) {
-	resp, err := http.Get(url)
-	if err != nil {
-		return "", err
-	}
-	defer resp.Body.Close()
-
-	tmpfile, err := os.CreateTemp("", "example")
-	if err != nil {
-		return "", err
-	}
-	defer tmpfile.Close()
-
-	_, err = io.Copy(tmpfile, resp.Body)
-	if err != nil {
-		return "", err
-	}
-
-	return tmpfile.Name(), nil
-}
--- a/tests/integration/integration_suite_test.go
+++ b/tests/integration/integration_suite_test.go
@@ -1,17 +0,0 @@
-package integration_test
-
-import (
-	"os"
-	"testing"
-
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-	"github.com/rs/zerolog"
-	"github.com/rs/zerolog/log"
-)
-
-func TestLocalAI(t *testing.T) {
-	log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
-	RegisterFailHandler(Fail)
-	RunSpecs(t, "LocalAI test suite")
-}
--- a/tests/integration/stores_test.go
+++ b/tests/integration/stores_test.go
@@ -1,228 +0,0 @@
-package integration_test
-
-import (
-	"context"
-	"embed"
-	"math"
-	"os"
-	"path/filepath"
-
-	. "github.com/onsi/ginkgo/v2"
-	. "github.com/onsi/gomega"
-	"github.com/rs/zerolog"
-	"github.com/rs/zerolog/log"
-
-	"github.com/go-skynet/LocalAI/core/config"
-	"github.com/go-skynet/LocalAI/pkg/assets"
-	"github.com/go-skynet/LocalAI/pkg/grpc"
-	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/store"
-)
-
-//go:embed backend-assets/*
-var backendAssets embed.FS
-
-var _ = Describe("Integration tests for the stores backend(s) and internal APIs", Label("stores"), func() {
-	Context("Embedded Store get,set and delete", func() {
-		var sl *model.ModelLoader
-		var sc grpc.Backend
-		var tmpdir string
-
-		BeforeEach(func() {
-			var err error
-
-			zerolog.SetGlobalLevel(zerolog.DebugLevel)
-
-			tmpdir, err = os.MkdirTemp("", "")
-			Expect(err).ToNot(HaveOccurred())
-			backendAssetsDir := filepath.Join(tmpdir, "backend-assets")
-			err = os.Mkdir(backendAssetsDir, 0755)
-			Expect(err).ToNot(HaveOccurred())
-
-			err = assets.ExtractFiles(backendAssets, backendAssetsDir)
-			Expect(err).ToNot(HaveOccurred())
-
-			debug := true
-
-			bc := config.BackendConfig{
-				Name:    "store test",
-				Debug:   &debug,
-				Backend: model.LocalStoreBackend,
-			}
-
-			storeOpts := []model.Option{
-				model.WithBackendString(bc.Backend),
-				model.WithAssetDir(backendAssetsDir),
-				model.WithModel("test"),
-			}
-
-			sl = model.NewModelLoader("")
-			sc, err = sl.BackendLoader(storeOpts...)
-			Expect(err).ToNot(HaveOccurred())
-			Expect(sc).ToNot(BeNil())
-		})
-
-		AfterEach(func() {
-			sl.StopAllGRPC()
-			err := os.RemoveAll(tmpdir)
-			Expect(err).ToNot(HaveOccurred())
-		})
-
-		It("should be able to set a key", func() {
-			err := store.SetSingle(context.Background(), sc, []float32{0.1, 0.2, 0.3}, []byte("test"))
-			Expect(err).ToNot(HaveOccurred())
-		})
-
-		It("should be able to set keys", func() {
-			err := store.SetCols(context.Background(), sc, [][]float32{{0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}}, [][]byte{[]byte("test1"), []byte("test2")})
-			Expect(err).ToNot(HaveOccurred())
-
-			err = store.SetCols(context.Background(), sc, [][]float32{{0.7, 0.8, 0.9}, {0.10, 0.11, 0.12}}, [][]byte{[]byte("test3"), []byte("test4")})
-			Expect(err).ToNot(HaveOccurred())
-		})
-
-		It("should be able to get a key", func() {
-			err := store.SetSingle(context.Background(), sc, []float32{0.1, 0.2, 0.3}, []byte("test"))
-			Expect(err).ToNot(HaveOccurred())
-
-			val, err := store.GetSingle(context.Background(), sc, []float32{0.1, 0.2, 0.3})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(val).To(Equal([]byte("test")))
-		})
-
-		It("should be able to get keys", func() {
-			//set 3 entries
-			err := store.SetCols(context.Background(), sc, [][]float32{{0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}, {0.7, 0.8, 0.9}}, [][]byte{[]byte("test1"), []byte("test2"), []byte("test3")})
-			Expect(err).ToNot(HaveOccurred())
-
-			//get 3 entries
-			keys, vals, err := store.GetCols(context.Background(), sc, [][]float32{{0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}, {0.7, 0.8, 0.9}})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(keys).To(HaveLen(3))
-			Expect(vals).To(HaveLen(3))
-			for i, k := range keys {
-				v := vals[i]
-
-				if k[0] == 0.1 && k[1] == 0.2 && k[2] == 0.3 {
-					Expect(v).To(Equal([]byte("test1")))
-				} else if k[0] == 0.4 && k[1] == 0.5 && k[2] == 0.6 {
-					Expect(v).To(Equal([]byte("test2")))
-				} else {
-					Expect(k).To(Equal([]float32{0.7, 0.8, 0.9}))
-					Expect(v).To(Equal([]byte("test3")))
-				}
-			}
-
-			//get 2 entries
-			keys, vals, err = store.GetCols(context.Background(), sc, [][]float32{{0.7, 0.8, 0.9}, {0.1, 0.2, 0.3}})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(keys).To(HaveLen(2))
-			Expect(vals).To(HaveLen(2))
-			for i, k := range keys {
-				v := vals[i]
-
-				if k[0] == 0.1 && k[1] == 0.2 && k[2] == 0.3 {
-					Expect(v).To(Equal([]byte("test1")))
-				} else {
-					Expect(k).To(Equal([]float32{0.7, 0.8, 0.9}))
-					Expect(v).To(Equal([]byte("test3")))
-				}
-			}
-		})
-
-		It("should be able to delete a key", func() {
-			err := store.SetSingle(context.Background(), sc, []float32{0.1, 0.2, 0.3}, []byte("test"))
-			Expect(err).ToNot(HaveOccurred())
-
-			err = store.DeleteSingle(context.Background(), sc, []float32{0.1, 0.2, 0.3})
-			Expect(err).ToNot(HaveOccurred())
-
-			val, _ := store.GetSingle(context.Background(), sc, []float32{0.1, 0.2, 0.3})
-			Expect(val).To(BeNil())
-		})
-
-		It("should be able to delete keys", func() {
-			//set 3 entries
-			err := store.SetCols(context.Background(), sc, [][]float32{{0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}, {0.7, 0.8, 0.9}}, [][]byte{[]byte("test1"), []byte("test2"), []byte("test3")})
-			Expect(err).ToNot(HaveOccurred())
-
-			//delete 2 entries
-			err = store.DeleteCols(context.Background(), sc, [][]float32{{0.1, 0.2, 0.3}, {0.7, 0.8, 0.9}})
-			Expect(err).ToNot(HaveOccurred())
-
-			//get 1 entry
-			keys, vals, err := store.GetCols(context.Background(), sc, [][]float32{{0.4, 0.5, 0.6}})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(keys).To(HaveLen(1))
-			Expect(vals).To(HaveLen(1))
-			Expect(keys[0]).To(Equal([]float32{0.4, 0.5, 0.6}))
-			Expect(vals[0]).To(Equal([]byte("test2")))
-
-			//get deleted entries
-			keys, vals, err = store.GetCols(context.Background(), sc, [][]float32{{0.1, 0.2, 0.3}, {0.7, 0.8, 0.9}})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(keys).To(HaveLen(0))
-			Expect(vals).To(HaveLen(0))
-		})
-
-		It("should be able to find smilar keys", func() {
-			// set 3 vectors that are at varying angles to {0.5, 0.5, 0.5}
-			err := store.SetCols(context.Background(), sc, [][]float32{{0.5, 0.5, 0.5}, {0.6, 0.6, -0.6}, {0.7, -0.7, -0.7}}, [][]byte{[]byte("test1"), []byte("test2"), []byte("test3")})
-			Expect(err).ToNot(HaveOccurred())
-
-			// find similar keys
-			keys, vals, sims, err := store.Find(context.Background(), sc, []float32{0.1, 0.3, 0.5}, 2)
-			Expect(err).ToNot(HaveOccurred())
-			Expect(keys).To(HaveLen(2))
-			Expect(vals).To(HaveLen(2))
-			Expect(sims).To(HaveLen(2))
-
-			for i, k := range keys {
-				s := sims[i]
-				log.Debug().Float32("similarity", s).Msgf("key: %v", k)
-			}
-
-			Expect(keys[0]).To(Equal([]float32{0.5, 0.5, 0.5}))
-			Expect(vals[0]).To(Equal([]byte("test1")))
-			Expect(keys[1]).To(Equal([]float32{0.6, 0.6, -0.6}))
-		})
-
-		It("should be able to find similar normalized keys", func() {
-			// set 3 vectors that are at varying angles to {0.5, 0.5, 0.5}
-			keys := [][]float32{{0.1, 0.3, 0.5}, {0.5, 0.5, 0.5}, {0.6, 0.6, -0.6}, {0.7, -0.7, -0.7}}
-			vals := [][]byte{[]byte("test0"), []byte("test1"), []byte("test2"), []byte("test3")}
-			// normalize the keys
-			for i, k := range keys {
-				norm := float64(0)
-				for _, x := range k {
-					norm += float64(x * x)
-				}
-				norm = math.Sqrt(norm)
-				for j, x := range k {
-					keys[i][j] = x / float32(norm)
-				}
-			}
-
-			err := store.SetCols(context.Background(), sc, keys, vals)
-			Expect(err).ToNot(HaveOccurred())
-
-			// find similar keys
-			ks, vals, sims, err := store.Find(context.Background(), sc, keys[0], 3)
-			Expect(err).ToNot(HaveOccurred())
-			Expect(ks).To(HaveLen(3))
-			Expect(vals).To(HaveLen(3))
-			Expect(sims).To(HaveLen(3))
-
-			for i, k := range ks {
-				s := sims[i]
-				log.Debug().Float32("similarity", s).Msgf("key: %v", k)
-			}
-
-			Expect(ks[0]).To(Equal(keys[0]))
-			Expect(vals[0]).To(Equal(vals[0]))
-			Expect(sims[0]).To(BeNumerically("~", 1, 0.0001))
-			Expect(ks[1]).To(Equal(keys[1]))
-			Expect(vals[1]).To(Equal(vals[1]))
-		})
-	})
-})
--- a/tests/models_fixtures/config.yaml
+++ b/tests/models_fixtures/config.yaml
@@ -1,6 +1,6 @@
 - name: list1
  parameters:
-    model: testmodel.ggml
+    model: testmodel
    top_p: 80
    top_k: 0.9
    temperature: 0.1
@@ -19,7 +19,7 @@
    top_p: 80
    top_k: 0.9
    temperature: 0.1
-    model: testmodel.ggml
+    model: testmodel
  context_size: 200
  stopwords:
  - "HUMAN:"
--- a/tests/models_fixtures/gpt4.yaml
+++ b/tests/models_fixtures/gpt4.yaml
@@ -1,6 +1,6 @@
 name: gpt4all
 parameters:
-  model: testmodel.ggml
+  model: testmodel
  top_p: 80
  top_k: 0.9
  temperature: 0.1
--- a/tests/models_fixtures/gpt4_2.yaml
+++ b/tests/models_fixtures/gpt4_2.yaml
@@ -1,6 +1,6 @@
 name: gpt4all-2
 parameters:
-  model: testmodel.ggml
+  model: testmodel
  top_p: 80
  top_k: 0.9
  temperature: 0.1
Author	SHA1	Message	Date
Ettore Di Giacinto	5b8d6a31e2	docs(transformers): add docs section about transformers	2024-03-15 18:02:15 +01:00
Ettore Di Giacinto	f0752be4aa	fix: adapt tts CLI	2024-03-14 19:24:50 +01:00
Ettore Di Giacinto	bafc9effad	feat(openai/tts): compat layer with openai tts Fixes: #1276	2024-03-14 18:15:28 +01:00
Ettore Di Giacinto	d2934dd69f	feat(elevenlabs): map elevenlabs API support to TTS This allows elevenlabs Clients to work automatically with LocalAI by supporting the elevenlabs API. The elevenlabs server endpoint is implemented such as it is wired to the TTS endpoints. Fixes: https://github.com/mudler/LocalAI/issues/1809	2024-03-14 18:12:47 +01:00