Stores to chromem (WIP)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-02-03 11:13:31 -05:00 · 2025-01-21 10:35:01 +01:00
93 changed files with 1484 additions and 1376 deletions
--- a/.devcontainer/docker-compose-devcontainer.yml
+++ b/.devcontainer/docker-compose-devcontainer.yml
@@ -7,7 +7,7 @@ services:
      args:
      - FFMPEG=true
      - IMAGE_TYPE=extras
-      - GO_TAGS=p2p tts
+      - GO_TAGS=stablediffusion p2p tts
    env_file:
      - ../.env
    ports:
--- a/.env
+++ b/.env
@@ -38,12 +38,12 @@
 ## Uncomment and set to true to enable rebuilding from source
 # REBUILD=true

-## Enable go tags, available: p2p, tts
-## p2p: enable distributed inferencing
+## Enable go tags, available: stablediffusion, tts
+## stablediffusion: image generation with stablediffusion
 ## tts: enables text-to-speech with go-piper 
 ## (requires REBUILD=true)
 #
-# GO_TAGS=p2p
+# GO_TAGS=stablediffusion

 ## Path where to store generated images
 # LOCALAI_IMAGE_PATH=/tmp/generated/images
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -237,7 +237,40 @@ jobs:
          detached: true
          connect-timeout-seconds: 180
          limit-access-to-actor: true
-
+  build-stablediffusion:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+      - uses: actions/setup-go@v5
+        with:
+          go-version: '1.21.x'
+          cache: false
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
+      - name: Build stablediffusion
+        run: |
+          export PATH=$PATH:$GOPATH/bin
+          make backend-assets/grpc/stablediffusion
+          mkdir -p release && cp backend-assets/grpc/stablediffusion release
+        env:
+          GO_TAGS: stablediffusion
+      - uses: actions/upload-artifact@v4
+        with:
+          name: stablediffusion
+          path: release/
+      - name: Release
+        uses: softprops/action-gh-release@v2
+        if: startsWith(github.ref, 'refs/tags/')
+        with:
+          files: |
+            release/*

  build-macOS-x86_64:
    runs-on: macos-13
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -78,6 +78,57 @@ jobs:
          make --jobs=5 --output-sync=target -C backend/python/diffusers
          make --jobs=5 --output-sync=target -C backend/python/diffusers test

+  tests-parler-tts:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential ffmpeg
+          # Install UV
+          curl -LsSf https://astral.sh/uv/install.sh | sh
+          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
+          sudo apt-get install -y libopencv-dev
+          pip install --user --no-cache-dir grpcio-tools==1.64.1
+
+      - name: Test parler-tts
+        run: |
+           make --jobs=5 --output-sync=target -C backend/python/parler-tts
+           make --jobs=5 --output-sync=target -C backend/python/parler-tts test
+      - name: Setup tmate session if tests fail
+        if: ${{ failure() }}
+        uses: mxschmitt/action-tmate@v3.19
+        with:
+          detached: true
+          connect-timeout-seconds: 180
+          limit-access-to-actor: true
+
+  tests-openvoice:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential ffmpeg
+          # Install UV
+          curl -LsSf https://astral.sh/uv/install.sh | sh
+          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
+          sudo apt-get install -y libopencv-dev
+          pip install --user --no-cache-dir grpcio-tools==1.64.1
+
+      - name: Test openvoice
+        run: |
+           make --jobs=5 --output-sync=target -C backend/python/openvoice
+           make --jobs=5 --output-sync=target -C backend/python/openvoice test
+
  # tests-transformers-musicgen:
  #   runs-on: ubuntu-latest
  #   steps:
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -105,7 +105,9 @@ jobs:
          # Pre-build piper before we start tests in order to have shared libraries in place
          make sources/go-piper && \
          GO_TAGS="tts" make -C sources/go-piper piper.o && \
-          sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/
+          sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
+          # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncnn)
+          PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
        env:
          CUDA_VERSION: 12-4
      - name: Cache grpc
@@ -127,7 +129,7 @@ jobs:
          cd grpc && cd cmake/build && sudo make --jobs 5 install
      - name: Test
        run: |
-          PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
+          PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
      - name: Setup tmate session if tests fail
        if: ${{ failure() }}
        uses: mxschmitt/action-tmate@v3.19
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -26,7 +26,7 @@
                "LOCALAI_P2P": "true",
                "LOCALAI_FEDERATED": "true"
            },
-            "buildFlags": ["-tags", "p2p tts", "-v"],
+            "buildFlags": ["-tags", "stablediffusion p2p tts", "-v"],
            "envFile": "${workspaceFolder}/.env",
            "cwd": "${workspaceRoot}"
        }
--- a/53
+++ b/53
@@ -15,7 +15,8 @@ ARG TARGETARCH
 ARG TARGETVARIANT

 ENV DEBIAN_FRONTEND=noninteractive
-ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
+ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
+

 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
@@ -68,10 +69,14 @@ ENV PATH=/opt/rocm/bin:${PATH}
 # OpenBLAS requirements and stable diffusion
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-        libopenblas-dev && \
+        libopenblas-dev \
+        libopencv-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

+# Set up OpenCV
+RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+
 WORKDIR /build

 ###################################
@@ -246,7 +251,7 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall

 FROM requirements-drivers AS builder-base

-ARG GO_TAGS="tts p2p"
+ARG GO_TAGS="stablediffusion tts p2p"
 ARG GRPC_BACKENDS
 ARG MAKEFLAGS
 ARG LD_FLAGS="-s -w"
@@ -280,12 +285,35 @@ RUN <<EOT bash
    fi
 EOT

+
+###################################
+###################################
+
+# This first portion of builder holds the layers specifically used to build backend-assets/grpc/stablediffusion
+# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
+FROM builder-base AS builder-sd
+
+# stablediffusion does not tolerate a newer version of abseil, so copy over only the files needed to build it
+COPY Makefile .
+COPY go.mod .
+COPY go.sum .
+COPY backend/backend.proto ./backend/backend.proto
+COPY backend/go/image/stablediffusion ./backend/go/image/stablediffusion
+COPY pkg/grpc ./pkg/grpc
+COPY pkg/stablediffusion ./pkg/stablediffusion
+RUN git init
+RUN make sources/go-stable-diffusion
+RUN touch prepare-sources
+
+# Actually build the backend
+RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make backend-assets/grpc/stablediffusion
+
 ###################################
 ###################################

 # The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
 # Adjustments to the build process should likely be made here.
-FROM builder-base AS builder
+FROM builder-sd AS builder

 # Install the pre-built GRPC
 COPY --from=grpc /opt/grpc /usr/local
@@ -303,7 +331,7 @@ RUN make prepare
 ## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
 ## (both will use CUDA or hipblas for the actual computation)
 RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
-        SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
+        SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
    else \
        make build; \
    fi
@@ -325,6 +353,8 @@ ARG FFMPEG

 COPY --from=grpc /opt/grpc /usr/local

+COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion /build/backend-assets/grpc/stablediffusion
+
 COPY .devcontainer-scripts /.devcontainer-scripts

 # Add FFmpeg
@@ -397,6 +427,9 @@ COPY --from=builder /build/local-ai ./
 # Copy shared libraries for piper
 COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/

+# do not let stablediffusion rebuild (requires an older version of absl)
+COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
+
 # Change the shell to bash so we can use [[ tests below
 SHELL ["/bin/bash", "-c"]
 # We try to strike a balance between individual layer size (as that affects total push time) and total image size
@@ -410,8 +443,8 @@ RUN if [[ ( "${IMAGE_TYPE}" == "extras ")]]; then \
 RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
        make -C backend/python/coqui \
    ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "faster-whisper" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/faster-whisper \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "parler-tts" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+        make -C backend/python/parler-tts \
    ; fi && \
    if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
        make -C backend/python/diffusers \
@@ -420,6 +453,9 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG
 RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
        make -C backend/python/kokoro \
    ; fi && \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+        make -C backend/python/openvoice \
+    ; fi && \
    if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
        make -C backend/python/exllama2 \
    ; fi && \
@@ -438,6 +474,9 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE
    ; fi && \
    if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
        make -C backend/python/rerankers \
+    ; fi && \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "mamba" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+        make -C backend/python/mamba \
    ; fi

 # Make sure the models directory exists
--- a/86
+++ b/86
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=6152129d05870cb38162c422c6ba80434e021e9f
+CPPLLAMA_VERSION?=92bc493917d43b83e592349e138b54c90b1c3ea7

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
@@ -18,6 +18,10 @@ WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
 PIPER_REPO?=https://github.com/mudler/go-piper
 PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0

+# stablediffusion version
+STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
+STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
+
 # bark.cpp
 BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
 BARKCPP_VERSION?=v1.0.0
@@ -175,6 +179,11 @@ ifeq ($(STATIC),true)
 	LD_FLAGS+=-linkmode external -extldflags -static
 endif

+ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
+#	OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
+	OPTIONAL_GRPC+=backend-assets/grpc/stablediffusion
+endif
+
 ifeq ($(findstring tts,$(GO_TAGS)),tts)
 #	OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
 #	OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
@@ -186,7 +195,6 @@ endif
 ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
-ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
@@ -265,6 +273,19 @@ sources/go-piper:
 sources/go-piper/libpiper_binding.a: sources/go-piper
 	$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o

+## stable diffusion (ncnn)
+sources/go-stable-diffusion:
+	mkdir -p sources/go-stable-diffusion
+	cd sources/go-stable-diffusion && \
+	git init && \
+	git remote add origin $(STABLEDIFFUSION_REPO) && \
+	git fetch origin && \
+	git checkout $(STABLEDIFFUSION_VERSION) && \
+	git submodule update --init --recursive --depth 1 --single-branch
+
+sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
+	CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
+
 ## stablediffusion (ggml)
 sources/stablediffusion-ggml.cpp:
 	git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
@@ -310,18 +331,20 @@ sources/whisper.cpp:
 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
 	cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a

-get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
+get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion backend/cpp/llama/llama.cpp

 replace:
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
 	$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
+	$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp

 dropreplace:
 	$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
 	$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
 	$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
+	$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
 	$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp

 prepare-sources: get-sources replace
@@ -332,6 +355,7 @@ rebuild: ## Rebuilds the project
 	$(GOCMD) clean -cache
 	$(MAKE) -C sources/go-llama.cpp clean
 	$(MAKE) -C sources/whisper.cpp clean
+	$(MAKE) -C sources/go-stable-diffusion clean
 	$(MAKE) -C sources/go-piper clean
 	$(MAKE) build

@@ -446,7 +470,7 @@ prepare-test: grpcs

 test: prepare test-models/testmodel.ggml grpcs
 	@echo 'Running tests'
-	export GO_TAGS="tts debug"
+	export GO_TAGS="tts stablediffusion debug"
 	$(MAKE) prepare-test
 	HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf"  --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
@@ -534,10 +558,10 @@ protogen-go-clean:
 	$(RM) bin/*

 .PHONY: protogen-python
-protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
+protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen

 .PHONY: protogen-python-clean
-protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean  exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean
+protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean  exllama2-protogen-clean mamba-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean

 .PHONY: autogptq-protogen
 autogptq-protogen:
@@ -571,14 +595,6 @@ diffusers-protogen:
 diffusers-protogen-clean:
 	$(MAKE) -C backend/python/diffusers protogen-clean

-.PHONY: faster-whisper-protogen
-faster-whisper-protogen:
-	$(MAKE) -C backend/python/faster-whisper protogen
-
-.PHONY: faster-whisper-protogen-clean
-faster-whisper-protogen-clean:
-	$(MAKE) -C backend/python/faster-whisper protogen-clean
-
 .PHONY: exllama2-protogen
 exllama2-protogen:
 	$(MAKE) -C backend/python/exllama2 protogen
@@ -587,6 +603,14 @@ exllama2-protogen:
 exllama2-protogen-clean:
 	$(MAKE) -C backend/python/exllama2 protogen-clean

+.PHONY: mamba-protogen
+mamba-protogen:
+	$(MAKE) -C backend/python/mamba protogen
+
+.PHONY: mamba-protogen-clean
+mamba-protogen-clean:
+	$(MAKE) -C backend/python/mamba protogen-clean
+
 .PHONY: rerankers-protogen
 rerankers-protogen:
 	$(MAKE) -C backend/python/rerankers protogen
@@ -603,6 +627,14 @@ transformers-protogen:
 transformers-protogen-clean:
 	$(MAKE) -C backend/python/transformers protogen-clean

+.PHONY: parler-tts-protogen
+parler-tts-protogen:
+	$(MAKE) -C backend/python/parler-tts protogen
+
+.PHONY: parler-tts-protogen-clean
+parler-tts-protogen-clean:
+	$(MAKE) -C backend/python/parler-tts protogen-clean
+
 .PHONY: kokoro-protogen
 kokoro-protogen:
 	$(MAKE) -C backend/python/kokoro protogen
@@ -611,6 +643,14 @@ kokoro-protogen:
 kokoro-protogen-clean:
 	$(MAKE) -C backend/python/kokoro protogen-clean

+.PHONY: openvoice-protogen
+openvoice-protogen:
+	$(MAKE) -C backend/python/openvoice protogen
+
+.PHONY: openvoice-protogen-clean
+openvoice-protogen-clean:
+	$(MAKE) -C backend/python/openvoice protogen-clean
+
 .PHONY: vllm-protogen
 vllm-protogen:
 	$(MAKE) -C backend/python/vllm protogen
@@ -626,11 +666,13 @@ prepare-extra-conda-environments: protogen-python
 	$(MAKE) -C backend/python/bark
 	$(MAKE) -C backend/python/coqui
 	$(MAKE) -C backend/python/diffusers
-	$(MAKE) -C backend/python/faster-whisper
 	$(MAKE) -C backend/python/vllm
+	$(MAKE) -C backend/python/mamba
 	$(MAKE) -C backend/python/rerankers
 	$(MAKE) -C backend/python/transformers
+	$(MAKE) -C backend/python/parler-tts
 	$(MAKE) -C backend/python/kokoro
+	$(MAKE) -C backend/python/openvoice
 	$(MAKE) -C backend/python/exllama2

 prepare-test-extra: protogen-python
@@ -700,13 +742,6 @@ backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.
 	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
 	cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2

-backend-assets/grpc/llama-cpp-avx512: backend-assets/grpc backend/cpp/llama/llama.cpp
-	cp -rf backend/cpp/llama backend/cpp/llama-avx512
-	$(MAKE) -C backend/cpp/llama-avx512 purge
-	$(info ${GREEN}I llama-cpp build info:avx512${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx512" build-llama-cpp-grpc-server
-	cp -rfv backend/cpp/llama-avx512/grpc-server backend-assets/grpc/llama-cpp-avx512
-
 backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
 	cp -rf backend/cpp/llama backend/cpp/llama-avx
 	$(MAKE) -C backend/cpp/llama-avx purge
@@ -781,6 +816,13 @@ ifneq ($(UPX),)
 	$(UPX) backend-assets/grpc/piper
 endif

+backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/stablediffusion
+endif
+
 backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
--- a/README.md
+++ b/README.md
@@ -39,7 +39,7 @@
 </p>

 <p align="center">
-<a href="https://trendshift.io/repositories/5539" target="_blank"><img src="https://trendshift.io/api/badge/repositories/5539" alt="mudler%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+<a href="https://trendshift.io/repositories/1484" target="_blank"><img src="https://trendshift.io/api/badge/repositories/1484" alt="go-skynet%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
 </p>

 > :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
--- a/aio/cpu/image-gen.yaml
+++ b/aio/cpu/image-gen.yaml
@@ -1,17 +1,56 @@
 name: stablediffusion
-backend: stablediffusion-ggml
-cfg_scale: 4.5
-
-options:
- sampler:euler
+backend: stablediffusion
 parameters:
-  model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf
-step: 25
+  model: stablediffusion_assets
+
+license: "BSD-3"
+urls:
+- https://github.com/EdVince/Stable-Diffusion-NCNN
+- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE
+
+description: |
+     Stable Diffusion in NCNN with c++, supported txt2img and img2img

 download_files:
- filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
-  sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f"
-  uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
+- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
+  sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
+  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
+- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
+  sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
+  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
+- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
+  sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
+  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
+- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
+  sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
+  uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin"
+- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin"
+  sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
+  uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin"
+- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
+  sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
+  uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin"
+- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
+  sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
+  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
+- filename: "stablediffusion_assets/log_sigmas.bin"
+  sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
+  uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
+- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
+  sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
+  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
+- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
+  sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
+  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
+- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
+  sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
+  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
+- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
+  sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
+  uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
+- filename: "stablediffusion_assets/vocab.txt"
+  sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
+  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"

 usage: |
        curl http://localhost:8080/v1/images/generations \
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -21,8 +21,7 @@ service Backend {
  rpc Status(HealthMessage) returns (StatusResponse) {}

  rpc StoresSet(StoresSetOptions) returns (Result) {}
-  rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
-  rpc StoresGet(StoresGetOptions) returns (StoresGetResult) {}
+  rpc StoresReset(StoresResetOptions) returns (Result) {}
  rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}

  rpc Rerank(RerankRequest) returns (RerankResult) {}
@@ -78,19 +77,10 @@ message StoresSetOptions {
  repeated StoresValue Values = 2;
 }

-message StoresDeleteOptions {
+message StoresResetOptions {
  repeated StoresKey Keys = 1;
 }

-message StoresGetOptions {
-  repeated StoresKey Keys = 1;
-}
-
-message StoresGetResult {
-  repeated StoresKey Keys = 1;
-  repeated StoresValue Values = 2;
-}
-
 message StoresFindOptions {
  StoresKey Key = 1;
  int32 TopK = 2;
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -22,7 +22,6 @@
 #include "backend.grpc.pb.h"
 #include "utils.hpp"
 #include "sampling.h"
-#include "speculative.h"
 // include std::regex
 #include <cstddef>
 #include <thread>
@@ -186,45 +185,12 @@ static json probs_vector_to_json(const llama_context *ctx, const std::vector<com
    return out;
 }

-struct llama_slot_params {
-    uint32_t seed      = -1; // RNG seed
-    bool stream        = true;
-    bool cache_prompt  = true; // remember the prompt to avoid reprocessing all prompt
-    bool return_tokens = false;
-
-    int32_t n_keep    =  0; // number of tokens to keep from initial prompt
-    int32_t n_discard =  0; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half
-    int32_t n_predict = -1; // new tokens to predict
-    int32_t n_indent  =  0; // mininum line indentation for the generated text in number of whitespace characters
-
-    int64_t t_max_prompt_ms  = -1; // TODO: implement
-    int64_t t_max_predict_ms = -1; // if positive, limit the generation phase to this time limit
-
-    std::vector<common_adapter_lora_info> lora;
-
-    std::vector<std::string> antiprompt;
-    std::vector<std::string> response_fields;
-    bool timings_per_token = false;
-    bool post_sampling_probs = false;
-    bool ignore_eos = false;
-
-    json input_prefix;
-    json input_suffix;
-
-    struct common_params_sampling sampling;
-    struct common_params_speculative speculative;
-};
-
-
 struct llama_client_slot
 {
    int id;
    int task_id = -1;

-    struct llama_slot_params params;
-    common_speculative * spec = nullptr;
-    llama_batch batch_spec = {};
-
+    struct slot_params params;

    slot_state state = IDLE;
    slot_command command = NONE;
@@ -317,7 +283,6 @@ struct llama_client_slot
        images.clear();
    }

-
    bool has_budget(common_params &global_params) {
        if (params.n_predict == -1 && global_params.n_predict == -1)
        {
@@ -489,10 +454,6 @@ struct llama_server_context
 {
    llama_model *model = nullptr;
    llama_context *ctx = nullptr;
-    common_init_result llama_init_dft;
-    llama_context * ctx_dft = nullptr;
-    llama_model * model_dft = nullptr;
-    llama_context_params cparams_dft;
    const llama_vocab * vocab = nullptr;

    clip_ctx *clp_ctx = nullptr;
@@ -541,7 +502,6 @@ struct llama_server_context
        }
    }

-
    bool load_model(const common_params &params_)
    {
        params = params_;
@@ -585,45 +545,6 @@ struct llama_server_context
        add_bos_token = llama_vocab_get_add_bos(vocab);
        has_eos_token = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL;

-        if (!params.speculative.model.empty()) {
-            LOG("loading draft model '%s'\n", params.speculative.model.c_str());
-
-            auto params_dft = params;
-
-            params_dft.devices      = params.speculative.devices;
-            params_dft.model        = params.speculative.model;
-            params_dft.n_ctx        = params.speculative.n_ctx == 0 ? params.n_ctx / params.n_parallel : params.speculative.n_ctx;
-            params_dft.n_gpu_layers = params.speculative.n_gpu_layers;
-            params_dft.n_parallel   = 1;
-
-            llama_init_dft = common_init_from_params(params_dft);
-
-            model_dft = llama_init_dft.model.get();
-
-            if (model_dft == nullptr) {
-                LOG("failed to load draft model, '%s'\n", params.speculative.model.c_str());
-                return false;
-            }
-
-            if (!common_speculative_are_compatible(ctx, llama_init_dft.context.get())) {
-                LOG("the draft model '%s' is not compatible with the target model '%s'\n", params.speculative.model.c_str(), params.model.c_str());
-
-                return false;
-            }
-
-            const int n_ctx_dft = llama_n_ctx(llama_init_dft.context.get());
-
-            cparams_dft = common_context_params_to_llama(params_dft);
-            cparams_dft.n_batch = n_ctx_dft;
-
-            // force F16 KV cache for the draft model for extra performance
-            cparams_dft.type_k = GGML_TYPE_F16;
-            cparams_dft.type_v = GGML_TYPE_F16;
-
-            // the context is not needed - we will create one for each slot
-            llama_init_dft.context.reset();
-        }
-
        return true;
    }

@@ -652,22 +573,6 @@ struct llama_server_context
            slot.n_ctx = n_ctx_slot;
            slot.n_predict = params.n_predict;

-            if (model_dft) {
-                slot.batch_spec = llama_batch_init(params.speculative.n_max + 1, 0, 1);
-
-                ctx_dft = llama_init_from_model(model_dft, cparams_dft);
-                if (ctx_dft == nullptr) {
-                    LOG("%s", "failed to create draft context\n");
-                    return;
-                }
-
-                slot.spec = common_speculative_init(ctx_dft);
-                if (slot.spec == nullptr) {
-                    LOG("%s", "failed to create speculator\n");
-                    return;
-                }
-            }
-
            LOG_INFO("new slot", {
                {"slot_id",    slot.id},
                {"n_ctx_slot", slot.n_ctx}
@@ -776,11 +681,9 @@ struct llama_server_context
    }

    bool launch_slot_with_data(llama_client_slot* &slot, json data) {
-        llama_slot_params default_params;
+        slot_params default_params;
        common_params_sampling default_sparams;
-        
-        default_sparams.speculative = params_base.speculative;
-
+ 
        slot->params.stream             = json_value(data, "stream",            false);
        slot->params.cache_prompt       = json_value(data, "cache_prompt",      false);
        slot->params.n_predict          = json_value(data, "n_predict",         default_params.n_predict);
@@ -804,15 +707,6 @@ struct llama_server_context
        slot->sparams.n_probs           = json_value(data, "n_probs",           default_sparams.n_probs);
        slot->sparams.min_keep          = json_value(data, "min_keep",          default_sparams.min_keep);

-
-        slot->sparams.speculative.n_min = json_value(data, "speculative.n_min", defaults.speculative.n_min);
-        slot->sparams.speculative.n_max = json_value(data, "speculative.n_max", defaults.speculative.n_max);
-        slot->sparams.speculative.p_min = json_value(data, "speculative.p_min", defaults.speculative.p_min);
-
-        slot->sparams.speculative.n_min = std::min(params.speculative.n_max, params.speculative.n_min);
-        slot->sparams.speculative.n_min = std::max(params.speculative.n_min, 2);
-        slot->sparams.speculative.n_max = std::max(params.speculative.n_max, 0);
-
        if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) {
            // Might be better to reject the request with a 400 ?
            LOG_WARNING("Max tokens to predict exceeds server configuration", {
@@ -2130,97 +2024,6 @@ struct llama_server_context
            }
        }

-        // do speculative decoding
-        for (auto & slot : slots) {
-            if (!slot.is_processing() || !(ctx_dft && params.speculative.n_max > 0)) {
-                continue;
-            }
-
-            if (slot.state != PROCESSING) {
-                continue;
-            }
-
-            // determine the max draft that fits the current slot state
-            int n_draft_max = slot.params.speculative.n_max;
-
-            // note: n_past is not yet increased for the `id` token sampled above
-            //       also, need to leave space for 1 extra token to allow context shifts
-            n_draft_max = std::min(n_draft_max, slot.n_ctx - slot.n_past - 2);
-
-            if (slot.n_remaining > 0) {
-                n_draft_max = std::min(n_draft_max, slot.n_remaining - 1);
-            }
-
-            LOG("max possible draft: %d\n", n_draft_max);
-
-            if (n_draft_max < slot.params.speculative.n_min) {
-                LOG("the max possible draft is too small: %d < %d - skipping speculative decoding\n", n_draft_max, slot.params.speculative.n_min);
-
-                continue;
-            }
-
-            llama_token id = slot.sampled;
-
-            struct common_speculative_params params_spec;
-            params_spec.n_draft   = n_draft_max;
-            params_spec.n_reuse   = llama_n_ctx(ctx_dft) - slot.params.speculative.n_max;
-            params_spec.p_min     = slot.params.speculative.p_min;
-
-            llama_tokens draft = common_speculative_gen_draft(slot.spec, params_spec, slot.cache_tokens, id);
-
-            // ignore small drafts
-            if (slot.params.speculative.n_min > (int) draft.size()) {
-                LOG("ignoring small draft: %d < %d\n", (int) draft.size(), slot.params.speculative.n_min);
-
-                continue;
-            }
-
-            // construct the speculation batch
-            common_batch_clear(slot.batch_spec);
-            common_batch_add  (slot.batch_spec, id, slot.n_past, { slot.id }, true);
-
-            for (size_t i = 0; i < draft.size(); ++i) {
-                common_batch_add(slot.batch_spec, draft[i], slot.n_past + 1 + i, { slot.id }, true);
-            }
-
-            LOG("decoding speculative batch, size = %d\n", slot.batch_spec.n_tokens);
-
-            llama_decode(ctx, slot.batch_spec);
-
-            // the accepted tokens from the speculation
-            const auto ids = common_sampler_sample_and_accept_n(slot.ctx_sampling, ctx, draft);
-
-            slot.n_past    += ids.size();
-            slot.n_decoded += ids.size();
-
-            slot.cache_tokens.push_back(id);
-            slot.cache_tokens.insert(slot.cache_tokens.end(), ids.begin(), ids.end() - 1);
-
-            llama_kv_cache_seq_rm(ctx, slot.id, slot.n_past, -1);
-
-            for (size_t i = 0; i < ids.size(); ++i) {
-                completion_token_output result;
-
-                result.tok          = ids[i];
-                result.text_to_send = common_token_to_piece(ctx, result.tok, params.special);
-                //result.prob         = 1.0f; // set later
-
-                // TODO: set result.probs
-
-                if (!process_token(result, slot)) {
-                    // release slot because of stop condition
-                    slot.release();
-                    slot.print_timings();
-                    send_final_response(slot);
-                    metrics.on_prediction(slot);
-                    break;
-                }
-            }
-
-        LOG("accepted %d/%d draft tokens, new n_past = %d\n", (int) ids.size() - 1, (int) draft.size(), slot.n_past);
-        }
-    
-
        LOG_VERBOSE("slots updated", {});
        return true;
    }
@@ -2493,30 +2296,6 @@ static void params_parse(const backend::ModelOptions* request,
    params.cpuparams.n_threads = request->threads();
    params.n_gpu_layers = request->ngpulayers();
    params.n_batch = request->nbatch();
-    params.speculative.model = request->draftmodel();
-
-    // If options is not NULL, parse options
-    for (int i = 0; request->options()[i] != NULL; i++) {
-        char *optname = strtok(request->options()[i], ":");
-        char *optval = strtok(NULL, ":");
-        if (optval == NULL) {
-            optval = "true";
-        }
-
-        if (!strcmp(optname, "speculative.n_gpu_layers")) {
-            params.speculative.n_gpu_layers = std::stoi(optval);
-        }
-        if (!strcmp(optname, "speculative.n_ctx")) {
-            params.speculative.n_ctx = std::stoi(optval);
-        }
-    }
-
-    if params.speculative.n_gpu_layers == 0 {
-        params.speculative.n_gpu_layers = params.n_gpu_layers;
-    }
-    if params.speculative.n_ctx == 0 {
-        params.speculative.n_ctx = params.n_ctx;
-    }
    // Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1
    //params.n_parallel = 1;
    const char *env_parallel = std::getenv("LLAMACPP_PARALLEL");
--- a/backend/go/image/stablediffusion/main.go
+++ b/backend/go/image/stablediffusion/main.go
@@ -0,0 +1,21 @@
+package main
+
+// Note: this is started internally by LocalAI and a server is allocated for each model
+
+import (
+	"flag"
+
+	grpc "github.com/mudler/LocalAI/pkg/grpc"
+)
+
+var (
+	addr = flag.String("addr", "localhost:50051", "the address to connect to")
+)
+
+func main() {
+	flag.Parse()
+
+	if err := grpc.StartServer(*addr, &Image{}); err != nil {
+		panic(err)
+	}
+}
--- a/backend/go/image/stablediffusion/stablediffusion.go
+++ b/backend/go/image/stablediffusion/stablediffusion.go
@@ -0,0 +1,33 @@
+package main
+
+// This is a wrapper to satisfy the GRPC service interface
+// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
+import (
+	"github.com/mudler/LocalAI/pkg/grpc/base"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+	"github.com/mudler/LocalAI/pkg/stablediffusion"
+)
+
+type Image struct {
+	base.SingleThread
+	stablediffusion *stablediffusion.StableDiffusion
+}
+
+func (image *Image) Load(opts *pb.ModelOptions) error {
+	var err error
+	// Note: the Model here is a path to a directory containing the model files
+	image.stablediffusion, err = stablediffusion.New(opts.ModelFile)
+	return err
+}
+
+func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
+	return image.stablediffusion.GenerateImage(
+		int(opts.Height),
+		int(opts.Width),
+		int(opts.Mode),
+		int(opts.Step),
+		int(opts.Seed),
+		opts.PositivePrompt,
+		opts.NegativePrompt,
+		opts.Dst)
+}
--- a/backend/go/stores/store.go
+++ b/backend/go/stores/store.go
@@ -4,101 +4,36 @@ package main
 // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
 import (
 	"container/heap"
+	"context"
 	"fmt"
 	"math"
-	"slices"
+	"runtime"

 	"github.com/mudler/LocalAI/pkg/grpc/base"
 	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+	chromem "github.com/philippgille/chromem-go"

 	"github.com/rs/zerolog/log"
 )

 type Store struct {
 	base.SingleThread
-
-	// The sorted keys
-	keys [][]float32
-	// The sorted values
-	values [][]byte
-
-	// If for every K it holds that ||k||^2 = 1, then we can use the normalized distance functions
-	// TODO: Should we normalize incoming keys if they are not instead?
-	keysAreNormalized bool
-	// The first key decides the length of the keys
-	keyLen int
-}
-
-// TODO: Only used for sorting using Go's builtin implementation. The interfaces are columnar because
-// that's theoretically best for memory layout and cache locality, but this isn't optimized yet.
-type Pair struct {
-	Key   []float32
-	Value []byte
+	*chromem.DB
+	*chromem.Collection
 }

 func NewStore() *Store {
-	return &Store{
-		keys:              make([][]float32, 0),
-		values:            make([][]byte, 0),
-		keysAreNormalized: true,
-		keyLen:            -1,
-	}
-}
-
-func compareSlices(k1, k2 []float32) int {
-	assert(len(k1) == len(k2), fmt.Sprintf("compareSlices: len(k1) = %d, len(k2) = %d", len(k1), len(k2)))
-
-	return slices.Compare(k1, k2)
-}
-
-func hasKey(unsortedSlice [][]float32, target []float32) bool {
-	return slices.ContainsFunc(unsortedSlice, func(k []float32) bool {
-		return compareSlices(k, target) == 0
-	})
-}
-
-func findInSortedSlice(sortedSlice [][]float32, target []float32) (int, bool) {
-	return slices.BinarySearchFunc(sortedSlice, target, func(k, t []float32) int {
-		return compareSlices(k, t)
-	})
-}
-
-func isSortedPairs(kvs []Pair) bool {
-	for i := 1; i < len(kvs); i++ {
-		if compareSlices(kvs[i-1].Key, kvs[i].Key) > 0 {
-			return false
-		}
-	}
-
-	return true
-}
-
-func isSortedKeys(keys [][]float32) bool {
-	for i := 1; i < len(keys); i++ {
-		if compareSlices(keys[i-1], keys[i]) > 0 {
-			return false
-		}
-	}
-
-	return true
-}
-
-func sortIntoKeySlicese(keys []*pb.StoresKey) [][]float32 {
-	ks := make([][]float32, len(keys))
-
-	for i, k := range keys {
-		ks[i] = k.Floats
-	}
-
-	slices.SortFunc(ks, compareSlices)
-
-	assert(len(ks) == len(keys), fmt.Sprintf("len(ks) = %d, len(keys) = %d", len(ks), len(keys)))
-	assert(isSortedKeys(ks), "keys are not sorted")
-
-	return ks
+	return &Store{}
 }

 func (s *Store) Load(opts *pb.ModelOptions) error {
+	db := chromem.NewDB()
+	collection, err := db.CreateCollection("all-documents", nil, nil)
+	if err != nil {
+		return err
+	}
+	s.DB = db
+	s.Collection = collection
 	return nil
 }

@@ -111,156 +46,25 @@ func (s *Store) StoresSet(opts *pb.StoresSetOptions) error {
 	if len(opts.Keys) != len(opts.Values) {
 		return fmt.Errorf("len(keys) = %d, len(values) = %d", len(opts.Keys), len(opts.Values))
 	}
-
-	if s.keyLen == -1 {
-		s.keyLen = len(opts.Keys[0].Floats)
-	} else {
-		if len(opts.Keys[0].Floats) != s.keyLen {
-			return fmt.Errorf("Try to add key with length %d when existing length is %d", len(opts.Keys[0].Floats), s.keyLen)
-		}
-	}
-
-	kvs := make([]Pair, len(opts.Keys))
+	docs := []chromem.Document{}

 	for i, k := range opts.Keys {
-		if s.keysAreNormalized && !isNormalized(k.Floats) {
-			s.keysAreNormalized = false
-			var sample []float32
-			if len(s.keys) > 5 {
-				sample = k.Floats[:5]
-			} else {
-				sample = k.Floats
-			}
-			log.Debug().Msgf("Key is not normalized: %v", sample)
-		}
-
-		kvs[i] = Pair{
-			Key:   k.Floats,
-			Value: opts.Values[i].Bytes,
-		}
+		docs = append(docs, chromem.Document{
+			ID:      k.String(),
+			Content: opts.Values[i].String(),
+		})
 	}

-	slices.SortFunc(kvs, func(a, b Pair) int {
-		return compareSlices(a.Key, b.Key)
-	})
-
-	assert(len(kvs) == len(opts.Keys), fmt.Sprintf("len(kvs) = %d, len(opts.Keys) = %d", len(kvs), len(opts.Keys)))
-	assert(isSortedPairs(kvs), "keys are not sorted")
-
-	l := len(kvs) + len(s.keys)
-	merge_ks := make([][]float32, 0, l)
-	merge_vs := make([][]byte, 0, l)
-
-	i, j := 0, 0
-	for {
-		if i+j >= l {
-			break
-		}
-
-		if i >= len(kvs) {
-			merge_ks = append(merge_ks, s.keys[j])
-			merge_vs = append(merge_vs, s.values[j])
-			j++
-			continue
-		}
-
-		if j >= len(s.keys) {
-			merge_ks = append(merge_ks, kvs[i].Key)
-			merge_vs = append(merge_vs, kvs[i].Value)
-			i++
-			continue
-		}
-
-		c := compareSlices(kvs[i].Key, s.keys[j])
-		if c < 0 {
-			merge_ks = append(merge_ks, kvs[i].Key)
-			merge_vs = append(merge_vs, kvs[i].Value)
-			i++
-		} else if c > 0 {
-			merge_ks = append(merge_ks, s.keys[j])
-			merge_vs = append(merge_vs, s.values[j])
-			j++
-		} else {
-			merge_ks = append(merge_ks, kvs[i].Key)
-			merge_vs = append(merge_vs, kvs[i].Value)
-			i++
-			j++
-		}
-	}
-
-	assert(len(merge_ks) == l, fmt.Sprintf("len(merge_ks) = %d, l = %d", len(merge_ks), l))
-	assert(isSortedKeys(merge_ks), "merge keys are not sorted")
-
-	s.keys = merge_ks
-	s.values = merge_vs
-
-	return nil
+	return s.Collection.AddDocuments(context.Background(), docs, runtime.NumCPU())
 }

-func (s *Store) StoresDelete(opts *pb.StoresDeleteOptions) error {
-	if len(opts.Keys) == 0 {
-		return fmt.Errorf("no keys to delete")
+func (s *Store) StoresReset(opts *pb.StoresResetOptions) error {
+	err := s.DB.DeleteCollection("all-documents")
+	if err != nil {
+		return err
 	}
-
-	if len(opts.Keys) == 0 {
-		return fmt.Errorf("no keys to add")
-	}
-
-	if s.keyLen == -1 {
-		s.keyLen = len(opts.Keys[0].Floats)
-	} else {
-		if len(opts.Keys[0].Floats) != s.keyLen {
-			return fmt.Errorf("Trying to delete key with length %d when existing length is %d", len(opts.Keys[0].Floats), s.keyLen)
-		}
-	}
-
-	ks := sortIntoKeySlicese(opts.Keys)
-
-	l := len(s.keys) - len(ks)
-	merge_ks := make([][]float32, 0, l)
-	merge_vs := make([][]byte, 0, l)
-
-	tail_ks := s.keys
-	tail_vs := s.values
-	for _, k := range ks {
-		j, found := findInSortedSlice(tail_ks, k)
-
-		if found {
-			merge_ks = append(merge_ks, tail_ks[:j]...)
-			merge_vs = append(merge_vs, tail_vs[:j]...)
-			tail_ks = tail_ks[j+1:]
-			tail_vs = tail_vs[j+1:]
-		} else {
-			assert(!hasKey(s.keys, k), fmt.Sprintf("Key exists, but was not found: t=%d, %v", len(tail_ks), k))
-		}
-
-		log.Debug().Msgf("Delete: found = %v, t = %d, j = %d, len(merge_ks) = %d, len(merge_vs) = %d", found, len(tail_ks), j, len(merge_ks), len(merge_vs))
-	}
-
-	merge_ks = append(merge_ks, tail_ks...)
-	merge_vs = append(merge_vs, tail_vs...)
-
-	assert(len(merge_ks) <= len(s.keys), fmt.Sprintf("len(merge_ks) = %d, len(s.keys) = %d", len(merge_ks), len(s.keys)))
-
-	s.keys = merge_ks
-	s.values = merge_vs
-
-	assert(len(s.keys) >= l, fmt.Sprintf("len(s.keys) = %d, l = %d", len(s.keys), l))
-	assert(isSortedKeys(s.keys), "keys are not sorted")
-	assert(func() bool {
-		for _, k := range ks {
-			if _, found := findInSortedSlice(s.keys, k); found {
-				return false
-			}
-		}
-		return true
-	}(), "Keys to delete still present")
-
-	if len(s.keys) != l {
-		log.Debug().Msgf("Delete: Some keys not found: len(s.keys) = %d, l = %d", len(s.keys), l)
-	}
-
-	return nil
+	s.Collection, err = s.CreateCollection("all-documents", nil, nil)
+	return err
 }

 func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error) {
@@ -311,16 +115,12 @@ func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error)
 }

 func isNormalized(k []float32) bool {
-	var sum float64
-
+	var sum float32
 	for _, v := range k {
-		v64 := float64(v)
-		sum += v64*v64
+		sum += v
 	}

-	s := math.Sqrt(sum)
-
-	return s >= 0.99 && s <= 1.01
+	return sum == 1.0
 }

 // TODO: This we could replace with handwritten SIMD code
@@ -332,7 +132,7 @@ func normalizedCosineSimilarity(k1, k2 []float32) float32 {
 		dot += k1[i] * k2[i]
 	}

-	assert(dot >= -1.01 && dot <= 1.01, fmt.Sprintf("dot = %f", dot))
+	assert(dot >= -1 && dot <= 1, fmt.Sprintf("dot = %f", dot))

 	// 2.0 * (1.0 - dot) would be the Euclidean distance
 	return dot
@@ -422,7 +222,7 @@ func cosineSimilarity(k1, k2 []float32, mag1 float64) float32 {

 	sim := float32(dot / (mag1 * math.Sqrt(mag2)))

-	assert(sim >= -1.01 && sim <= 1.01, fmt.Sprintf("sim = %f", sim))
+	assert(sim >= -1 && sim <= 1, fmt.Sprintf("sim = %f", sim))

 	return sim
 }
--- a/backend/python/faster-whisper/backend.py
+++ b/backend/python/faster-whisper/backend.py
@@ -1,94 +0,0 @@
-#!/usr/bin/env python3
-"""
-This is an extra gRPC server of LocalAI for Bark TTS
-"""
-from concurrent import futures
-import time
-import argparse
-import signal
-import sys
-import os
-import backend_pb2
-import backend_pb2_grpc
-
-from faster_whisper import WhisperModel
-
-import grpc
-
-
-_ONE_DAY_IN_SECONDS = 60 * 60 * 24
-
-# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
-MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
-COQUI_LANGUAGE = os.environ.get('COQUI_LANGUAGE', None)
-
-# Implement the BackendServicer class with the service methods
-class BackendServicer(backend_pb2_grpc.BackendServicer):
-    """
-    BackendServicer is the class that implements the gRPC service
-    """
-    def Health(self, request, context):
-        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
-    def LoadModel(self, request, context):
-        device = "cpu"
-        # Get device
-        # device = "cuda" if request.CUDA else "cpu"
-        if request.CUDA:
-            device = "cuda"
-
-        try:
-            print("Preparing models, please wait", file=sys.stderr)
-            self.model = WhisperModel(request.Model, device=device, compute_type="float16")
-        except Exception as err:
-            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
-        # Implement your logic here for the LoadModel service
-        # Replace this with your desired response
-        return backend_pb2.Result(message="Model loaded successfully", success=True)
-
-    def AudioTranscription(self, request, context):
-        resultSegments = []
-        text = ""
-        try:
-            segments, info = self.model.transcribe(request.dst, beam_size=5, condition_on_previous_text=False)
-            id = 0
-            for segment in segments:
-                print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
-                resultSegments.append(backend_pb2.TranscriptSegment(id=id, start=segment.start, end=segment.end, text=segment.text))
-                text += segment.text
-                id += 1            
-        except Exception as err:
-            print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr)
-
-        return backend_pb2.TranscriptResult(segments=resultSegments, text=text)
-
-def serve(address):
-    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
-    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
-    server.add_insecure_port(address)
-    server.start()
-    print("Server started. Listening on: " + address, file=sys.stderr)
-
-    # Define the signal handler function
-    def signal_handler(sig, frame):
-        print("Received termination signal. Shutting down...")
-        server.stop(0)
-        sys.exit(0)
-
-    # Set the signal handlers for SIGINT and SIGTERM
-    signal.signal(signal.SIGINT, signal_handler)
-    signal.signal(signal.SIGTERM, signal_handler)
-
-    try:
-        while True:
-            time.sleep(_ONE_DAY_IN_SECONDS)
-    except KeyboardInterrupt:
-        server.stop(0)
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Run the gRPC server.")
-    parser.add_argument(
-        "--addr", default="localhost:50051", help="The address to bind the server to."
-    )
-    args = parser.parse_args()
-
-    serve(args.addr)
--- a/backend/python/faster-whisper/requirements-cpu.txt
+++ b/backend/python/faster-whisper/requirements-cpu.txt
@@ -1,8 +0,0 @@
-faster-whisper
-opencv-python
-accelerate
-compel
-peft
-sentencepiece
-torch==2.4.1
-optimum-quanto
--- a/backend/python/faster-whisper/requirements-cublas11.txt
+++ b/backend/python/faster-whisper/requirements-cublas11.txt
@@ -1,9 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.4.1+cu118
-faster-whisper
-opencv-python
-accelerate
-compel
-peft
-sentencepiece
-optimum-quanto
--- a/backend/python/faster-whisper/requirements-cublas12.txt
+++ b/backend/python/faster-whisper/requirements-cublas12.txt
@@ -1,8 +0,0 @@
-torch==2.4.1
-faster-whisper
-opencv-python
-accelerate
-compel
-peft
-sentencepiece
-optimum-quanto
--- a/backend/python/faster-whisper/requirements-hipblas.txt
+++ b/backend/python/faster-whisper/requirements-hipblas.txt
@@ -1,3 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
-faster-whisper
--- a/backend/python/mamba/Makefile
+++ b/backend/python/mamba/Makefile
@@ -0,0 +1,29 @@
+.PHONY: mamba
+mamba: protogen
+	bash install.sh 
+
+.PHONY: run
+run: protogen
+	@echo "Running mamba..."
+	bash run.sh
+	@echo "mamba run."
+
+.PHONY: test
+test: protogen
+	@echo "Testing mamba..."
+	bash test.sh
+	@echo "mamba tested."
+
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
+	$(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
+	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+
+.PHONY: clean
+clean: protogen-clean
+	$(RM) -r venv __pycache__
--- a/backend/python/mamba/README.md
+++ b/backend/python/mamba/README.md
@@ -0,0 +1,5 @@
+# Creating a separate environment for the mamba project
+
+```
+make mamba
+```
--- a/backend/python/mamba/backend.py
+++ b/backend/python/mamba/backend.py
@@ -0,0 +1,179 @@
+#!/usr/bin/env python3
+from concurrent import futures
+import time
+import argparse
+import signal
+import sys
+import os
+
+import backend_pb2
+import backend_pb2_grpc
+
+import grpc
+
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel
+
+_ONE_DAY_IN_SECONDS = 60 * 60 * 24
+
+# If PYTHON_GRPC_MAX_WORKERS is set in the environment use it, otherwise default to 1
+MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
+MAMBA_CHAT= os.environ.get('MAMBA_CHAT', '1') == '1'
+
+# Implement the BackendServicer class with the service methods
+class BackendServicer(backend_pb2_grpc.BackendServicer):
+    """
+    A gRPC servicer that implements the Backend service defined in backend.proto.
+    """
+    def generate(self,prompt, max_new_tokens):
+        """
+        Generates text based on the given prompt and maximum number of new tokens.
+
+        Args:
+            prompt (str): The prompt to generate text from.
+            max_new_tokens (int): The maximum number of new tokens to generate.
+
+        Returns:
+            str: The generated text.
+        """
+        self.generator.end_beam_search()
+
+        # Tokenizing the input
+        ids = self.generator.tokenizer.encode(prompt)
+
+        self.generator.gen_begin_reuse(ids)
+        initial_len = self.generator.sequence[0].shape[0]
+        has_leading_space = False
+        decoded_text = ''
+        for i in range(max_new_tokens):
+            token = self.generator.gen_single_token()
+            if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'):
+                has_leading_space = True
+
+            decoded_text = self.generator.tokenizer.decode(self.generator.sequence[0][initial_len:])
+            if has_leading_space:
+                decoded_text = ' ' + decoded_text
+
+            if token.item() == self.generator.tokenizer.eos_token_id:
+                break
+        return decoded_text
+
+    def Health(self, request, context):
+        """
+        Returns a health check message.
+
+        Args:
+            request: The health check request.
+            context: The gRPC context.
+
+        Returns:
+            backend_pb2.Reply: The health check reply.
+        """
+        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
+
+    def LoadModel(self, request, context):
+        """
+        Loads a language model.
+
+        Args:
+            request: The load model request.
+            context: The gRPC context.
+
+        Returns:
+            backend_pb2.Result: The load model result.
+        """
+        try:
+            tokenizerModel = request.Tokenizer
+            if tokenizerModel == "":
+                tokenizerModel = request.Model
+
+            tokenizer = AutoTokenizer.from_pretrained(tokenizerModel)
+            if MAMBA_CHAT:
+                tokenizer.eos_token = "<|endoftext|>"
+                tokenizer.pad_token = tokenizer.eos_token
+            self.tokenizer = tokenizer
+            self.model = MambaLMHeadModel.from_pretrained(request.Model, device="cuda", dtype=torch.float16)
+        except Exception as err:
+            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+        return backend_pb2.Result(message="Model loaded successfully", success=True)
+
+    def Predict(self, request, context):
+        """
+        Generates text based on the given prompt and sampling parameters.
+
+        Args:
+            request: The predict request.
+            context: The gRPC context.
+
+        Returns:
+            backend_pb2.Reply: The generated text as UTF-8 bytes, with the prompt removed.
+        """
+        if request.TopP == 0:
+            request.TopP = 0.9
+
+        max_tokens = request.Tokens
+
+        if request.Tokens == 0:
+            max_tokens = 2000
+
+        # encoded_input = self.tokenizer(request.Prompt)
+        tokens = self.tokenizer(request.Prompt, return_tensors="pt")
+        input_ids = tokens.input_ids.to(device="cuda")
+        out = self.model.generate(input_ids=input_ids, max_length=max_tokens, temperature=request.Temperature,
+                                     top_p=request.TopP, eos_token_id=self.tokenizer.eos_token_id)
+
+        decoded = self.tokenizer.batch_decode(out)
+       
+        generated_text = decoded[0]
+
+        # Remove prompt from response if present
+        if request.Prompt in generated_text:
+            generated_text = generated_text.replace(request.Prompt, "")
+
+        return backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))
+
+    def PredictStream(self, request, context):
+        """
+        Generates text based on the given prompt and sampling parameters, and streams the results.
+
+        Args:
+            request: The predict stream request.
+            context: The gRPC context.
+
+        Yields:
+            backend_pb2.Reply: The full prediction from Predict, sent as a single message.
+        """
+        yield self.Predict(request, context)
+
+def serve(address):
+    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
+    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
+    server.add_insecure_port(address)
+    server.start()
+    print("Server started. Listening on: " + address, file=sys.stderr)
+
+    # Define the signal handler function
+    def signal_handler(sig, frame):
+        print("Received termination signal. Shutting down...")
+        server.stop(0)
+        sys.exit(0)
+
+    # Set the signal handlers for SIGINT and SIGTERM
+    signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGTERM, signal_handler)
+
+    try:
+        while True:
+            time.sleep(_ONE_DAY_IN_SECONDS)
+    except KeyboardInterrupt:
+        server.stop(0)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Run the gRPC server.")
+    parser.add_argument(
+        "--addr", default="localhost:50051", help="The address to bind the server to."
+    )
+    args = parser.parse_args()
+
+    serve(args.addr)
--- a/backend/python/mamba/install.sh
+++ b/backend/python/mamba/install.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+set -e
+
+LIMIT_TARGETS="cublas"
+EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"
+
+source $(dirname $0)/../common/libbackend.sh
+
+installRequirements
--- a/backend/python/mamba/requirements-after.txt
+++ b/backend/python/mamba/requirements-after.txt
@@ -0,0 +1,2 @@
+causal-conv1d==1.4.0
+mamba-ssm==2.2.2
--- a/backend/python/mamba/requirements-cpu.txt
+++ b/backend/python/mamba/requirements-cpu.txt
@@ -0,0 +1,2 @@
+torch==2.4.1
+transformers
--- a/backend/python/mamba/requirements-cublas11.txt
+++ b/backend/python/mamba/requirements-cublas11.txt
@@ -0,0 +1,3 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch==2.4.1+cu118
+transformers
--- a/backend/python/mamba/requirements-cublas12.txt
+++ b/backend/python/mamba/requirements-cublas12.txt
@@ -0,0 +1,2 @@
+torch==2.4.1
+transformers
--- a/backend/python/mamba/requirements-install.txt
+++ b/backend/python/mamba/requirements-install.txt
@@ -0,0 +1,6 @@
+# mamba does not specify its build dependencies per PEP 517, so we need to disable build isolation
+# this also means that we need to install the basic build dependencies into the venv ourselves
+# https://github.com/Dao-AILab/causal-conv1d/issues/24
+packaging
+setuptools
+wheel
--- a/backend/python/faster-whisper/requirements.txt
+++ b/backend/python/faster-whisper/requirements.txt
@@ -1,3 +1,3 @@
 grpcio==1.69.0
 protobuf
-grpcio-tools
+certifi
--- a/backend/python/mamba/run.sh
+++ b/backend/python/mamba/run.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+LIMIT_TARGETS="cublas"
+
+source $(dirname $0)/../common/libbackend.sh
+
+startBackend $@
--- a/backend/python/mamba/test.py
+++ b/backend/python/mamba/test.py
@@ -0,0 +1,76 @@
+import unittest
+import subprocess
+import time
+import backend_pb2
+import backend_pb2_grpc
+
+import grpc
+
+import unittest
+import subprocess
+import time
+import grpc
+import backend_pb2_grpc
+import backend_pb2
+
+class TestBackendServicer(unittest.TestCase):
+    """
+    TestBackendServicer is the class that tests the gRPC service.
+
+    unittest runs setUp()/tearDown() around every test, so tests never call them directly.
+    """
+    def setUp(self):
+        """
+        This method sets up the gRPC service by starting the server
+        """
+        self.service = subprocess.Popen(["python", "backend.py", "--addr", "localhost:50051"])
+        time.sleep(10)
+
+    def tearDown(self) -> None:
+        """
+        This method tears down the gRPC service by terminating the server
+        """
+        self.service.terminate()
+        self.service.wait()
+
+    def test_server_startup(self):
+        """
+        This method tests if the server starts up successfully
+        """
+        try:
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.Health(backend_pb2.HealthMessage())
+                self.assertEqual(response.message, b'OK')
+        except Exception as err:
+            print(err)
+            self.fail("Server failed to start")
+    def test_load_model(self):
+        """
+        This method tests if the model is loaded successfully
+        """
+        try:
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m"))
+                self.assertTrue(response.success)
+                self.assertEqual(response.message, "Model loaded successfully")
+        except Exception as err:
+            print(err)
+            self.fail("LoadModel service failed")
+
+    def test_text(self):
+        """
+        This method tests if a Predict call on a loaded model returns a message
+        """
+        try:
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m"))
+                self.assertTrue(response.success)
+                req = backend_pb2.PredictOptions(Prompt="The capital of France is")
+                resp = stub.Predict(req)
+                self.assertIsNotNone(resp.message)
+        except Exception as err:
+            print(err)
+            self.fail("text service failed")
--- a/backend/python/faster-whisper/test.sh
+++ b/backend/python/faster-whisper/test.sh
--- a/backend/python/faster-whisper/Makefile
+++ b/backend/python/faster-whisper/Makefile
@@ -1,9 +1,8 @@
 .DEFAULT_GOAL := install

 .PHONY: install
-install:
+install: protogen
 	bash install.sh
-	$(MAKE) protogen

 .PHONY: protogen
 protogen: backend_pb2_grpc.py backend_pb2.py
@@ -13,8 +12,14 @@ protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py

 backend_pb2_grpc.py backend_pb2.py:
-	bash protogen.sh
+	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto

 .PHONY: clean
 clean: protogen-clean
-	rm -rf venv __pycache__
+	rm -rf venv __pycache__
+
+.PHONY: test
+test: protogen
+	@echo "Testing openvoice..."
+	bash test.sh
+	@echo "openvoice tested."
--- a/backend/python/openvoice/backend.py
+++ b/backend/python/openvoice/backend.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python3
+"""
+Extra gRPC server for OpenVoice models.
+"""
+from concurrent import futures
+
+import argparse
+import signal
+import sys
+import os
+import torch
+from openvoice import se_extractor
+from openvoice.api import ToneColorConverter
+from melo.api import TTS
+
+import time
+import backend_pb2
+import backend_pb2_grpc
+
+import grpc
+
+
+_ONE_DAY_IN_SECONDS = 60 * 60 * 24
+
+# If PYTHON_GRPC_MAX_WORKERS is specified in the environment, use it; otherwise default to 1
+MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
+
+# Implement the BackendServicer class with the service methods
+class BackendServicer(backend_pb2_grpc.BackendServicer):
+    """
+    A gRPC servicer for the backend service.
+
+    This class implements the gRPC methods for the backend service, including Health, LoadModel, and TTS.
+    """
+    def Health(self, request, context):
+        """
+        A gRPC method that returns the health status of the backend service.
+
+        Args:
+            request: A HealthRequest object that contains the request parameters.
+            context: A grpc.ServicerContext object that provides information about the RPC.
+
+        Returns:
+            A Reply object that contains the health status of the backend service.
+        """
+        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
+
+    def LoadModel(self, request, context):
+        """
+        A gRPC method that loads a model into memory.
+
+        Args:
+            request: A LoadModelRequest object that contains the request parameters.
+            context: A grpc.ServicerContext object that provides information about the RPC.
+
+        Returns:
+            A Result object that contains the result of the LoadModel operation.
+        """
+        model_name = request.Model
+        try:
+
+            self.clonedVoice = False
+            # Assume directory from request.ModelFile.
+            # Only if request.AudioPath is not an absolute path
+            if request.AudioPath and request.ModelFile != "" and not os.path.isabs(request.AudioPath):
+                # get base path of modelFile
+                modelFileBase = os.path.dirname(request.ModelFile)
+                request.AudioPath = os.path.join(modelFileBase, request.AudioPath)
+            if request.AudioPath != "":
+                self.clonedVoice = True
+
+            self.modelpath = request.ModelFile
+            self.speaker = request.Type
+            self.ClonedVoicePath = request.AudioPath
+            
+            ckpt_converter = request.Model+'/converter'
+            device = "cuda:0" if torch.cuda.is_available() else "cpu"
+            self.device = device
+            self.tone_color_converter = None
+            if self.clonedVoice:
+                self.tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
+                self.tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
+       
+        except Exception as err:
+            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+
+        return backend_pb2.Result(message="Model loaded successfully", success=True)
+
+    def TTS(self, request, context):
+        model_name = request.model
+        if model_name == "":
+            return backend_pb2.Result(success=False, message="request.model is required")
+        try:
+            # Speed is adjustable
+            speed = 1.0
+            voice = "EN"
+            if request.voice:
+                voice = request.voice
+            model = TTS(language=voice, device=self.device)
+            speaker_ids = model.hps.data.spk2id
+            speaker_key = self.speaker
+            modelpath = self.modelpath
+            for s in speaker_ids.keys():
+                print(f"Speaker: {s} - ID: {speaker_ids[s]}")
+            speaker_id = speaker_ids[speaker_key]
+            speaker_key = speaker_key.lower().replace('_', '-')
+            source_se = torch.load(f'{modelpath}/base_speakers/ses/{speaker_key}.pth', map_location=self.device)
+            model.tts_to_file(request.text, speaker_id, request.dst, speed=speed)
+            if self.clonedVoice:
+                reference_speaker = self.ClonedVoicePath
+                target_se, audio_name = se_extractor.get_se(reference_speaker, self.tone_color_converter, vad=False)
+                # Run the tone color converter
+                encode_message = "@MyShell"
+                self.tone_color_converter.convert(
+                    audio_src_path=request.dst, 
+                    src_se=source_se, 
+                    tgt_se=target_se, 
+                    output_path=request.dst,
+                    message=encode_message)
+           
+            print("[OpenVoice] TTS generated!", file=sys.stderr)
+            print("[OpenVoice] TTS saved to", request.dst, file=sys.stderr)
+            print(request, file=sys.stderr)
+        except Exception as err:
+            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+        return backend_pb2.Result(success=True)
+
+def serve(address):
+    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
+    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
+    server.add_insecure_port(address)
+    server.start()
+    print("[OpenVoice] Server started. Listening on: " + address, file=sys.stderr)
+
+    # Define the signal handler function
+    def signal_handler(sig, frame):
+        print("[OpenVoice] Received termination signal. Shutting down...")
+        server.stop(0)
+        sys.exit(0)
+
+    # Set the signal handlers for SIGINT and SIGTERM
+    signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGTERM, signal_handler)
+
+    try:
+        while True:
+            time.sleep(_ONE_DAY_IN_SECONDS)
+    except KeyboardInterrupt:
+        server.stop(0)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Run the gRPC server.")
+    parser.add_argument(
+        "--addr", default="localhost:50051", help="The address to bind the server to."
+    )
+    args = parser.parse_args()
+    print(f"[OpenVoice] startup: {args}", file=sys.stderr)
+    serve(args.addr)
--- a/backend/python/faster-whisper/install.sh
+++ b/backend/python/faster-whisper/install.sh
@@ -12,3 +12,5 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
 fi

 installRequirements
+
+python -m unidic download
--- a/backend/python/openvoice/requirements-cpu.txt
+++ b/backend/python/openvoice/requirements-cpu.txt
@@ -0,0 +1,7 @@
+torch==2.4.1
+git+https://github.com/myshell-ai/MeloTTS.git
+git+https://github.com/myshell-ai/OpenVoice.git
+whisper-timestamped
+pydub==0.25.1
+wavmark==0.0.3
+eng_to_ipa==0.0.2
--- a/backend/python/openvoice/requirements-cublas11.txt
+++ b/backend/python/openvoice/requirements-cublas11.txt
@@ -0,0 +1,8 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch==2.4.1+cu118
+git+https://github.com/myshell-ai/MeloTTS.git
+git+https://github.com/myshell-ai/OpenVoice.git
+whisper-timestamped
+pydub==0.25.1
+wavmark==0.0.3
+eng_to_ipa==0.0.2
--- a/backend/python/openvoice/requirements-cublas12.txt
+++ b/backend/python/openvoice/requirements-cublas12.txt
@@ -0,0 +1,7 @@
+torch==2.4.1
+git+https://github.com/myshell-ai/MeloTTS.git
+git+https://github.com/myshell-ai/OpenVoice.git
+whisper-timestamped
+pydub==0.25.1
+wavmark==0.0.3
+eng_to_ipa==0.0.2
--- a/backend/python/openvoice/requirements-hipblas.txt
+++ b/backend/python/openvoice/requirements-hipblas.txt
@@ -0,0 +1,8 @@
+--extra-index-url https://download.pytorch.org/whl/rocm6.0
+torch==2.4.1+rocm6.0
+git+https://github.com/myshell-ai/MeloTTS.git
+git+https://github.com/myshell-ai/OpenVoice.git
+whisper-timestamped
+pydub==0.25.1
+wavmark==0.0.3
+eng_to_ipa==0.0.2
--- a/backend/python/openvoice/requirements-intel.txt
+++ b/backend/python/openvoice/requirements-intel.txt
@@ -0,0 +1,24 @@
+--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+intel-extension-for-pytorch==2.3.110+xpu
+torch==2.3.1+cxx11.abi
+torchaudio==2.3.1+cxx11.abi
+oneccl_bind_pt==2.3.100+xpu
+optimum[openvino]
+grpcio==1.69.0
+protobuf
+librosa==0.9.1
+faster-whisper==0.9.0
+pydub==0.25.1
+wavmark==0.0.3
+eng_to_ipa==0.0.2
+inflect==7.0.0
+unidecode==1.3.7
+whisper-timestamped==1.14.2
+openai
+python-dotenv
+pypinyin==0.50.0
+cn2an==0.5.22
+jieba==0.42.1
+langid==1.1.6
+git+https://github.com/myshell-ai/MeloTTS.git
+git+https://github.com/myshell-ai/OpenVoice.git
--- a/backend/python/openvoice/requirements.txt
+++ b/backend/python/openvoice/requirements.txt
@@ -0,0 +1,17 @@
+grpcio==1.69.0
+protobuf
+librosa
+faster-whisper
+inflect
+unidecode
+openai
+python-dotenv
+pypinyin
+cn2an==0.5.22
+numpy==1.22.0
+networkx==2.8.8
+jieba==0.42.1
+gradio==5.9.1
+langid==1.1.6
+llvmlite==0.43.0
+setuptools
--- a/backend/python/faster-whisper/run.sh
+++ b/backend/python/faster-whisper/run.sh
--- a/backend/python/openvoice/test.py
+++ b/backend/python/openvoice/test.py
@@ -0,0 +1,82 @@
+"""
+A test script to test the gRPC service
+"""
+import unittest
+import subprocess
+import time
+import backend_pb2
+import backend_pb2_grpc
+
+import grpc
+
+
+class TestBackendServicer(unittest.TestCase):
+    """
+    TestBackendServicer is the class that tests the gRPC service
+
+    unittest calls setUp() before and tearDown() after every test method,
+    so the tests must not invoke them themselves: doing so would spawn a
+    second server on the same address and never terminate the first one.
+    """
+    def setUp(self):
+        """
+        This method sets up the gRPC service by starting the server
+
+        It then sleeps for a fixed 30 seconds before the test body runs.
+        """
+        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
+        time.sleep(30)
+
+    def tearDown(self) -> None:
+        """
+        This method tears down the gRPC service by terminating the server
+
+        It waits for the child process to exit before returning.
+        """
+        self.service.terminate()
+        self.service.wait()
+
+    def test_server_startup(self):
+        """
+        This method tests if the server starts up successfully
+        """
+        try:
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.Health(backend_pb2.HealthMessage())
+                self.assertEqual(response.message, b'OK')
+        except Exception as err:
+            print(err)
+            self.fail("Server failed to start")
+
+    def test_load_model(self):
+        """
+        This method tests if the model is loaded successfully
+        """
+        try:
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="checkpoints_v2",
+                                                                    Type="en-us"))
+                self.assertTrue(response.success)
+                self.assertEqual(response.message, "Model loaded successfully")
+        except Exception as err:
+            print(err)
+            self.fail("LoadModel service failed")
+
+    def test_tts(self):
+        """
+        This method tests if the TTS service answers a synthesis request
+        """
+        try:
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="dingzhen"))
+                self.assertTrue(response.success)
+                tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story", voice="EN")
+                tts_response = stub.TTS(tts_request)
+                # Only checks that the RPC returned; the Result payload is not inspected.
+                self.assertIsNotNone(tts_response)
+        except Exception as err:
+            print(err)
+            self.fail("TTS service failed")
--- a/backend/python/openvoice/test.sh
+++ b/backend/python/openvoice/test.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+set -e
+
+source $(dirname $0)/../common/libbackend.sh
+
+# Download checkpoints if not present
+if [ ! -d "checkpoints_v2" ]; then
+    wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
+    unzip checkpoints_v2.zip
+fi
+
+runUnittests
--- a/backend/python/parler-tts/Makefile
+++ b/backend/python/parler-tts/Makefile
@@ -0,0 +1,44 @@
+export CONDA_ENV_PATH = "parler.yml"
+SKIP_CONDA?=0
+ifeq ($(BUILD_TYPE), cublas)
+export CONDA_ENV_PATH = "parler-nvidia.yml"
+endif
+
+# Intel GPUs are supposed to have dependencies installed in the main python
+# environment, so we skip conda installation for SYCL builds.
+# https://github.com/intel/intel-extension-for-pytorch/issues/538
+ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+export SKIP_CONDA=1
+endif
+
+.PHONY: parler-tts
+parler-tts:
+	@echo "Installing $(CONDA_ENV_PATH)..."
+	bash install.sh $(CONDA_ENV_PATH)
+	$(MAKE) protogen
+
+.PHONY: run
+run: protogen
+	@echo "Running transformers..."
+	bash run.sh
+	@echo "transformers run."
+
+.PHONY: test
+test: protogen
+	@echo "Testing transformers..."
+	bash test.sh
+	@echo "transformers tested."
+
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
+	$(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
+	bash protogen.sh
+
+.PHONY: clean
+clean: protogen-clean
+	$(RM) -r venv __pycache__
--- a/backend/python/parler-tts/backend.py
+++ b/backend/python/parler-tts/backend.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+"""
+Extra gRPC server for ParlerTTSForConditionalGeneration models.
+"""
+from concurrent import futures
+
+import argparse
+import signal
+import sys
+import os
+
+import time
+import backend_pb2
+import backend_pb2_grpc
+
+import grpc
+
+from scipy.io.wavfile import write as write_wav
+
+from parler_tts import ParlerTTSForConditionalGeneration
+from transformers import AutoTokenizer
+import soundfile as sf  
+import torch
+
+_ONE_DAY_IN_SECONDS = 60 * 60 * 24
+
+# If PYTHON_GRPC_MAX_WORKERS is specified in the environment, use it; otherwise default to 1
+MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
+
+# Implement the BackendServicer class with the service methods
+class BackendServicer(backend_pb2_grpc.BackendServicer):
+    """
+    A gRPC servicer for the backend service.
+
+    This class implements the gRPC methods for the backend service, including Health, LoadModel, and TTS.
+    """
+    def Health(self, request, context):
+        """
+        A gRPC method that returns the health status of the backend service.
+
+        Args:
+            request: A HealthRequest object that contains the request parameters.
+            context: A grpc.ServicerContext object that provides information about the RPC.
+
+        Returns:
+            A Reply object that contains the health status of the backend service.
+        """
+        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
+
+    def LoadModel(self, request, context):
+        """
+        A gRPC method that loads a model into memory.
+
+        Args:
+            request: A LoadModelRequest object that contains the request parameters.
+            context: A grpc.ServicerContext object that provides information about the RPC.
+
+        Returns:
+            A Result object that contains the result of the LoadModel operation.
+        """
+        model_name = request.Model
+        device = "cuda:0" if torch.cuda.is_available() else "cpu"
+        try:
+            self.model = ParlerTTSForConditionalGeneration.from_pretrained(model_name).to(device)
+            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        except Exception as err:
+            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+
+        return backend_pb2.Result(message="Model loaded successfully", success=True)
+
+    def TTS(self, request, context):
+        model_name = request.model
+        voice = request.voice
+        if voice == "":
+            voice = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
+        if model_name == "":
+            return backend_pb2.Result(success=False, message="request.model is required")
+        try:
+            device = "cuda:0" if torch.cuda.is_available() else "cpu"
+            input_ids = self.tokenizer(voice, return_tensors="pt").input_ids.to(device)
+            prompt_input_ids = self.tokenizer(request.text, return_tensors="pt").input_ids.to(device)
+           
+            generation = self.model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
+            audio_arr = generation.cpu().numpy().squeeze()
+            print("[parler-tts] TTS generated!", file=sys.stderr)
+            sf.write(request.dst, audio_arr, self.model.config.sampling_rate)
+            print("[parler-tts] TTS saved to", request.dst, file=sys.stderr)
+            print("[parler-tts] TTS for", file=sys.stderr)
+            print(request, file=sys.stderr)
+        except Exception as err:
+            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+        return backend_pb2.Result(success=True)
+
+
+def serve(address):
+    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
+    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
+    server.add_insecure_port(address)
+    server.start()
+    print("[parler-tts] Server started. Listening on: " + address, file=sys.stderr)
+
+    # Define the signal handler function
+    def signal_handler(sig, frame):
+        print("[parler-tts] Received termination signal. Shutting down...")
+        server.stop(0)
+        sys.exit(0)
+
+    # Set the signal handlers for SIGINT and SIGTERM
+    signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGTERM, signal_handler)
+
+    try:
+        while True:
+            time.sleep(_ONE_DAY_IN_SECONDS)
+    except KeyboardInterrupt:
+        server.stop(0)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Run the gRPC server.")
+    parser.add_argument(
+        "--addr", default="localhost:50051", help="The address to bind the server to."
+    )
+    args = parser.parse_args()
+    print(f"[parler-tts] startup: {args}", file=sys.stderr)
+    serve(args.addr)
--- a/backend/python/parler-tts/install.sh
+++ b/backend/python/parler-tts/install.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+set -e
+
+source $(dirname $0)/../common/libbackend.sh
+
+# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
+# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
+# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
+# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
+if [ "x${BUILD_PROFILE}" == "xintel" ]; then
+    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
+fi
+
+
+installRequirements
+
+
+# https://github.com/descriptinc/audiotools/issues/101
+# incompatible protobuf versions.
+PYDIR=python3.10
+pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"
+
+if [ ! -d ${pyenv} ]; then
+    echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
+    exit 1
+fi
+
+curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py
--- a/backend/python/faster-whisper/protogen.sh
+++ b/backend/python/faster-whisper/protogen.sh
--- a/backend/python/parler-tts/requirements-after.txt
+++ b/backend/python/parler-tts/requirements-after.txt
@@ -0,0 +1,4 @@
+git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17
+llvmlite==0.43.0
+numba==0.60.0
+grpcio-tools==1.42.0
--- a/backend/python/parler-tts/requirements-cpu.txt
+++ b/backend/python/parler-tts/requirements-cpu.txt
@@ -0,0 +1,3 @@
+transformers
+accelerate
+torch==2.4.1
--- a/backend/python/parler-tts/requirements-cublas11.txt
+++ b/backend/python/parler-tts/requirements-cublas11.txt
@@ -0,0 +1,5 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch==2.4.1+cu118
+torchaudio==2.4.1+cu118
+transformers
+accelerate
--- a/backend/python/parler-tts/requirements-cublas12.txt
+++ b/backend/python/parler-tts/requirements-cublas12.txt
@@ -0,0 +1,4 @@
+torch==2.4.1
+torchaudio==2.4.1
+transformers
+accelerate
--- a/backend/python/parler-tts/requirements-hipblas.txt
+++ b/backend/python/parler-tts/requirements-hipblas.txt
@@ -0,0 +1,5 @@
+--extra-index-url https://download.pytorch.org/whl/rocm6.0
+torch==2.3.0+rocm6.0
+torchaudio==2.3.0+rocm6.0
+transformers
+accelerate
--- a/backend/python/faster-whisper/requirements-intel.txt
+++ b/backend/python/faster-whisper/requirements-intel.txt
@@ -1,6 +1,8 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 intel-extension-for-pytorch==2.3.110+xpu
 torch==2.3.1+cxx11.abi
+torchaudio==2.3.1+cxx11.abi
 oneccl_bind_pt==2.3.100+xpu
 optimum[openvino]
-faster-whisper
+transformers
+accelerate
--- a/backend/python/parler-tts/requirements.txt
+++ b/backend/python/parler-tts/requirements.txt
@@ -0,0 +1,4 @@
+grpcio==1.69.0
+certifi
+llvmlite==0.43.0
+setuptools
--- a/backend/python/parler-tts/run.sh
+++ b/backend/python/parler-tts/run.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+source $(dirname $0)/../common/libbackend.sh
+
+startBackend $@
--- a/backend/python/parler-tts/test.py
+++ b/backend/python/parler-tts/test.py
@@ -0,0 +1,81 @@
+"""
+A test script to test the gRPC service
+"""
+import unittest
+import subprocess
+import time
+import backend_pb2
+import backend_pb2_grpc
+
+import grpc
+
+
+class TestBackendServicer(unittest.TestCase):
+    """
+    TestBackendServicer is the class that tests the gRPC service
+
+    unittest calls setUp() before and tearDown() after every test method,
+    so the tests must not invoke them themselves: doing so would spawn a
+    second server on the same address and never terminate the first one.
+    """
+    def setUp(self):
+        """
+        This method sets up the gRPC service by starting the server
+
+        It then sleeps for a fixed 10 seconds before the test body runs.
+        """
+        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
+        time.sleep(10)
+
+    def tearDown(self) -> None:
+        """
+        This method tears down the gRPC service by terminating the server
+
+        It waits for the child process to exit before returning.
+        """
+        self.service.terminate()
+        self.service.wait()
+
+    def test_server_startup(self):
+        """
+        This method tests if the server starts up successfully
+        """
+        try:
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.Health(backend_pb2.HealthMessage())
+                self.assertEqual(response.message, b'OK')
+        except Exception as err:
+            print(err)
+            self.fail("Server failed to start")
+
+    def test_load_model(self):
+        """
+        This method tests if the model is loaded successfully
+        """
+        try:
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1"))
+                self.assertTrue(response.success)
+                self.assertEqual(response.message, "Model loaded successfully")
+        except Exception as err:
+            print(err)
+            self.fail("LoadModel service failed")
+
+    def test_tts(self):
+        """
+        This method tests if the TTS service answers a synthesis request
+        """
+        try:
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1"))
+                self.assertTrue(response.success)
+                tts_request = backend_pb2.TTSRequest(text="Hey, how are you doing today?")
+                tts_response = stub.TTS(tts_request)
+                # Only checks that the RPC returned; the Result payload is not inspected.
+                self.assertIsNotNone(tts_response)
+        except Exception as err:
+            print(err)
+            self.fail("TTS service failed")
--- a/backend/python/parler-tts/test.sh
+++ b/backend/python/parler-tts/test.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -e
+
+source $(dirname $0)/../common/libbackend.sh
+
+runUnittests
--- a/backend/python/transformers/backend.py
+++ b/backend/python/transformers/backend.py
@@ -21,7 +21,7 @@ import torch.cuda


 XPU=os.environ.get("XPU", "0") == "1"
-from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria, MambaConfig, MambaForCausalLM
+from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria
 from transformers import AutoProcessor, MusicgenForConditionalGeneration
 from scipy.io import wavfile
 import outetts
@@ -245,10 +245,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                autoTokenizer = False
                self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
                self.SentenceTransformer = True
-            elif request.Type == "Mamba":
-                autoTokenizer = False
-                self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-                self.model = MambaForCausalLM.from_pretrained(model_name)
            else:
                print("Automodel", file=sys.stderr)
                self.model = AutoModel.from_pretrained(model_name, 
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -515,7 +515,7 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
 		}
 	}
 	if (u & FLAG_IMAGE) == FLAG_IMAGE {
-		imageBackends := []string{"diffusers", "stablediffusion", "stablediffusion-ggml"}
+		imageBackends := []string{"diffusers", "stablediffusion"}
 		if !slices.Contains(imageBackends, c.Backend) {
 			return false
 		}
--- a/core/config/config_test.go
+++ b/core/config/config_test.go
@@ -48,66 +48,5 @@ var _ = Describe("Test cases for config related functions", func() {
 			// config should includes whisper-1 models's api.config
 			Expect(loadedModelNames).To(ContainElements("whisper-1"))
 		})
-
-		It("Test new loadconfig", func() {
-
-			bcl := NewBackendConfigLoader(os.Getenv("MODELS_PATH"))
-			err := bcl.LoadBackendConfigsFromPath(os.Getenv("MODELS_PATH"))
-			Expect(err).To(BeNil())
-			configs := bcl.GetAllBackendConfigs()
-			loadedModelNames := []string{}
-			for _, v := range configs {
-				loadedModelNames = append(loadedModelNames, v.Name)
-			}
-			Expect(configs).ToNot(BeNil())
-			totalModels := len(loadedModelNames)
-
-			Expect(loadedModelNames).To(ContainElements("code-search-ada-code-001"))
-
-			// config should includes text-embedding-ada-002 models's api.config
-			Expect(loadedModelNames).To(ContainElements("text-embedding-ada-002"))
-
-			// config should includes rwkv_test models's api.config
-			Expect(loadedModelNames).To(ContainElements("rwkv_test"))
-
-			// config should includes whisper-1 models's api.config
-			Expect(loadedModelNames).To(ContainElements("whisper-1"))
-
-			// create a temp directory and store a temporary model
-			tmpdir, err := os.MkdirTemp("", "test")
-			Expect(err).ToNot(HaveOccurred())
-			defer os.RemoveAll(tmpdir)
-
-			// create a temporary model
-			model := `name: "test-model"
-description: "test model"
-options:
- foo
- bar
- baz
-`
-			modelFile := tmpdir + "/test-model.yaml"
-			err = os.WriteFile(modelFile, []byte(model), 0644)
-			Expect(err).ToNot(HaveOccurred())
-
-			err = bcl.LoadBackendConfigsFromPath(tmpdir)
-			Expect(err).ToNot(HaveOccurred())
-
-			configs = bcl.GetAllBackendConfigs()
-			Expect(len(configs)).ToNot(Equal(totalModels))
-
-			loadedModelNames = []string{}
-			var testModel BackendConfig
-			for _, v := range configs {
-				loadedModelNames = append(loadedModelNames, v.Name)
-				if v.Name == "test-model" {
-					testModel = v
-				}
-			}
-			Expect(loadedModelNames).To(ContainElements("test-model"))
-			Expect(testModel.Description).To(Equal("test model"))
-			Expect(testModel.Options).To(ContainElements("foo", "bar", "baz"))
-
-		})
 	})
 })
--- a/core/http/app_test.go
+++ b/core/http/app_test.go
@@ -687,10 +687,6 @@ var _ = Describe("API test", func() {
 					Name: "model-gallery",
 					URL:  "https://raw.githubusercontent.com/go-skynet/model-gallery/main/index.yaml",
 				},
-				{
-					Name: "localai",
-					URL:  "https://raw.githubusercontent.com/mudler/LocalAI/refs/heads/master/gallery/index.yaml",
-				},
 			}

 			application, err := application.New(
@@ -768,8 +764,10 @@ var _ = Describe("API test", func() {
 			}

 			response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
-				ID:   "localai@sd-1.5-ggml",
-				Name: "stablediffusion",
+				ID: "model-gallery@stablediffusion",
+				Overrides: map[string]interface{}{
+					"parameters": map[string]interface{}{"model": "stablediffusion_assets"},
+				},
 			})

 			Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
@@ -780,14 +778,14 @@ var _ = Describe("API test", func() {
 				response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
 				fmt.Println(response)
 				return response["processed"].(bool)
-			}, "1200s", "10s").Should(Equal(true))
+			}, "360s", "10s").Should(Equal(true))

 			resp, err := http.Post(
 				"http://127.0.0.1:9090/v1/images/generations",
 				"application/json",
 				bytes.NewBuffer([]byte(`{
-					 			"prompt": "a lovely cat",
-								"step": 1,  "seed":9000,
+					 			"prompt": "a lovely cat",
+								"mode": 2,  "seed":9000,
 					 			"size": "256x256", "n":2}`)))
 			// The response should contain an URL
 			Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
@@ -796,7 +794,6 @@ var _ = Describe("API test", func() {

 			imgUrlResp := &schema.OpenAIResponse{}
 			err = json.Unmarshal(dat, imgUrlResp)
-			Expect(err).ToNot(HaveOccurred(), fmt.Sprint(dat))
 			Expect(imgUrlResp.Data).ToNot(Or(BeNil(), BeZero()))
 			imgUrl := imgUrlResp.Data[0].URL
 			Expect(imgUrl).To(ContainSubstring("http://127.0.0.1:9090/"), imgUrl)
@@ -1003,7 +1000,7 @@ var _ = Describe("API test", func() {
 					}
 				}

-				deleteBody := schema.StoresDelete{
+				deleteBody := schema.StoresReset{
 					Keys: [][]float32{
 						{0.1, 0.2, 0.3},
 					},
--- a/core/http/endpoints/localai/backend_monitor.go
+++ b/core/http/endpoints/localai/backend_monitor.go
@@ -28,7 +28,7 @@ func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ct
 	}
 }

-// BackendShutdownEndpoint shuts down the specified backend
+// BackendShutdownEndpoint shuts down the specified backend
 // @Summary Backend monitor endpoint
 // @Param request body schema.BackendMonitorRequest true "Backend statistics request"
 // @Router /backend/shutdown [post]
--- a/core/http/endpoints/localai/stores.go
+++ b/core/http/endpoints/localai/stores.go
@@ -36,9 +36,9 @@ func StoresSetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfi
 	}
 }

-func StoresDeleteEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+func StoresResetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		input := new(schema.StoresDelete)
+		input := new(schema.StoresReset)

 		if err := c.BodyParser(input); err != nil {
 			return err
@@ -49,7 +49,7 @@ func StoresDeleteEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationCo
 			return err
 		}

-		if err := store.DeleteCols(c.Context(), sb, input.Keys); err != nil {
+		if _, err := sb.StoresReset(c.Context(), nil); err != nil {
 			return err
 		}

@@ -57,37 +57,6 @@ func StoresDeleteEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationCo
 	}
 }

-func StoresGetEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		input := new(schema.StoresGet)
-
-		if err := c.BodyParser(input); err != nil {
-			return err
-		}
-
-		sb, err := backend.StoreBackend(sl, appConfig, input.Store)
-		if err != nil {
-			return err
-		}
-
-		keys, vals, err := store.GetCols(c.Context(), sb, input.Keys)
-		if err != nil {
-			return err
-		}
-
-		res := schema.StoresGetResponse{
-			Keys:   keys,
-			Values: make([]string, len(vals)),
-		}
-
-		for i, v := range vals {
-			res.Values[i] = string(v)
-		}
-
-		return c.JSON(res)
-	}
-}
-
 func StoresFindEndpoint(sl *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		input := new(schema.StoresFind)
--- a/core/http/endpoints/openai/image.go
+++ b/core/http/endpoints/openai/image.go
@@ -72,7 +72,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
 		}

 		if m == "" {
-			m = "stablediffusion"
+			m = model.StableDiffusionBackend
 		}
 		log.Debug().Msgf("Loading model: %+v", m)

@@ -129,9 +129,9 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon

 		switch config.Backend {
 		case "stablediffusion":
-			config.Backend = model.StableDiffusionGGMLBackend
+			config.Backend = model.StableDiffusionBackend
 		case "":
-			config.Backend = model.StableDiffusionGGMLBackend
+			config.Backend = model.StableDiffusionBackend
 		}

 		if !strings.Contains(input.Size, "x") {
--- a/core/http/endpoints/openai/request.go
+++ b/core/http/endpoints/openai/request.go
@@ -4,7 +4,6 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
-	"strconv"

 	"github.com/gofiber/fiber/v2"
 	"github.com/google/uuid"
@@ -297,14 +296,6 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
 			}
 		}
 	}
-
-	// If a quality was defined as number, convert it to step
-	if input.Quality != "" {
-		q, err := strconv.Atoi(input.Quality)
-		if err == nil {
-			config.Step = q
-		}
-	}
 }

 func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.BackendConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.BackendConfig, *schema.OpenAIRequest, error) {
--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -39,8 +39,7 @@ func RegisterLocalAIRoutes(router *fiber.App,
 	// Stores
 	sl := model.NewModelLoader("")
 	router.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig))
-	router.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig))
-	router.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
+	router.Post("/stores/reset", localai.StoresResetEndpoint(sl, appConfig))
 	router.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))

 	if !appConfig.DisableMetrics {
--- a/core/schema/localai.go
+++ b/core/schema/localai.go
@@ -47,21 +47,8 @@ type StoresSet struct {
 	Values []string    `json:"values" yaml:"values"`
 }

-type StoresDelete struct {
+type StoresReset struct {
 	Store string `json:"store,omitempty" yaml:"store,omitempty"`
-
-	Keys [][]float32 `json:"keys"`
-}
-
-type StoresGet struct {
-	Store string `json:"store,omitempty" yaml:"store,omitempty"`
-
-	Keys [][]float32 `json:"keys" yaml:"keys"`
-}
-
-type StoresGetResponse struct {
-	Keys   [][]float32 `json:"keys" yaml:"keys"`
-	Values []string    `json:"values" yaml:"values"`
 }

 type StoresFind struct {
--- a/core/schema/openai.go
+++ b/core/schema/openai.go
@@ -191,9 +191,8 @@ type OpenAIRequest struct {
 	Stream bool `json:"stream"`

 	// Image (not supported by OpenAI)
-	Mode    int    `json:"mode"`
-	Quality string `json:"quality"`
-	Step    int    `json:"step"`
+	Mode int `json:"mode"`
+	Step int `json:"step"`

 	// A grammar to constrain the LLM output
 	Grammar string `json:"grammar" yaml:"grammar"`
--- a/docs/themes/hugo-theme-relearn
+++ b/docs/themes/hugo-theme-relearn
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -5219,23 +5219,6 @@
    - filename: Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf
      sha256: 268390e07edd407ad93ea21a868b7ae995b5950e01cad0db9e1802ae5049d405
      uri: huggingface://bartowski/Dolphin3.0-Llama3.1-8B-GGUF/Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf
- !!merge <<: *llama31
-  name: "deepseek-r1-distill-llama-8b"
-  icon: "https://avatars.githubusercontent.com/u/148330874"
-  urls:
-    - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B
-    - https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF
-  description: |
-    DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks.
-    Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing.
-    By incorporating cold-start data prior to the RL phase,DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks.
-  overrides:
-    parameters:
-      model: deepseek-r1-distill-llama-8b-Q4_K_M.gguf
-  files:
-    - filename: deepseek-r1-distill-llama-8b-Q4_K_M.gguf
-      sha256: f8eba201522ab44b79bc54166126bfaf836111ff4cbf2d13c59c3b57da10573b
-      uri: huggingface://unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf
 - &deepseek  ## Deepseek
  url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
  name: "deepseek-coder-v2-lite-instruct"
@@ -5301,86 +5284,6 @@
    - filename: archangel_sft_pythia2-8b.Q4_K_M.gguf
      sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8
      uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf
- &deepseek-r1  ## Start DeepSeek-R1
-  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
-  name: "deepseek-r1-distill-qwen-1.5b"
-  icon: "https://avatars.githubusercontent.com/u/148330874"
-  urls:
-    - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5b
-    - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF
-  description: |
-    DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks.
-    Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing.
-    By incorporating cold-start data prior to the RL phase,DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks.
-  overrides:
-    parameters:
-      model: DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf
-  files:
-    - filename: DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf
-      sha256: 1741e5b2d062b07acf048bf0d2c514dadf2a48f94e2b4aa0cfe069af3838ee2f
-      uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf
- !!merge <<: *deepseek-r1
-  name: "deepseek-r1-distill-qwen-7b"
-  urls:
-    - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
-    - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF
-  overrides:
-    parameters:
-      model: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
-  files:
-    - filename: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
-      sha256: 731ece8d06dc7eda6f6572997feb9ee1258db0784827e642909d9b565641937b
-      uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
- !!merge <<: *deepseek-r1
-  name: "deepseek-r1-distill-qwen-14b"
-  urls:
-    - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
-    - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-14B-GGUF
-  overrides:
-    parameters:
-      model: DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf
-  files:
-    - filename: DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf
-      sha256: 0b319bd0572f2730bfe11cc751defe82045fad5085b4e60591ac2cd2d9633181
-      uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-14B-GGUF/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf
- !!merge <<: *deepseek-r1
-  name: "deepseek-r1-distill-qwen-32b"
-  urls:
-    - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
-    - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF
-  overrides:
-    parameters:
-      model: DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf
-  files:
-    - filename: DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf
-      sha256: bed9b0f551f5b95bf9da5888a48f0f87c37ad6b72519c4cbd775f54ac0b9fc62
-      uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF/DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf
- !!merge <<: *deepseek-r1
-  name: "deepseek-r1-distill-llama-8b"
-  icon: "https://avatars.githubusercontent.com/u/148330874"
-  urls:
-    - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B
-    - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Llama-8B-GGUF
-  overrides:
-    parameters:
-      model: DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf
-  files:
-    - filename: DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf
-      sha256: 87bcba20b4846d8dadf753d3ff48f9285d131fc95e3e0e7e934d4f20bc896f5d
-      uri: huggingface://bartowski/DeepSeek-R1-Distill-Llama-8B-GGUF/DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf
- !!merge <<: *deepseek-r1
-  name: "deepseek-r1-distill-llama-70b"
-  icon: "https://avatars.githubusercontent.com/u/148330874"
-  urls:
-    - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B
-    - https://huggingface.co/bartowski/DeepSeek-R 1-Distill-Llama-70B-GGUF
-  overrides:
-    parameters:
-      model: DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf
-  files:
-    - filename: DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf
-      sha256: 181a82a1d6d2fa24fe4db83a68eee030384986bdbdd4773ba76424e3a6eb9fd8
-      uri: huggingface://bartowski/DeepSeek-R1-Distill-Llama-70B-GGUF/DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf
 - &qwen2  ## Start QWEN2
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  name: "qwen2-7b-instruct"
@@ -5714,32 +5617,6 @@
    - filename: marco-o1-uncensored.Q4_K_M.gguf
      sha256: ad0440270a7254098f90779744d3e5b34fe49b7baf97c819909ba9c5648cc0d9
      uri: huggingface://QuantFactory/marco-o1-uncensored-GGUF/marco-o1-uncensored.Q4_K_M.gguf
- !!merge <<: *qwen2
-  name: "minicpm-o-2_6"
-  icon: https://avatars.githubusercontent.com/u/89920203
-  urls:
-    - https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf
-    - https://huggingface.co/openbmb/MiniCPM-o-2_6
-  description: |
-    MiniCPM-o 2.6 is the latest and most capable model in the MiniCPM-o series. The model is built in an end-to-end fashion based on SigLip-400M, Whisper-medium-300M, ChatTTS-200M, and Qwen2.5-7B with a total of 8B parameters
-  tags:
-    - llm
-    - multimodal
-    - gguf
-    - gpu
-    - qwen2
-    - cpu
-  overrides:
-    mmproj: minicpm-o-2_6-mmproj-f16.gguf
-    parameters:
-      model: minicpm-o-2_6-Q4_K_M.gguf
-  files:
-    - filename: minicpm-o-2_6-Q4_K_M.gguf
-      sha256: 4f635fc0c0bb88d50ccd9cf1f1e5892b5cb085ff88fe0d8e1148fd9a8a836bc2
-      uri: huggingface://openbmb/MiniCPM-o-2_6-gguf/Model-7.6B-Q4_K_M.gguf
-    - filename: minicpm-o-2_6-mmproj-f16.gguf
-      sha256: efa4f7d96aa0f838f2023fc8d28e519179b16f1106777fa9280b32628191aa3e
-      uri: huggingface://openbmb/MiniCPM-o-2_6-gguf/mmproj-model-f16.gguf
 - !!merge <<: *qwen2
  name: "minicpm-v-2_6"
  license: apache-2.0
@@ -11137,7 +11014,7 @@
      uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
      sha256: 879db523c30d3b9017143d56705015e15a2cb5628762c11d086fed9538abd7fd
 - name: stable-diffusion-3-medium
-  icon: https://avatars.githubusercontent.com/u/100950301
+  icon: https://huggingface.co/leo009/stable-diffusion-3-medium/resolve/main/sd3demo.jpg
  license: other
  description: |
    Stable Diffusion 3 Medium is a Multimodal Diffusion Transformer (MMDiT) text-to-image model that features greatly improved performance in image quality, typography, complex prompt understanding, and resource-efficiency.
@@ -11151,63 +11028,6 @@
    - sd-3
    - gpu
  url: "github:mudler/LocalAI/gallery/stablediffusion3.yaml@master"
- name: sd-1.5-ggml
-  icon: https://avatars.githubusercontent.com/u/37351293
-  license: creativeml-openrail-m
-  url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master"
-  description: |
-    Stable Diffusion 1.5
-  urls:
-    - https://huggingface.co/second-state/stable-diffusion-v1-5-GGUF
-  tags:
-    - text-to-image
-    - stablediffusion
-    - gpu
-    - cpu
-  overrides:
-    options:
-      - "sampler:euler"
-    parameters:
-      model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf
-  files:
-    - filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
-      sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f"
-      uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
- name: sd-3.5-medium-ggml
-  license: stabilityai-ai-community
-  url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master"
-  description: |
-    Stable Diffusion 3.5 Medium is a Multimodal Diffusion Transformer (MMDiT) text-to-image model that features improved performance in image quality, typography, complex prompt understanding, and resource-efficiency.
-  urls:
-    - https://huggingface.co/stabilityai/stable-diffusion-3.5-medium
-    - https://huggingface.co/second-state/stable-diffusion-3.5-medium-GGUF
-  tags:
-    - text-to-image
-    - stablediffusion
-    - gpu
-    - cpu
-  icon: https://avatars.githubusercontent.com/u/100950301
-  overrides:
-    options:
-      - "clip_l_path:clip_l-Q4_0.gguf"
-      - "clip_g_path:clip_g-Q4_0.gguf"
-      - "t5xxl_path:t5xxl-Q4_0.gguf"
-      - "sampler:euler"
-    parameters:
-      model: sd3.5_medium-Q4_0.gguf
-  files:
-    - filename: "sd3.5_medium-Q4_0.gguf"
-      sha256: "3bb8c5e9ab0a841117089ed4ed81d885bb85161df2a766b812f829bc55b31adf"
-      uri: "huggingface://second-state/stable-diffusion-3.5-medium-GGUF/sd3.5_medium-Q4_0.gguf"
-    - filename: clip_g-Q4_0.gguf
-      sha256: c142411147e16b7c4b9cc1f5d977cbe596104435d76fde47172d3d35c5e58bb8
-      uri: huggingface://second-state/stable-diffusion-3.5-medium-GGUF/clip_g-Q4_0.gguf
-    - filename: clip_l-Q4_0.gguf
-      sha256: f5ad88ae2ac924eb4ac0298b77afa304b5e6014fc0c4128f0e3df40fdfcc0f8a
-      uri: huggingface://second-state/stable-diffusion-3.5-medium-GGUF/clip_l-Q4_0.gguf
-    - filename: t5xxl-Q4_0.gguf
-      sha256: 987ba47c158b890c274f78fd35324419f50941e846a49789f0977e9fe9d97ab7
-      uri: huggingface://second-state/stable-diffusion-3.5-medium-GGUF/t5xxl-Q4_0.gguf
 - name: sd-3.5-large-ggml
  license: stabilityai-ai-community
  url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master"
@@ -11218,10 +11038,10 @@
    - https://huggingface.co/second-state/stable-diffusion-3.5-large-GGUF
  tags:
    - text-to-image
-    - stablediffusion
+    - stablediffusion
    - gpu
    - cpu
-  icon: https://avatars.githubusercontent.com/u/100950301
+  icon: https://huggingface.co/stabilityai/stable-diffusion-3.5-large/media/main/sd3.5_large_demo.png
  overrides:
    parameters:
      model: sd3.5_large-Q4_0.gguf
@@ -11240,7 +11060,6 @@
      uri: huggingface://second-state/stable-diffusion-3.5-large-GGUF/t5xxl-Q5_0.gguf
 - &flux
  name: flux.1-dev
-  icon: https://avatars.githubusercontent.com/u/164064024
  license: flux-1-dev-non-commercial-license
  description: |
    FLUX.1 [dev] is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions. For more information, please read our blog post.
@@ -11264,6 +11083,7 @@
 - !!merge <<: *flux
  name: flux.1-schnell
  license: apache-2
+  icon: https://huggingface.co/black-forest-labs/FLUX.1-schnell/resolve/main/schnell_grid.jpeg
  description: |
    FLUX.1 [schnell] is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions. For more information, please read our blog post.
    Key Features
@@ -11296,6 +11116,7 @@
    - flux
    - gpu
    - cpu
+  icon: https://huggingface.co/black-forest-labs/FLUX.1-schnell/resolve/main/schnell_grid.jpeg
  overrides:
    parameters:
      model: flux1-dev-Q2_K.gguf
@@ -11315,7 +11136,6 @@
 - &whisper  ## Whisper
  url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master"
  name: "whisper-1"
-  icon: https://avatars.githubusercontent.com/u/14957082
  license: "MIT"
  urls:
    - https://github.com/ggerganov/whisper.cpp
@@ -11493,7 +11313,6 @@
  description: |
    Stable Diffusion in NCNN with c++, supported txt2img and img2img
  name: stablediffusion-cpp
-  icon: https://avatars.githubusercontent.com/u/100950301
 - &piper  ## Piper TTS
  url: github:mudler/LocalAI/gallery/piper.yaml@master
  name: voice-en-us-kathleen-low
@@ -12074,7 +11893,6 @@
      uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh_CN-huayan-medium.tar.gz
      sha256: 0299a5e7f481ba853404e9f0e1515a94d5409585d76963fa4d30c64bd630aa99
 - name: "silero-vad"
-  icon: https://github.com/snakers4/silero-models/raw/master/files/silero_logo.jpg
  url: github:mudler/LocalAI/gallery/virtual.yaml@master
  urls:
    - https://github.com/snakers4/silero-vad
@@ -12094,7 +11912,6 @@
      uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
      sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
 - name: "bark-cpp-small"
-  icon: https://avatars.githubusercontent.com/u/99442120
  url: github:mudler/LocalAI/gallery/virtual.yaml@master
  license: mit
  urls:
--- a/go.mod
+++ b/go.mod
@@ -93,6 +93,7 @@ require (
 	github.com/modern-go/reflect2 v1.0.2 // indirect
 	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
 	github.com/nikolalohinski/gonja/v2 v2.3.2 // indirect
+	github.com/philippgille/chromem-go v0.7.0 // indirect
 	github.com/pion/datachannel v1.5.10 // indirect
 	github.com/pion/dtls/v2 v2.2.12 // indirect
 	github.com/pion/ice/v2 v2.3.37 // indirect
--- a/go.sum
+++ b/go.sum
@@ -611,6 +611,8 @@ github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 h1:Ii+DKncOVM8Cu1H
 github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5/go.mod h1:iIss55rKnNBTvrwdmkUpLnDpZoAHvWaiq5+iMmen4AE=
 github.com/philhofer/fwd v1.1.2 h1:bnDivRJ1EWPjUIRXV5KfORO897HTbpFAQddBdE8t7Gw=
 github.com/philhofer/fwd v1.1.2/go.mod h1:qkPdfjR2SIEbspLqpe1tO4n5yICnr2DY7mqEx2tUTP0=
+github.com/philippgille/chromem-go v0.7.0 h1:4jfvfyKymjKNfGxBUhHUcj1kp7B17NL/I1P+vGh1RvY=
+github.com/philippgille/chromem-go v0.7.0/go.mod h1:hTd+wGEm/fFPQl7ilfCwQXkgEUxceYh86iIdoKMolPo=
 github.com/pierrec/lz4/v4 v4.1.2 h1:qvY3YFXRQE/XB8MlLzJH7mSzBs74eA2gg52YTk6jUPM=
 github.com/pierrec/lz4/v4 v4.1.2/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
 github.com/pion/datachannel v1.5.8 h1:ph1P1NsGkazkjrvyMfhRBUAWMxugJjq2HfQifaOoSNo=
--- a/pkg/downloader/uri.go
+++ b/pkg/downloader/uri.go
@@ -21,16 +21,14 @@ import (
 )

 const (
-	HuggingFacePrefix  = "huggingface://"
-	HuggingFacePrefix1 = "hf://"
-	HuggingFacePrefix2 = "hf.co/"
-	OCIPrefix          = "oci://"
-	OllamaPrefix       = "ollama://"
-	HTTPPrefix         = "http://"
-	HTTPSPrefix        = "https://"
-	GithubURI          = "github:"
-	GithubURI2         = "github://"
-	LocalPrefix        = "file://"
+	HuggingFacePrefix = "huggingface://"
+	OCIPrefix         = "oci://"
+	OllamaPrefix      = "ollama://"
+	HTTPPrefix        = "http://"
+	HTTPSPrefix       = "https://"
+	GithubURI         = "github:"
+	GithubURI2        = "github://"
+	LocalPrefix       = "file://"
 )

 type URI string
@@ -129,8 +127,6 @@ func (u URI) LooksLikeURL() bool {
 	return strings.HasPrefix(string(u), HTTPPrefix) ||
 		strings.HasPrefix(string(u), HTTPSPrefix) ||
 		strings.HasPrefix(string(u), HuggingFacePrefix) ||
-		strings.HasPrefix(string(u), HuggingFacePrefix1) ||
-		strings.HasPrefix(string(u), HuggingFacePrefix2) ||
 		strings.HasPrefix(string(u), GithubURI) ||
 		strings.HasPrefix(string(u), OllamaPrefix) ||
 		strings.HasPrefix(string(u), OCIPrefix) ||
@@ -174,10 +170,8 @@ func (s URI) ResolveURL() string {
 		projectPath := strings.Join(repoPath[2:], "/")

 		return fmt.Sprintf("https://raw.githubusercontent.com/%s/%s/%s/%s", org, project, branch, projectPath)
-	case strings.HasPrefix(string(s), HuggingFacePrefix) || strings.HasPrefix(string(s), HuggingFacePrefix1) || strings.HasPrefix(string(s), HuggingFacePrefix2):
+	case strings.HasPrefix(string(s), HuggingFacePrefix):
 		repository := strings.Replace(string(s), HuggingFacePrefix, "", 1)
-		repository = strings.Replace(repository, HuggingFacePrefix1, "", 1)
-		repository = strings.Replace(repository, HuggingFacePrefix2, "", 1)
 		// convert repository to a full URL.
 		// e.g. TheBloke/Mixtral-8x7B-v0.1-GGUF/mixtral-8x7b-v0.1.Q2_K.gguf@main -> https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q2_K.gguf
 		owner := strings.Split(repository, "/")[0]
--- a/pkg/functions/functions.go
+++ b/pkg/functions/functions.go
@@ -34,7 +34,7 @@ type Tool struct {
 }
 type Tools []Tool

-// ToJSONStructure converts a list of functions to a JSON structure that can be parsed to a grammar
+// ToJSONStructure converts a list of functions to a JSON structure that can be parsed to a grammar
 // This allows the LLM to return a response of the type: { "name": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }
 func (f Functions) ToJSONStructure(name, args string) JSONFunctionStructure {
 	nameKey := defaultFunctionNameKey
--- a/pkg/grpc/backend.go
+++ b/pkg/grpc/backend.go
@@ -46,8 +46,7 @@ type Backend interface {
 	Status(ctx context.Context) (*pb.StatusResponse, error)

 	StoresSet(ctx context.Context, in *pb.StoresSetOptions, opts ...grpc.CallOption) (*pb.Result, error)
-	StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, opts ...grpc.CallOption) (*pb.Result, error)
-	StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error)
+	StoresReset(ctx context.Context, in *pb.StoresResetOptions, opts ...grpc.CallOption) (*pb.Result, error)
 	StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error)

 	Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.CallOption) (*pb.RerankResult, error)
--- a/pkg/grpc/base/base.go
+++ b/pkg/grpc/base/base.go
@@ -80,11 +80,7 @@ func (llm *Base) StoresSet(*pb.StoresSetOptions) error {
 	return fmt.Errorf("unimplemented")
 }

-func (llm *Base) StoresGet(*pb.StoresGetOptions) (pb.StoresGetResult, error) {
-	return pb.StoresGetResult{}, fmt.Errorf("unimplemented")
-}
-
-func (llm *Base) StoresDelete(*pb.StoresDeleteOptions) error {
+func (llm *Base) StoresReset(*pb.StoresResetOptions) error {
 	return fmt.Errorf("unimplemented")
 }

--- a/pkg/grpc/client.go
+++ b/pkg/grpc/client.go
@@ -303,7 +303,7 @@ func (c *Client) StoresSet(ctx context.Context, in *pb.StoresSetOptions, opts ..
 	return client.StoresSet(ctx, in, opts...)
 }

-func (c *Client) StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, opts ...grpc.CallOption) (*pb.Result, error) {
+func (c *Client) StoresReset(ctx context.Context, in *pb.StoresResetOptions, opts ...grpc.CallOption) (*pb.Result, error) {
 	if !c.parallel {
 		c.opMutex.Lock()
 		defer c.opMutex.Unlock()
@@ -318,25 +318,7 @@ func (c *Client) StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, o
 	}
 	defer conn.Close()
 	client := pb.NewBackendClient(conn)
-	return client.StoresDelete(ctx, in, opts...)
-}
-
-func (c *Client) StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error) {
-	if !c.parallel {
-		c.opMutex.Lock()
-		defer c.opMutex.Unlock()
-	}
-	c.setBusy(true)
-	defer c.setBusy(false)
-	c.wdMark()
-	defer c.wdUnMark()
-	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
-	if err != nil {
-		return nil, err
-	}
-	defer conn.Close()
-	client := pb.NewBackendClient(conn)
-	return client.StoresGet(ctx, in, opts...)
+	return client.StoresReset(ctx, in, opts...)
 }

 func (c *Client) StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error) {
--- a/pkg/grpc/embed.go
+++ b/pkg/grpc/embed.go
@@ -71,12 +71,8 @@ func (e *embedBackend) StoresSet(ctx context.Context, in *pb.StoresSetOptions, o
 	return e.s.StoresSet(ctx, in)
 }

-func (e *embedBackend) StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, opts ...grpc.CallOption) (*pb.Result, error) {
-	return e.s.StoresDelete(ctx, in)
-}
-
-func (e *embedBackend) StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ...grpc.CallOption) (*pb.StoresGetResult, error) {
-	return e.s.StoresGet(ctx, in)
+func (e *embedBackend) StoresReset(ctx context.Context, in *pb.StoresResetOptions, opts ...grpc.CallOption) (*pb.Result, error) {
+	return e.s.StoresReset(ctx, in)
 }

 func (e *embedBackend) StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error) {
--- a/pkg/grpc/interface.go
+++ b/pkg/grpc/interface.go
@@ -21,8 +21,7 @@ type LLM interface {
 	Status() (pb.StatusResponse, error)

 	StoresSet(*pb.StoresSetOptions) error
-	StoresDelete(*pb.StoresDeleteOptions) error
-	StoresGet(*pb.StoresGetOptions) (pb.StoresGetResult, error)
+	StoresReset(*pb.StoresResetOptions) error
 	StoresFind(*pb.StoresFindOptions) (pb.StoresFindResult, error)

 	VAD(*pb.VADRequest) (pb.VADResponse, error)
--- a/pkg/grpc/server.go
+++ b/pkg/grpc/server.go
@@ -191,28 +191,19 @@ func (s *server) StoresSet(ctx context.Context, in *pb.StoresSetOptions) (*pb.Re
 	return &pb.Result{Message: "Set key", Success: true}, nil
 }

-func (s *server) StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions) (*pb.Result, error) {
+// StoresReset asks the backend to reset its store. The backend lock is held
+// for the call when the backend reports Locking(). The returned Result carries
+// Success=false and the backend error text when the reset fails.
+func (s *server) StoresReset(ctx context.Context, in *pb.StoresResetOptions) (*pb.Result, error) {
 	if s.llm.Locking() {
 		s.llm.Lock()
 		defer s.llm.Unlock()
 	}
-	err := s.llm.StoresDelete(in)
+	err := s.llm.StoresReset(in)
 	if err != nil {
-		return &pb.Result{Message: fmt.Sprintf("Error deleting entry: %s", err.Error()), Success: false}, err
+		return &pb.Result{Message: fmt.Sprintf("Error resetting store: %s", err.Error()), Success: false}, err
 	}
-	return &pb.Result{Message: "Deleted key", Success: true}, nil
-}
-
-func (s *server) StoresGet(ctx context.Context, in *pb.StoresGetOptions) (*pb.StoresGetResult, error) {
-	if s.llm.Locking() {
-		s.llm.Lock()
-		defer s.llm.Unlock()
-	}
-	res, err := s.llm.StoresGet(in)
-	if err != nil {
-		return nil, err
-	}
-	return &res, nil
+	return &pb.Result{Message: "Deleted mem db", Success: true}, nil
 }

 func (s *server) StoresFind(ctx context.Context, in *pb.StoresFindOptions) (*pb.StoresFindResult, error) {
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -29,14 +29,11 @@ var Aliases map[string]string = map[string]string{
 	"langchain-huggingface":  LCHuggingFaceBackend,
 	"transformers-musicgen":  TransformersBackend,
 	"sentencetransformers":   TransformersBackend,
-	"mamba":                  TransformersBackend,
-	"stablediffusion":        StableDiffusionGGMLBackend,
 }

 var TypeAlias map[string]string = map[string]string{
 	"sentencetransformers":   "SentenceTransformer",
 	"huggingface-embeddings": "SentenceTransformer",
-	"mamba":                  "Mamba",
 	"transformers-musicgen":  "MusicgenForConditionalGeneration",
 }

@@ -48,7 +45,6 @@ const (
 	LLamaCPP = "llama-cpp"

 	LLamaCPPAVX2     = "llama-cpp-avx2"
-	LLamaCPPAVX512   = "llama-cpp-avx512"
 	LLamaCPPAVX      = "llama-cpp-avx"
 	LLamaCPPFallback = "llama-cpp-fallback"
 	LLamaCPPCUDA     = "llama-cpp-cuda"
@@ -58,27 +54,15 @@ const (

 	LLamaCPPGRPC = "llama-cpp-grpc"

-	WhisperBackend             = "whisper"
-	StableDiffusionGGMLBackend = "stablediffusion-ggml"
-	PiperBackend               = "piper"
-	LCHuggingFaceBackend       = "huggingface"
+	WhisperBackend         = "whisper"
+	StableDiffusionBackend = "stablediffusion"
+	PiperBackend           = "piper"
+	LCHuggingFaceBackend   = "huggingface"

 	TransformersBackend = "transformers"
 	LocalStoreBackend   = "local-store"
 )

-var llamaCPPVariants = []string{
-	LLamaCPPAVX2,
-	LLamaCPPAVX512,
-	LLamaCPPAVX,
-	LLamaCPPFallback,
-	LLamaCPPCUDA,
-	LLamaCPPHipblas,
-	LLamaCPPSycl16,
-	LLamaCPPSycl32,
-	LLamaCPPGRPC,
-}
-
 func backendPath(assetDir, backend string) string {
 	return filepath.Join(assetDir, "backend-assets", "grpc", backend)
 }
@@ -120,14 +104,40 @@ ENTRY:
 	if AutoDetect {
 		// if we find the llama.cpp variants, show them of as a single backend (llama-cpp) as later we are going to pick that up
 		// when starting the service
-		foundVariants := map[string]bool{}
+		foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda, foundLCPPHipblas, foundSycl16, foundSycl32 := false, false, false, false, false, false, false, false
 		if _, ok := backends[LLamaCPP]; !ok {
 			for _, e := range entry {
-				for _, v := range llamaCPPVariants {
-					if strings.Contains(e.Name(), v) && !foundVariants[v] {
-						backends[LLamaCPP] = append(backends[LLamaCPP], v)
-						foundVariants[v] = true
-					}
+				if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 {
+					backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPAVX2)
+					foundLCPPAVX2 = true
+				}
+				if strings.Contains(e.Name(), LLamaCPPAVX) && !foundLCPPAVX {
+					backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPAVX)
+					foundLCPPAVX = true
+				}
+				if strings.Contains(e.Name(), LLamaCPPFallback) && !foundLCPPFallback {
+					backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPFallback)
+					foundLCPPFallback = true
+				}
+				if strings.Contains(e.Name(), LLamaCPPGRPC) && !foundLCPPGRPC {
+					backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPGRPC)
+					foundLCPPGRPC = true
+				}
+				if strings.Contains(e.Name(), LLamaCPPCUDA) && !foundLCPPCuda {
+					backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPCUDA)
+					foundLCPPCuda = true
+				}
+				if strings.Contains(e.Name(), LLamaCPPHipblas) && !foundLCPPHipblas {
+					backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPHipblas)
+					foundLCPPHipblas = true
+				}
+				if strings.Contains(e.Name(), LLamaCPPSycl16) && !foundSycl16 {
+					backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPSycl16)
+					foundSycl16 = true
+				}
+				if strings.Contains(e.Name(), LLamaCPPSycl32) && !foundSycl32 {
+					backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPSycl32)
+					foundSycl32 = true
 				}
 			}
 		}
@@ -270,12 +280,6 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
 			log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
 			selectedProcess = p
 		}
-	} else if xsysinfo.HasCPUCaps(cpuid.AVX512F) {
-		p := backendPath(assetDir, LLamaCPPAVX512)
-		if _, err := os.Stat(p); err == nil {
-			log.Info().Msgf("[%s] attempting to load with AVX512 variant", backend)
-			selectedProcess = p
-		}
 	} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
 		p := backendPath(assetDir, LLamaCPPAVX)
 		if _, err := os.Stat(p); err == nil {
--- a/pkg/stablediffusion/generate.go
+++ b/pkg/stablediffusion/generate.go
@@ -0,0 +1,35 @@
+//go:build stablediffusion
+// +build stablediffusion
+
+package stablediffusion
+
+import (
+	stableDiffusion "github.com/mudler/go-stable-diffusion"
+)
+
+// GenerateImage forwards an image generation request to go-stable-diffusion.
+//
+// When both height and width are at most 512 the regular generator is called
+// with every argument plus an empty string in the second-to-last position
+// (presumably the init image path; confirm against the library). Larger
+// requests use GenerateImageUpscaled, which is not given the mode argument.
+// The library's error is returned unchanged.
+func GenerateImage(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst, asset_dir string) error {
+	fitsBaseSize := height <= 512 && width <= 512
+	if fitsBaseSize {
+		return stableDiffusion.GenerateImage(
+			height, width,
+			mode, step, seed,
+			positive_prompt, negative_prompt,
+			dst,
+			"",
+			asset_dir,
+		)
+	}
+	return stableDiffusion.GenerateImageUpscaled(
+		height, width,
+		step, seed,
+		positive_prompt, negative_prompt,
+		dst, asset_dir,
+	)
+}
--- a/pkg/stablediffusion/generate_unsupported.go
+++ b/pkg/stablediffusion/generate_unsupported.go
@@ -0,0 +1,14 @@
+//go:build !stablediffusion
+// +build !stablediffusion
+
+package stablediffusion
+
+import "fmt"
+
+// GenerateImage is the variant compiled when the stablediffusion build tag is
+// absent. It ignores every argument and always returns an error saying that
+// the build lacks the tag.
+func GenerateImage(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst, asset_dir string) error {
+	const reason = "This version of LocalAI was built without the stablediffusion tag"
+	return fmt.Errorf(reason)
+}
--- a/pkg/stablediffusion/stablediffusion.go
+++ b/pkg/stablediffusion/stablediffusion.go
@@ -0,0 +1,27 @@
+package stablediffusion
+
+import "os"
+
+// StableDiffusion holds the asset directory handed to GenerateImage.
+type StableDiffusion struct {
+	assetDir string
+}
+
+// New returns a StableDiffusion bound to assetDir. The path is checked with
+// os.Stat first and any error from that call is returned as is.
+func New(assetDir string) (*StableDiffusion, error) {
+	_, statErr := os.Stat(assetDir)
+	if statErr != nil {
+		return nil, statErr
+	}
+
+	sd := &StableDiffusion{assetDir: assetDir}
+	return sd, nil
+}
+
+// GenerateImage calls the package-level GenerateImage with the receiver's
+// asset directory appended as the final argument.
+func (s *StableDiffusion) GenerateImage(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst string) error {
+	assetDir := s.assetDir
+	return GenerateImage(height, width, mode, step, seed, positive_prompt, negative_prompt, dst, assetDir)
+}
--- a/pkg/store/client.go
+++ b/pkg/store/client.go
@@ -1,155 +0,0 @@
-package store
-
-import (
-	"context"
-	"fmt"
-
-	grpc "github.com/mudler/LocalAI/pkg/grpc"
-	"github.com/mudler/LocalAI/pkg/grpc/proto"
-)
-
-// Wrapper for the GRPC client so that simple use cases are handled without verbosity
-
-// SetCols sets multiple key-value pairs in the store
-// It's in columnar format so that keys[i] is associated with values[i]
-func SetCols(ctx context.Context, c grpc.Backend, keys [][]float32, values [][]byte) error {
-	protoKeys := make([]*proto.StoresKey, len(keys))
-	for i, k := range keys {
-		protoKeys[i] = &proto.StoresKey{
-			Floats: k,
-		}
-	}
-	protoValues := make([]*proto.StoresValue, len(values))
-	for i, v := range values {
-		protoValues[i] = &proto.StoresValue{
-			Bytes: v,
-		}
-	}
-	setOpts := &proto.StoresSetOptions{
-		Keys:   protoKeys,
-		Values: protoValues,
-	}
-
-	res, err := c.StoresSet(ctx, setOpts)
-	if err != nil {
-		return err
-	}
-
-	if res.Success {
-		return nil
-	}
-
-	return fmt.Errorf("failed to set keys: %v", res.Message)
-}
-
-// SetSingle sets a single key-value pair in the store
-// Don't call this in a tight loop, instead use SetCols
-func SetSingle(ctx context.Context, c grpc.Backend, key []float32, value []byte) error {
-	return SetCols(ctx, c, [][]float32{key}, [][]byte{value})
-}
-
-// DeleteCols deletes multiple key-value pairs from the store
-// It's in columnar format so that keys[i] is associated with values[i]
-func DeleteCols(ctx context.Context, c grpc.Backend, keys [][]float32) error {
-	protoKeys := make([]*proto.StoresKey, len(keys))
-	for i, k := range keys {
-		protoKeys[i] = &proto.StoresKey{
-			Floats: k,
-		}
-	}
-	deleteOpts := &proto.StoresDeleteOptions{
-		Keys: protoKeys,
-	}
-
-	res, err := c.StoresDelete(ctx, deleteOpts)
-	if err != nil {
-		return err
-	}
-
-	if res.Success {
-		return nil
-	}
-
-	return fmt.Errorf("failed to delete keys: %v", res.Message)
-}
-
-// DeleteSingle deletes a single key-value pair from the store
-// Don't call this in a tight loop, instead use DeleteCols
-func DeleteSingle(ctx context.Context, c grpc.Backend, key []float32) error {
-	return DeleteCols(ctx, c, [][]float32{key})
-}
-
-// GetCols gets multiple key-value pairs from the store
-// It's in columnar format so that keys[i] is associated with values[i]
-// Be warned the keys are sorted and will be returned in a different order than they were input
-// There is no guarantee as to how the keys are sorted
-func GetCols(ctx context.Context, c grpc.Backend, keys [][]float32) ([][]float32, [][]byte, error) {
-	protoKeys := make([]*proto.StoresKey, len(keys))
-	for i, k := range keys {
-		protoKeys[i] = &proto.StoresKey{
-			Floats: k,
-		}
-	}
-	getOpts := &proto.StoresGetOptions{
-		Keys: protoKeys,
-	}
-
-	res, err := c.StoresGet(ctx, getOpts)
-	if err != nil {
-		return nil, nil, err
-	}
-
-	ks := make([][]float32, len(res.Keys))
-	for i, k := range res.Keys {
-		ks[i] = k.Floats
-	}
-	vs := make([][]byte, len(res.Values))
-	for i, v := range res.Values {
-		vs[i] = v.Bytes
-	}
-
-	return ks, vs, nil
-}
-
-// GetSingle gets a single key-value pair from the store
-// Don't call this in a tight loop, instead use GetCols
-func GetSingle(ctx context.Context, c grpc.Backend, key []float32) ([]byte, error) {
-	_, values, err := GetCols(ctx, c, [][]float32{key})
-	if err != nil {
-		return nil, err
-	}
-
-	if len(values) > 0 {
-		return values[0], nil
-	}
-
-	return nil, fmt.Errorf("failed to get key")
-}
-
-// Find similar keys to the given key. Returns the keys, values, and similarities
-func Find(ctx context.Context, c grpc.Backend, key []float32, topk int) ([][]float32, [][]byte, []float32, error) {
-	findOpts := &proto.StoresFindOptions{
-		Key: &proto.StoresKey{
-			Floats: key,
-		},
-		TopK: int32(topk),
-	}
-
-	res, err := c.StoresFind(ctx, findOpts)
-	if err != nil {
-		return nil, nil, nil, err
-	}
-
-	ks := make([][]float32, len(res.Keys))
-	vs := make([][]byte, len(res.Values))
-
-	for i, k := range res.Keys {
-		ks[i] = k.Floats
-	}
-
-	for i, v := range res.Values {
-		vs[i] = v.Bytes
-	}
-
-	return ks, vs, res.Similarities, nil
-}
--- a/swagger/docs.go
+++ b/swagger/docs.go
@@ -1645,9 +1645,6 @@ const docTemplate = `{
                "prompt": {
                    "description": "Prompt is read only by completion/image API calls"
                },
-                "quality": {
-                    "type": "string"
-                },
                "repeat_last_n": {
                    "type": "integer"
                },
--- a/swagger/swagger.json
+++ b/swagger/swagger.json
@@ -1638,9 +1638,6 @@
                "prompt": {
                    "description": "Prompt is read only by completion/image API calls"
                },
-                "quality": {
-                    "type": "string"
-                },
                "repeat_last_n": {
                    "type": "integer"
                },
--- a/swagger/swagger.yaml
+++ b/swagger/swagger.yaml
@@ -570,8 +570,6 @@ definitions:
        type: number
      prompt:
        description: Prompt is read only by completion/image API calls
-      quality:
-        type: string
      repeat_last_n:
        type: integer
      repeat_penalty:
--- a/tests/e2e-aio/e2e_suite_test.go
+++ b/tests/e2e-aio/e2e_suite_test.go
@@ -54,7 +54,7 @@ var _ = BeforeSuite(func() {
 	Eventually(func() error {
 		_, err := client.ListModels(context.TODO())
 		return err
-	}, "50m").ShouldNot(HaveOccurred())
+	}, "20m").ShouldNot(HaveOccurred())
 })

 var _ = AfterSuite(func() {
--- a/tests/e2e-aio/e2e_test.go
+++ b/tests/e2e-aio/e2e_test.go
@@ -123,9 +123,8 @@ var _ = Describe("E2E test", func() {
 			It("correctly", func() {
 				resp, err := client.CreateImage(context.TODO(),
 					openai.ImageRequest{
-						Prompt:  "test",
-						Quality: "1",
-						Size:    openai.CreateImageSize256x256,
+						Prompt: "test",
+						Size:   openai.CreateImageSize512x512,
 					},
 				)
 				Expect(err).ToNot(HaveOccurred())
@@ -136,8 +135,7 @@ var _ = Describe("E2E test", func() {
 				resp, err := client.CreateImage(context.TODO(),
 					openai.ImageRequest{
 						Prompt:         "test",
-						Size:           openai.CreateImageSize256x256,
-						Quality:        "1",
+						Size:           openai.CreateImageSize512x512,
 						ResponseFormat: openai.CreateImageResponseFormatURL,
 					},
 				)
@@ -149,8 +147,7 @@ var _ = Describe("E2E test", func() {
 				resp, err := client.CreateImage(context.TODO(),
 					openai.ImageRequest{
 						Prompt:         "test",
-						Size:           openai.CreateImageSize256x256,
-						Quality:        "1",
+						Size:           openai.CreateImageSize512x512,
 						ResponseFormat: openai.CreateImageResponseFormatB64JSON,
 					},
 				)
--- a/tests/integration/stores_test.go
+++ b/tests/integration/stores_test.go
@@ -4,7 +4,6 @@ import (
 	"context"
 	"embed"
 	"math"
-	"math/rand"
 	"os"
 	"path/filepath"

@@ -23,19 +22,6 @@ import (
 //go:embed backend-assets/*
 var backendAssets embed.FS

-func normalize(vecs [][]float32) {
-	for i, k := range vecs {
-		norm := float64(0)
-		for _, x := range k {
-			norm += float64(x * x)
-		}
-		norm = math.Sqrt(norm)
-		for j, x := range k {
-			vecs[i][j] = x / float32(norm)
-		}
-	}
-}
-
 var _ = Describe("Integration tests for the stores backend(s) and internal APIs", Label("stores"), func() {
 	Context("Embedded Store get,set and delete", func() {
 		var sl *model.ModelLoader
@@ -84,6 +70,10 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs"
 		})

 		It("should be able to set a key", func() {
+			sc.StoresSet(context.Background(), &store.StoresSetOptions{
+				Keys:   [][]float32{{0.1, 0.2, 0.3}},
+				Values: [][]byte{[]byte("test")},
+			})
 			err := store.SetSingle(context.Background(), sc, []float32{0.1, 0.2, 0.3}, []byte("test"))
 			Expect(err).ToNot(HaveOccurred())
 		})
@@ -206,8 +196,17 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs"
 			// set 3 vectors that are at varying angles to {0.5, 0.5, 0.5}
 			keys := [][]float32{{0.1, 0.3, 0.5}, {0.5, 0.5, 0.5}, {0.6, 0.6, -0.6}, {0.7, -0.7, -0.7}}
 			vals := [][]byte{[]byte("test0"), []byte("test1"), []byte("test2"), []byte("test3")}
-
-			normalize(keys)
+			// normalize the keys
+			for i, k := range keys {
+				norm := float64(0)
+				for _, x := range k {
+					norm += float64(x * x)
+				}
+				norm = math.Sqrt(norm)
+				for j, x := range k {
+					keys[i][j] = x / float32(norm)
+				}
+			}

 			err := store.SetCols(context.Background(), sc, keys, vals)
 			Expect(err).ToNot(HaveOccurred())
@@ -230,121 +229,5 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs"
 			Expect(ks[1]).To(Equal(keys[1]))
 			Expect(vals[1]).To(Equal(vals[1]))
 		})
-
-		It("It produces the correct cosine similarities for orthogonal and opposite unit vectors", func() {
-			keys := [][]float32{{1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0}, {-1.0, 0.0, 0.0}}
-			vals := [][]byte{[]byte("x"), []byte("y"), []byte("z"), []byte("-z")}
-
-			err := store.SetCols(context.Background(), sc, keys, vals);
-			Expect(err).ToNot(HaveOccurred())
-
-			_, _, sims, err := store.Find(context.Background(), sc, keys[0], 4)
-			Expect(err).ToNot(HaveOccurred())
-			Expect(sims).To(Equal([]float32{1.0, 0.0, 0.0, -1.0}))
-		})
-
-		It("It produces the correct cosine similarities for orthogonal and opposite vectors", func() {
-			keys := [][]float32{{1.0, 0.0, 1.0}, {0.0, 2.0, 0.0}, {0.0, 0.0, -1.0}, {-1.0, 0.0, -1.0}}
-			vals := [][]byte{[]byte("x"), []byte("y"), []byte("z"), []byte("-z")}
-
-			err := store.SetCols(context.Background(), sc, keys, vals);
-			Expect(err).ToNot(HaveOccurred())
-
-			_, _, sims, err := store.Find(context.Background(), sc, keys[0], 4)
-			Expect(err).ToNot(HaveOccurred())
-			Expect(sims[0]).To(BeNumerically("~", 1, 0.1))
-			Expect(sims[1]).To(BeNumerically("~", 0, 0.1))
-			Expect(sims[2]).To(BeNumerically("~", -0.7, 0.1))
-			Expect(sims[3]).To(BeNumerically("~", -1, 0.1))
-		})
-
-		expectTriangleEq := func(keys [][]float32, vals [][]byte) {
-			sims := map[string]map[string]float32{}
-
-			// compare every key vector pair and store the similarities in a lookup table
-			// that uses the values as keys
-			for i, k := range keys {
-				_, valsk, simsk, err := store.Find(context.Background(), sc, k, 9)
-				Expect(err).ToNot(HaveOccurred())
-
-				for j, v := range valsk {
-					p := string(vals[i])
-					q := string(v)
-
-					if sims[p] == nil {
-						sims[p] = map[string]float32{}
-					}
-
-					//log.Debug().Strs("vals", []string{p, q}).Float32("similarity", simsk[j]).Send()
-
-					sims[p][q] = simsk[j]
-				}
-			}
-
-			// Check that the triangle inequality holds for every combination of the triplet
-			// u, v and w
-			for _, simsu := range sims {
-				for w, simw := range simsu {
-					// acos(u,w) <= ...
-					uws := math.Acos(float64(simw))
-
-					// ... acos(u,v) + acos(v,w)
-					for v, _ := range simsu {
-						uvws := math.Acos(float64(simsu[v])) + math.Acos(float64(sims[v][w]))
-
-						//log.Debug().Str("u", u).Str("v", v).Str("w", w).Send()
-						//log.Debug().Float32("uw", simw).Float32("uv", simsu[v]).Float32("vw", sims[v][w]).Send()
-						Expect(uws).To(BeNumerically("<=", uvws))
-					}
-				}
-			}
-		}
-
-		It("It obeys the triangle inequality for normalized values", func() {
-			keys := [][]float32{
-				{1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0},
-				{-1.0, 0.0, 0.0}, {0.0, -1.0, 0.0}, {0.0, 0.0, -1.0},
-				{2.0, 3.0, 4.0}, {9.0, 7.0, 1.0}, {0.0, -1.2, 2.3},
-			}
-			vals := [][]byte{
-				[]byte("x"), []byte("y"), []byte("z"),
-				[]byte("-x"), []byte("-y"), []byte("-z"),
-				[]byte("u"), []byte("v"), []byte("w"),
-			}
-
-			normalize(keys[6:])
-
-			err := store.SetCols(context.Background(), sc, keys, vals);
-			Expect(err).ToNot(HaveOccurred())
-
-			expectTriangleEq(keys, vals)
-		})
-
-		It("It obeys the triangle inequality", func() {
-			rnd := rand.New(rand.NewSource(151))
-			keys := make([][]float32, 20)
-			vals := make([][]byte, 20)
-
-			for i := range keys {
-				k := make([]float32, 768)
-
-				for j := range k {
-					k[j] = rnd.Float32()
-				}
-
-				keys[i] = k
-			}
-
-			c := byte('a')
-			for i := range vals {
-				vals[i] = []byte{c}
-				c += 1
-			}
-
-			err := store.SetCols(context.Background(), sc, keys, vals);
-			Expect(err).ToNot(HaveOccurred())
-
-			expectTriangleEq(keys, vals)
-		})
 	})
 })