fix(ci): install latest git

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-02-03 11:13:31 -05:00 · 2024-10-24 14:55:24 +02:00
293 changed files with 10231 additions and 4800 deletions
--- a/Requests/model
+++ b/Requests/model
@@ -1,11 +0,0 @@
-meta {
-  name: model delete
-  type: http
-  seq: 7
-}
-
-post {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/models/galleries
-  body: none
-  auth: none
-}
--- a/Requests/transcription/gb1.ogg
+++ b/Requests/transcription/gb1.ogg
--- a/Requests/transcription/transcribe.bru
+++ b/Requests/transcription/transcribe.bru
@@ -1,16 +0,0 @@
-meta {
-  name: transcribe
-  type: http
-  seq: 1
-}
-
-post {
-  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/audio/transcriptions
-  body: multipartForm
-  auth: none
-}
-
-body:multipart-form {
-  file: @file(transcription/gb1.ogg)
-  model: whisper-1
-}
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,2 +1 @@
 *.sh text eol=lf
-backend/cpp/llama/*.hpp linguist-vendored
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -1,11 +1,6 @@
 enhancements:
 - head-branch: ['^feature', 'feature']

-dependencies:
- any:
-  - changed-files:
-    - any-glob-to-any-file: 'Makefile'
-
 kind/documentation:
 - any:
  - changed-files:
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -12,14 +12,23 @@ jobs:
          - repository: "ggerganov/llama.cpp"
            variable: "CPPLLAMA_VERSION"
            branch: "master"
+          - repository: "go-skynet/go-ggml-transformers.cpp"
+            variable: "GOGGMLTRANSFORMERS_VERSION"
+            branch: "master"
+          - repository: "donomii/go-rwkv.cpp"
+            variable: "RWKV_VERSION"
+            branch: "main"
          - repository: "ggerganov/whisper.cpp"
            variable: "WHISPER_CPP_VERSION"
            branch: "master"
-          - repository: "PABannier/bark.cpp"
-            variable: "BARKCPP_VERSION"
+          - repository: "go-skynet/go-bert.cpp"
+            variable: "BERT_VERSION"
+            branch: "master"
+          - repository: "go-skynet/bloomz.cpp"
+            variable: "BLOOMZ_VERSION"
            branch: "main"
-          - repository: "leejet/stable-diffusion.cpp"
-            variable: "STABLEDIFFUSION_GGML_VERSION"
+          - repository: "mudler/go-ggllm.cpp"
+            variable: "GOGGLLM_VERSION"
            branch: "master"
          - repository: "mudler/go-stable-diffusion"
            variable: "STABLEDIFFUSION_VERSION"
--- a/.github/workflows/checksum_checker.yaml
+++ b/.github/workflows/checksum_checker.yaml
@@ -23,7 +23,7 @@ jobs:
          sudo pip install --upgrade pip
          pip install huggingface_hub
      - name: 'Setup yq'
-        uses: dcarbone/install-yq-action@v1.3.1
+        uses: dcarbone/install-yq-action@v1.1.1
        with:
          version: 'v4.44.2'
          download-compressed: true
--- a/.github/workflows/deploy-explorer.yaml
+++ b/.github/workflows/deploy-explorer.yaml
@@ -33,7 +33,7 @@ jobs:
        run: |
          CGO_ENABLED=0 make build-api
      - name: rm
-        uses: appleboy/ssh-action@v1.2.0
+        uses: appleboy/ssh-action@v1.1.0
        with:
            host: ${{ secrets.EXPLORER_SSH_HOST }}
            username: ${{ secrets.EXPLORER_SSH_USERNAME }}
@@ -53,7 +53,7 @@ jobs:
            rm: true
            target: ./local-ai
      - name: restarting
-        uses: appleboy/ssh-action@v1.2.0
+        uses: appleboy/ssh-action@v1.1.0
        with:
            host: ${{ secrets.EXPLORER_SSH_HOST }}
            username: ${{ secrets.EXPLORER_SSH_USERNAME }}
--- a/.github/workflows/generate_intel_image.yaml
+++ b/.github/workflows/generate_intel_image.yaml
@@ -15,7 +15,7 @@ jobs:
    strategy:
      matrix:
        include:
-          - base-image: intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04
+          - base-image: intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04
            runs-on: 'ubuntu-latest'
            platforms: 'linux/amd64'
    runs-on: ${{matrix.runs-on}}
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -105,6 +105,14 @@ jobs:
  tests-parler-tts:
    runs-on: ubuntu-latest
    steps:
+      - name: Force Install GIT latest
+        run: |
+          sudo apt-get update \
+          && sudo apt-get install -y software-properties-common \
+          && sudo apt-get update \
+          && sudo add-apt-repository -y ppa:git-core/ppa \
+          && sudo apt-get update \
+          && sudo apt-get install -y git
      - name: Clone
        uses: actions/checkout@v4
        with:
@@ -123,13 +131,6 @@ jobs:
        run: |
           make --jobs=5 --output-sync=target -C backend/python/parler-tts
           make --jobs=5 --output-sync=target -C backend/python/parler-tts test
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.19
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true

  tests-openvoice:
    runs-on: ubuntu-latest
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -224,7 +224,7 @@ jobs:
      - name: Dependencies
        run: |
          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
-          pip install --user --no-cache-dir grpcio-tools
+          pip install --user --no-cache-dir grpcio-tools==1.64.1
      - name: Test
        run: |
          export C_INCLUDE_PATH=/usr/local/include
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,6 @@
 /sources/
 __pycache__/
 *.a
-*.o
 get-sources
 prepare-sources
 /backend/cpp/llama/grpc-server
@@ -13,6 +12,7 @@ prepare-sources

 go-ggml-transformers
 go-gpt2
+go-rwkv
 whisper.cpp
 /bloomz
 go-bert
--- a/3
+++ b/3
@@ -85,8 +85,7 @@ WORKDIR /build
 # The requirements-extras target is for any builds with IMAGE_TYPE=extras. It should not be placed in this target unless every IMAGE_TYPE=extras build will use it
 FROM requirements-core AS requirements-extras

-# Install uv as a system package
-RUN curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR=/usr/bin sh
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh
 ENV PATH="/root/.cargo/bin:${PATH}"

 RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
--- a/194
+++ b/194
@@ -8,15 +8,23 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=cc98896db858df7aa40d0e16a505883ef196a482
+CPPLLAMA_VERSION?=0a1c750c80147687df267114c81956757cc14382
+
+# go-rwkv version
+RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
+RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
+WHISPER_CPP_VERSION?=0fbaac9c891055796456df7b9122a70c220f9ca1
+
+# bert.cpp version
+BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
+BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4

 # go-piper version
 PIPER_REPO?=https://github.com/mudler/go-piper
-PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
+PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759

 # stablediffusion version
 STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
@@ -26,18 +34,6 @@ STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
 TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
 TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057

-# bark.cpp
-BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
-BARKCPP_VERSION?=v1.0.0
-
-# stablediffusion.cpp (ggml)
-STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
-STABLEDIFFUSION_GGML_VERSION?=4570715727f35e5a07a76796d823824c8f42206c
-
-ONNX_VERSION?=1.20.0
-ONNX_ARCH?=x64
-ONNX_OS?=linux
-
 export BUILD_TYPE?=
 export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
 export CMAKE_ARGS?=
@@ -49,7 +45,6 @@ CGO_LDFLAGS_WHISPER+=-lggml
 CUDA_LIBPATH?=/usr/local/cuda/lib64/
 GO_TAGS?=
 BUILD_ID?=
-NATIVE?=false

 TEST_DIR=/tmp/test

@@ -88,25 +83,7 @@ ifndef UNAME_S
 UNAME_S := $(shell uname -s)
 endif

-# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
-ifeq ($(NATIVE),false)
-	CMAKE_ARGS+=-DGGML_NATIVE=OFF
-endif
-
-# Detect if we are running on arm64
-ifneq (,$(findstring aarch64,$(shell uname -m)))
-	ONNX_ARCH=aarch64
-endif
-
 ifeq ($(OS),Darwin)
-	ONNX_OS=osx
-	ifneq (,$(findstring aarch64,$(shell uname -m)))
-		ONNX_ARCH=arm64
-	else ifneq (,$(findstring arm64,$(shell uname -m)))
-		ONNX_ARCH=arm64
-	else
-		ONNX_ARCH=x86_64
-	endif

 	ifeq ($(OSX_SIGNING_IDENTITY),)
 		OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
@@ -161,10 +138,10 @@ ifeq ($(BUILD_TYPE),hipblas)
 	export CC=$(ROCM_HOME)/llvm/bin/clang
 	# llama-ggml has no hipblas support, so override it here.
 	export STABLE_BUILD_TYPE=
-	export GGML_HIP=1
+	export GGML_HIPBLAS=1
 	GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
 	AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
-	CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
+	CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
 	CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
 endif

@@ -202,23 +179,16 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
 endif

 ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
+ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
 ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
+ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
 ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
-
-ifeq ($(ONNX_OS),linux)
-ifeq ($(ONNX_ARCH),x64)
-	ALL_GRPC_BACKENDS+=backend-assets/grpc/bark-cpp
-	ALL_GRPC_BACKENDS+=backend-assets/grpc/stablediffusion-ggml
-endif
-endif
-
 ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
-ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad
 ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
 # Use filter-out to remove the specified backends
 ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
@@ -239,6 +209,19 @@ endif

 all: help

+## BERT embeddings
+sources/go-bert.cpp:
+	mkdir -p sources/go-bert.cpp
+	cd sources/go-bert.cpp && \
+	git init && \
+	git remote add origin $(BERT_REPO) && \
+	git fetch origin && \
+	git checkout $(BERT_VERSION) && \
+	git submodule update --init --recursive --depth 1 --single-branch
+
+sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
+	$(MAKE) -C sources/go-bert.cpp libgobert.a
+
 ## go-llama.cpp
 sources/go-llama.cpp:
 	mkdir -p sources/go-llama.cpp
@@ -252,23 +235,6 @@ sources/go-llama.cpp:
 sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
 	$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a

-## bark.cpp
-sources/bark.cpp:
-	git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \
-	cd sources/bark.cpp && \
-	git checkout $(BARKCPP_VERSION) && \
-	git submodule update --init --recursive --depth 1 --single-branch
-
-sources/bark.cpp/build/libbark.a: sources/bark.cpp
-	cd sources/bark.cpp && \
-	mkdir -p build && \
-	cd build && \
-	cmake $(CMAKE_ARGS) .. && \
-	cmake --build . --config Release
-
-backend/go/bark/libbark.a: sources/bark.cpp/build/libbark.a
-	$(MAKE) -C backend/go/bark libbark.a
-
 ## go-piper
 sources/go-piper:
 	mkdir -p sources/go-piper
@@ -282,7 +248,21 @@ sources/go-piper:
 sources/go-piper/libpiper_binding.a: sources/go-piper
 	$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o

-## stable diffusion (onnx)
+
+## RWKV
+sources/go-rwkv.cpp:
+	mkdir -p sources/go-rwkv.cpp
+	cd sources/go-rwkv.cpp && \
+	git init && \
+	git remote add origin $(RWKV_REPO) && \
+	git fetch origin && \
+	git checkout $(RWKV_VERSION) && \
+	git submodule update --init --recursive --depth 1 --single-branch
+
+sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
+	cd sources/go-rwkv.cpp && cd rwkv.cpp &&	cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF &&	cmake --build . && 	cp librwkv.a ..
+
+## stable diffusion
 sources/go-stable-diffusion:
 	mkdir -p sources/go-stable-diffusion
 	cd sources/go-stable-diffusion && \
@@ -295,44 +275,6 @@ sources/go-stable-diffusion:
 sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
 	CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a

-## stablediffusion (ggml)
-sources/stablediffusion-ggml.cpp:
-	git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
-	cd sources/stablediffusion-ggml.cpp && \
-	git checkout $(STABLEDIFFUSION_GGML_VERSION) && \
-	git submodule update --init --recursive --depth 1 --single-branch
-
-sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a: sources/stablediffusion-ggml.cpp
-	cd sources/stablediffusion-ggml.cpp && \
-	mkdir -p build && \
-	cd build && \
-	cmake $(CMAKE_ARGS) .. && \
-	cmake --build . --config Release
-
-backend/go/image/stablediffusion-ggml/libsd.a: sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a
-	$(MAKE) -C backend/go/image/stablediffusion-ggml libsd.a
-
-backend-assets/grpc/stablediffusion-ggml: backend/go/image/stablediffusion-ggml/libsd.a backend-assets/grpc
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ LIBRARY_PATH=$(CURDIR)/backend/go/image/stablediffusion-ggml/ \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion-ggml ./backend/go/image/stablediffusion-ggml/
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/stablediffusion-ggml
-endif
-
-sources/onnxruntime:
-	mkdir -p sources/onnxruntime
-	curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
-	cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
-	cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./
-
-backend-assets/lib/libonnxruntime.so.1: backend-assets/lib sources/onnxruntime
-	cp -rfv sources/onnxruntime/lib/* backend-assets/lib/
-ifeq ($(OS),Darwin)
-	mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
-else
-	mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
-endif
-
 ## tiny-dream
 sources/go-tiny-dream:
 	mkdir -p sources/go-tiny-dream
@@ -359,19 +301,23 @@ sources/whisper.cpp:
 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
 	cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a

-get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
+get-sources: sources/go-llama.cpp sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp

 replace:
+	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
 	$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
 	$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
 	$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp

 dropreplace:
+	$(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp
 	$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
 	$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
+	$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
 	$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
 	$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
 	$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
@@ -384,8 +330,10 @@ prepare-sources: get-sources replace
 rebuild: ## Rebuilds the project
 	$(GOCMD) clean -cache
 	$(MAKE) -C sources/go-llama.cpp clean
+	$(MAKE) -C sources/go-rwkv.cpp clean
 	$(MAKE) -C sources/whisper.cpp clean
 	$(MAKE) -C sources/go-stable-diffusion clean
+	$(MAKE) -C sources/go-bert.cpp clean
 	$(MAKE) -C sources/go-piper clean
 	$(MAKE) -C sources/go-tiny-dream clean
 	$(MAKE) build
@@ -400,9 +348,7 @@ clean: ## Remove build related file
 	rm -rf release/
 	rm -rf backend-assets/*
 	$(MAKE) -C backend/cpp/grpc clean
-	$(MAKE) -C backend/go/bark clean
 	$(MAKE) -C backend/cpp/llama clean
-	$(MAKE) -C backend/go/image/stablediffusion-ggml clean
 	rm -rf backend/cpp/llama-* || true
 	$(MAKE) dropreplace
 	$(MAKE) protogen-clean
@@ -493,6 +439,8 @@ test-models/testmodel.ggml:
 	wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
 	wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
 	wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
+	wget -q https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv
+	wget -q https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
 	cp tests/models_fixtures/* test-models

 prepare-test: grpcs
@@ -745,6 +693,13 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
 backend-assets/grpc: protogen-go replace
 	mkdir -p backend-assets/grpc

+backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/bert-embeddings
+endif
+
 backend-assets/grpc/huggingface: backend-assets/grpc
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
 ifneq ($(UPX),)
@@ -804,6 +759,10 @@ backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/ll
 	$(info ${GREEN}I llama-cpp build info:fallback${RESET})
 	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
 	cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
+# TODO: every binary should have its own folder instead, so can have different metal implementations
+ifeq ($(BUILD_TYPE),metal)
+	cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
+endif

 backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
 	cp -rf backend/cpp/llama backend/cpp/llama-cuda
@@ -816,7 +775,7 @@ backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/lla
 	cp -rf backend/cpp/llama backend/cpp/llama-hipblas
 	$(MAKE) -C backend/cpp/llama-hipblas purge
 	$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
+	BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
 	cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas

 backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
@@ -851,13 +810,6 @@ ifneq ($(UPX),)
 	$(UPX) backend-assets/grpc/llama-ggml
 endif

-backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/bark-cpp
-endif
-
 backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
 	CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
@@ -865,6 +817,13 @@ ifneq ($(UPX),)
 	$(UPX) backend-assets/grpc/piper
 endif

+backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/rwkv
+endif
+
 backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
@@ -872,13 +831,6 @@ ifneq ($(UPX),)
 	$(UPX) backend-assets/grpc/stablediffusion
 endif

-backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/silero-vad
-endif
-
 backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
@@ -939,7 +891,7 @@ docker-aio-all:

 docker-image-intel:
 	docker build \
-		--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
+		--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
 		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 		--build-arg GO_TAGS="none" \
 		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
@@ -947,7 +899,7 @@ docker-image-intel:

 docker-image-intel-xpu:
 	docker build \
-		--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
+		--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
 		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 		--build-arg GO_TAGS="none" \
 		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--- a/README.md
+++ b/README.md
@@ -38,13 +38,9 @@
 </a>
 </p>

-<p align="center">
-<a href="https://trendshift.io/repositories/1484" target="_blank"><img src="https://trendshift.io/api/badge/repositories/1484" alt="go-skynet%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
-</p>
-
 > :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
 >
-> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples) 
+> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/go-skynet/LocalAI/tree/master/examples/) 

 [![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)

@@ -60,17 +56,14 @@ curl https://localai.io/install.sh | sh

 Or run with docker:
 ```bash
-# CPU only image:
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-cpu
-
-# Nvidia GPU:
-docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
-
-# CPU and GPU image (bigger size):
-docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
-
-# AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/)
 docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
+# Alternative images:
+# - if you have an Nvidia GPU:
+# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
+# - without preconfigured models
+# docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
+# - without preconfigured models for Nvidia GPUs
+# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12 
 ```

 To load models:
@@ -92,10 +85,6 @@ local-ai run oci://localai/phi-2:latest

 ## 📰 Latest project news

- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 )
- Nov 2024: Bark.cpp backend added ( https://github.com/mudler/LocalAI/pull/4287 )
- Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204
- Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples)
 - Aug 2024:  🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
 - July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
 - June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
@@ -166,9 +155,6 @@ Other:
 - Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
 - Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
 - Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
- Another Telegram Bot https://github.com/JackBekket/Hellper
- Auto-documentation https://github.com/JackBekket/Reflexia
- Github bot which answer on issues, with code and documentation as context https://github.com/JackBekket/GitHelper
 - Github Actions: https://github.com/marketplace/actions/start-localai
 - Examples: https://github.com/mudler/LocalAI/tree/master/examples/
  
@@ -243,6 +229,7 @@ LocalAI couldn't have been built without the help of great software already avai
 - https://github.com/antimatter15/alpaca.cpp
 - https://github.com/EdVince/Stable-Diffusion-NCNN
 - https://github.com/ggerganov/whisper.cpp
+- https://github.com/saharNooby/rwkv.cpp
 - https://github.com/rhasspy/piper

 ## 🤗 Contributors
--- a/aio/cpu/embeddings.yaml
+++ b/aio/cpu/embeddings.yaml
@@ -1,7 +1,7 @@
 name: text-embedding-ada-002
-embeddings: true
+backend: bert-embeddings
 parameters:
-  model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf
+  model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin

 usage: |
    You can test this model with curl like this:
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -28,8 +28,6 @@ service Backend {
  rpc Rerank(RerankRequest) returns (RerankResult) {}

  rpc GetMetrics(MetricsRequest) returns (MetricsResponse);
-
-  rpc VAD(VADRequest) returns (VADResponse) {}
 }

 // Define the empty request
@@ -235,13 +233,6 @@ message ModelOptions {

  bool FlashAttention = 56;
  bool NoKVOffload = 57;
-
-  string ModelPath = 59;
-
-  repeated string LoraAdapters = 60;
-  repeated float LoraScales = 61;
-
-  repeated string Options = 62;
 }

 message Result {
@@ -297,19 +288,6 @@ message TTSRequest {
  optional string language = 5;
 }

-message VADRequest {
-  repeated float audio = 1;
-}
-
-message VADSegment {
-  float start = 1;
-  float end = 2;
-}
-
-message VADResponse {
-  repeated VADSegment segments = 1;
-}
-
 message SoundGenerationRequest {
  string text = 1;
  string model = 2;
--- a/backend/cpp/llama/Makefile
+++ b/backend/cpp/llama/Makefile
@@ -22,7 +22,7 @@ else ifeq ($(BUILD_TYPE),clblas)
 	CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
 # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ 
 else ifeq ($(BUILD_TYPE),hipblas)
-	CMAKE_ARGS+=-DGGML_HIP=ON
+	CMAKE_ARGS+=-DGGML_HIPBLAS=ON
 # If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
 # But if it's OSX without metal, disable it here
 else ifeq ($(OS),Darwin)
@@ -30,7 +30,9 @@ else ifeq ($(OS),Darwin)
 		CMAKE_ARGS+=-DGGML_METAL=OFF
 	else
 		CMAKE_ARGS+=-DGGML_METAL=ON
-		CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=ON
+# Until this is tested properly, we disable embedded metal file
+# as we already embed it as part of the LocalAI assets
+		CMAKE_ARGS+=-DGGML_METAL_EMBED_LIBRARY=OFF
 		TARGET+=--target ggml-metal
 	endif
 endif
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -203,7 +203,7 @@ struct llama_client_slot
    std::string stopping_word;

    // sampling
-    struct common_params_sampling sparams;
+    struct common_sampler_params sparams;
    common_sampler *ctx_sampling = nullptr;

    int32_t ga_i = 0;   // group-attention state
@@ -662,7 +662,7 @@ struct llama_server_context

    bool launch_slot_with_data(llama_client_slot* &slot, json data) {
        slot_params default_params;
-        common_params_sampling default_sparams;
+        common_sampler_params default_sparams;
 
        slot->params.stream             = json_value(data, "stream",            false);
        slot->params.cache_prompt       = json_value(data, "cache_prompt",      false);
@@ -670,6 +670,7 @@ struct llama_server_context
        slot->sparams.top_k             = json_value(data, "top_k",             default_sparams.top_k);
        slot->sparams.top_p             = json_value(data, "top_p",             default_sparams.top_p);
        slot->sparams.min_p             = json_value(data, "min_p",             default_sparams.min_p);
+        slot->sparams.tfs_z             = json_value(data, "tfs_z",             default_sparams.tfs_z);
        slot->sparams.typ_p             = json_value(data, "typical_p",         default_sparams.typ_p);
        slot->sparams.temp              = json_value(data, "temperature",       default_sparams.temp);
        slot->sparams.dynatemp_range    = json_value(data, "dynatemp_range",    default_sparams.dynatemp_range);
@@ -1205,6 +1206,7 @@ struct llama_server_context
            {"top_k",             slot.sparams.top_k},
            {"top_p",             slot.sparams.top_p},
            {"min_p",             slot.sparams.min_p},
+            {"tfs_z",             slot.sparams.tfs_z},
            {"typical_p",         slot.sparams.typ_p},
            {"repeat_last_n",     slot.sparams.penalty_last_n},
            {"repeat_penalty",    slot.sparams.penalty_repeat},
@@ -2103,6 +2105,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
    //     slot->params.n_predict        = json_value(data, "n_predict",         default_params.n_predict);
    //     slot->sparams.top_k           = json_value(data, "top_k",             default_sparams.top_k);
    //     slot->sparams.top_p           = json_value(data, "top_p",             default_sparams.top_p);
+    //     slot->sparams.tfs_z           = json_value(data, "tfs_z",             default_sparams.tfs_z);
    //     slot->sparams.typical_p       = json_value(data, "typical_p",         default_sparams.typical_p);
    //     slot->sparams.temp            = json_value(data, "temperature",       default_sparams.temp);
    //     slot->sparams.penalty_last_n  = json_value(data, "repeat_last_n",     default_sparams.penalty_last_n);
@@ -2126,6 +2129,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
    data["n_predict"] = predict->tokens() == 0 ? -1 : predict->tokens();
    data["top_k"] = predict->topk();
    data["top_p"] = predict->topp();
+    data["tfs_z"] = predict->tailfreesamplingz();
    data["typical_p"] = predict->typicalp();
    data["temperature"] = predict->temperature();
    data["repeat_last_n"] = predict->repeat();
@@ -2172,6 +2176,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
 //     llama.params.n_predict = predict->tokens() == 0 ? -1 : predict->tokens();
 //     llama.params.sparams.top_k = predict->topk();
 //     llama.params.sparams.top_p = predict->topp();
+//     llama.params.sparams.tfs_z = predict->tailfreesamplingz();
 //     llama.params.sparams.typical_p = predict->typicalp();
 //     llama.params.sparams.penalty_last_n = predict->repeat();
 //     llama.params.sparams.temp = predict->temperature();
@@ -2299,7 +2304,6 @@ static void params_parse(const backend::ModelOptions* request,
    params.use_mmap = request->mmap();
    params.flash_attn = request->flashattention();
    params.no_kv_offload = request->nokvoffload();
-    params.ctx_shift = false; // We control context-shifting in any case (and we disable it as it could just lead to infinite loops)

    params.embedding = request->embeddings();

--- a/backend/go/bark/Makefile
+++ b/backend/go/bark/Makefile
@@ -1,25 +0,0 @@
-INCLUDE_PATH := $(abspath ./)
-LIBRARY_PATH := $(abspath ./)
-
-AR?=ar
-
-BUILD_TYPE?=
-# keep standard at C11 and C++11
-CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../sources/bark.cpp/examples -I$(INCLUDE_PATH)/../../../sources/bark.cpp/spm-headers -I$(INCLUDE_PATH)/../../../sources/bark.cpp -O3 -DNDEBUG -std=c++17 -fPIC
-LDFLAGS  = -L$(LIBRARY_PATH) -L$(LIBRARY_PATH)/../../../sources/bark.cpp/build/examples -lbark -lstdc++ -lm
-
-# warnings
-CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
-
-gobark.o:
-	$(CXX) $(CXXFLAGS) gobark.cpp -o gobark.o -c $(LDFLAGS)
-
-libbark.a: gobark.o
-	cp $(INCLUDE_PATH)/../../../sources/bark.cpp/build/libbark.a ./
-	$(AR) rcs libbark.a gobark.o
-	$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml.c.o
-	$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-alloc.c.o
-	$(AR) rcs libbark.a $(LIBRARY_PATH)/../../../sources/bark.cpp/build/encodec.cpp/ggml/src/CMakeFiles/ggml.dir/ggml-backend.c.o
-
-clean:
-	rm -f gobark.o libbark.a
--- a/backend/go/bark/gobark.cpp
+++ b/backend/go/bark/gobark.cpp
@@ -1,85 +0,0 @@
-#include <iostream>
-#include <tuple>
-
-#include "bark.h"
-#include "gobark.h"
-#include "common.h"
-#include "ggml.h"
-
-struct bark_context *c;
-
-void bark_print_progress_callback(struct bark_context *bctx, enum bark_encoding_step step, int progress, void *user_data) {
-    if (step == bark_encoding_step::SEMANTIC) {
-        printf("\rGenerating semantic tokens... %d%%", progress);
-    } else if (step == bark_encoding_step::COARSE) {
-        printf("\rGenerating coarse tokens... %d%%", progress);
-    } else if (step == bark_encoding_step::FINE) {
-        printf("\rGenerating fine tokens... %d%%", progress);
-    }
-    fflush(stdout);
-}
-
-int load_model(char *model) {
-    // initialize bark context
-    struct bark_context_params ctx_params = bark_context_default_params();
-    bark_params params;
-
-    params.model_path = model;
-
-   // ctx_params.verbosity = verbosity;
-    ctx_params.progress_callback = bark_print_progress_callback;
-    ctx_params.progress_callback_user_data = nullptr;
-
-    struct bark_context *bctx = bark_load_model(params.model_path.c_str(), ctx_params, params.seed);
-    if (!bctx) {
-        fprintf(stderr, "%s: Could not load model\n", __func__);
-        return 1;
-    }
-
-    c = bctx;
-
-    return 0;
-}
-
-int tts(char *text,int  threads, char *dst ) {
-
-    ggml_time_init();
-    const int64_t t_main_start_us = ggml_time_us();
-
-    // generate audio
-    if (!bark_generate_audio(c, text, threads)) {
-        fprintf(stderr, "%s: An error occured. If the problem persists, feel free to open an issue to report it.\n", __func__);
-        return 1;
-    }
-
-    const float *audio_data = bark_get_audio_data(c);
-    if (audio_data == NULL) {
-        fprintf(stderr, "%s: Could not get audio data\n", __func__);
-        return 1;
-    }
-
-    const int audio_arr_size = bark_get_audio_data_size(c);
-
-    std::vector<float> audio_arr(audio_data, audio_data + audio_arr_size);
-
-    write_wav_on_disk(audio_arr, dst);
-
-    // report timing
-    {
-        const int64_t t_main_end_us = ggml_time_us();
-        const int64_t t_load_us = bark_get_load_time(c);
-        const int64_t t_eval_us = bark_get_eval_time(c);
-
-        printf("\n\n");
-        printf("%s:     load time = %8.2f ms\n", __func__, t_load_us / 1000.0f);
-        printf("%s:     eval time = %8.2f ms\n", __func__, t_eval_us / 1000.0f);
-        printf("%s:    total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us) / 1000.0f);
-    }
-    
-    return 0;
-}
-
-int unload() {
-    bark_free(c);
-}
-
--- a/backend/go/bark/gobark.go
+++ b/backend/go/bark/gobark.go
@@ -1,52 +0,0 @@
-package main
-
-// #cgo CXXFLAGS: -I${SRCDIR}/../../../sources/bark.cpp/ -I${SRCDIR}/../../../sources/bark.cpp/encodec.cpp -I${SRCDIR}/../../../sources/bark.cpp/examples -I${SRCDIR}/../../../sources/bark.cpp/spm-headers
-// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../sources/bark.cpp/build/examples -L${SRCDIR}/../../../sources/bark.cpp/build/encodec.cpp/ -lbark -lencodec -lcommon
-// #include <gobark.h>
-// #include <stdlib.h>
-import "C"
-
-import (
-	"fmt"
-	"unsafe"
-
-	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
-)
-
-type Bark struct {
-	base.SingleThread
-	threads int
-}
-
-func (sd *Bark) Load(opts *pb.ModelOptions) error {
-
-	sd.threads = int(opts.Threads)
-
-	modelFile := C.CString(opts.ModelFile)
-	defer C.free(unsafe.Pointer(modelFile))
-
-	ret := C.load_model(modelFile)
-	if ret != 0 {
-		return fmt.Errorf("inference failed")
-	}
-
-	return nil
-}
-
-func (sd *Bark) TTS(opts *pb.TTSRequest) error {
-	t := C.CString(opts.Text)
-	defer C.free(unsafe.Pointer(t))
-
-	dst := C.CString(opts.Dst)
-	defer C.free(unsafe.Pointer(dst))
-
-	threads := C.int(sd.threads)
-
-	ret := C.tts(t, threads, dst)
-	if ret != 0 {
-		return fmt.Errorf("inference failed")
-	}
-
-	return nil
-}
--- a/backend/go/bark/gobark.h
+++ b/backend/go/bark/gobark.h
@@ -1,8 +0,0 @@
-#ifdef __cplusplus
-extern "C" {
-#endif
-int load_model(char *model);
-int tts(char *text,int  threads, char *dst );
-#ifdef __cplusplus
-}
-#endif
--- a/backend/go/image/stablediffusion-ggml/Makefile
+++ b/backend/go/image/stablediffusion-ggml/Makefile
@@ -1,21 +0,0 @@
-INCLUDE_PATH := $(abspath ./)
-LIBRARY_PATH := $(abspath ./)
-
-AR?=ar
-
-BUILD_TYPE?=
-# keep standard at C11 and C++11
-CXXFLAGS = -I. -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/ggml/include -I$(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp -O3 -DNDEBUG -std=c++17 -fPIC
-
-# warnings
-CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
-
-gosd.o:
-	$(CXX) $(CXXFLAGS) gosd.cpp -o gosd.o -c
-
-libsd.a: gosd.o
-	cp $(INCLUDE_PATH)/../../../../sources/stablediffusion-ggml.cpp/build/libstable-diffusion.a ./libsd.a
-	$(AR) rcs libsd.a gosd.o
-
-clean:
-	rm -f gosd.o libsd.a
--- a/backend/go/image/stablediffusion-ggml/gosd.cpp
+++ b/backend/go/image/stablediffusion-ggml/gosd.cpp
@@ -1,228 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <time.h>
-#include <iostream>
-#include <random>
-#include <string>
-#include <vector>
-#include "gosd.h"
-
-// #include "preprocessing.hpp"
-#include "flux.hpp"
-#include "stable-diffusion.h"
-
-#define STB_IMAGE_IMPLEMENTATION
-#define STB_IMAGE_STATIC
-#include "stb_image.h"
-
-#define STB_IMAGE_WRITE_IMPLEMENTATION
-#define STB_IMAGE_WRITE_STATIC
-#include "stb_image_write.h"
-
-#define STB_IMAGE_RESIZE_IMPLEMENTATION
-#define STB_IMAGE_RESIZE_STATIC
-#include "stb_image_resize.h"
-
-// Names of the sampler method, same order as enum sample_method in stable-diffusion.h
-const char* sample_method_str[] = {
-    "euler_a",
-    "euler",
-    "heun",
-    "dpm2",
-    "dpm++2s_a",
-    "dpm++2m",
-    "dpm++2mv2",
-    "ipndm",
-    "ipndm_v",
-    "lcm",
-};
-
-// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
-const char* schedule_str[] = {
-    "default",
-    "discrete",
-    "karras",
-    "exponential",
-    "ays",
-    "gits",
-};
-
-sd_ctx_t* sd_c;
-
-sample_method_t sample_method;
-
-int load_model(char *model, char* options[], int threads, int diff) {
-    fprintf (stderr, "Loading model!\n");
-
-    char *stableDiffusionModel = "";
-    if (diff == 1 ) {
-        stableDiffusionModel = model;
-        model = "";
-    }
-
-    // decode options. Options are in form optname:optvale, or if booleans only optname.
-    char *clip_l_path  = "";
-    char *clip_g_path  = "";
-    char *t5xxl_path  = "";
-    char *vae_path  = "";
-    char *scheduler = "";
-    char *sampler = "";
-
-    // If options is not NULL, parse options
-    for (int i = 0; options[i] != NULL; i++) {
-        char *optname = strtok(options[i], ":");
-        char *optval = strtok(NULL, ":");
-        if (optval == NULL) {
-            optval = "true";
-        }
-
-        if (!strcmp(optname, "clip_l_path")) {
-            clip_l_path = optval;
-        }
-        if (!strcmp(optname, "clip_g_path")) {
-            clip_g_path = optval;
-        }
-        if (!strcmp(optname, "t5xxl_path")) {
-            t5xxl_path = optval;
-        }
-        if (!strcmp(optname, "vae_path")) {
-            vae_path = optval;
-        }
-        if (!strcmp(optname, "scheduler")) {
-            scheduler = optval;
-        }
-        if (!strcmp(optname, "sampler")) {
-            sampler = optval;
-        }
-    }
-
-    int sample_method_found = -1;
-    for (int m = 0; m < N_SAMPLE_METHODS; m++) {
-        if (!strcmp(sampler, sample_method_str[m])) {
-            sample_method_found = m;
-        }
-    }
-    if (sample_method_found == -1) {
-        fprintf(stderr, "Invalid sample method, default to EULER_A!\n");
-        sample_method_found = EULER_A;
-    }
-    sample_method = (sample_method_t)sample_method_found;
-
-    int schedule_found            = -1;
-    for (int d = 0; d < N_SCHEDULES; d++) {
-        if (!strcmp(scheduler, schedule_str[d])) {
-            schedule_found = d;
-                fprintf (stderr, "Found scheduler: %s\n", scheduler);
-
-        }
-    }
-
-    if (schedule_found == -1) {
-        fprintf (stderr, "Invalid scheduler! using DEFAULT\n");
-        schedule_found = DEFAULT;
-    }
-
-    schedule_t schedule = (schedule_t)schedule_found;
-    
-    fprintf (stderr, "Creating context\n");
-    sd_ctx_t* sd_ctx = new_sd_ctx(model,
-                                  clip_l_path,
-                                  clip_g_path,
-                                  t5xxl_path,
-                                  stableDiffusionModel,
-                                  vae_path,
-                                  "",
-                                  "",
-                                  "",
-                                  "",
-                                  "",
-                                  false,
-                                  false,
-                                  false,
-                                  threads,
-                                  SD_TYPE_COUNT,
-                                  STD_DEFAULT_RNG,
-                                  schedule,
-                                  false,
-                                  false,
-                                  false,
-                                  false);
-
-    if (sd_ctx == NULL) {
-        fprintf (stderr, "failed loading model (generic error)\n");
-        return 1;
-    }
-    fprintf (stderr, "Created context: OK\n");
-
-    sd_c = sd_ctx;
-
-    return 0;
-}
-
-int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed , char *dst, float cfg_scale) {
-
-    sd_image_t* results;
-
-    std::vector<int> skip_layers = {7, 8, 9};
-
-    fprintf (stderr, "Generating image\n");
-
-    results = txt2img(sd_c,
-                            text,
-                            negativeText,
-                            -1, //clip_skip
-                            cfg_scale, // sfg_scale
-                            3.5f,
-                            width,
-                            height,
-                            sample_method, 
-                            steps,
-                            seed,
-                            1,
-                            NULL,
-                            0.9f,
-                            20.f,
-                            false,
-                            "",
-                            skip_layers.data(),
-                            skip_layers.size(),
-                            0,
-                            0.01,
-                            0.2);
-
-    if (results == NULL) {
-        fprintf (stderr, "NO results\n");
-        return 1;
-    }
-
-    if (results[0].data == NULL) {
-        fprintf (stderr, "Results with no data\n");
-        return 1;
-    }
-
-    fprintf (stderr, "Writing PNG\n");
-
-    fprintf (stderr, "DST: %s\n", dst);
-    fprintf (stderr, "Width: %d\n", results[0].width);
-    fprintf (stderr, "Height: %d\n", results[0].height);
-    fprintf (stderr, "Channel: %d\n", results[0].channel);
-    fprintf (stderr, "Data: %p\n", results[0].data);
-
-    stbi_write_png(dst, results[0].width, results[0].height, results[0].channel,
-                       results[0].data, 0, NULL);
-    fprintf (stderr, "Saved resulting image to '%s'\n", dst);
-
-    // TODO: free results. Why does it crash?
-
-    free(results[0].data);
-    results[0].data = NULL;
-    free(results);
-    fprintf (stderr, "gen_image is done", dst);
-
-    return 0;
-}
-
-int unload() {
-    free_sd_ctx(sd_c);
-}
-
--- a/backend/go/image/stablediffusion-ggml/gosd.go
+++ b/backend/go/image/stablediffusion-ggml/gosd.go
@@ -1,96 +0,0 @@
-package main
-
-// #cgo CXXFLAGS: -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/thirdparty -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp -I${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/ggml/include
-// #cgo LDFLAGS: -L${SRCDIR}/ -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src/ggml-cpu -L${SRCDIR}/../../../../sources/stablediffusion-ggml.cpp/build/ggml/src -lsd -lstdc++ -lm -lggml -lggml-base -lggml-cpu -lgomp
-// #include <gosd.h>
-// #include <stdlib.h>
-import "C"
-
-import (
-	"fmt"
-	"os"
-	"path/filepath"
-	"strings"
-	"unsafe"
-
-	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
-	"github.com/mudler/LocalAI/pkg/utils"
-)
-
-type SDGGML struct {
-	base.SingleThread
-	threads      int
-	sampleMethod string
-	cfgScale     float32
-}
-
-func (sd *SDGGML) Load(opts *pb.ModelOptions) error {
-
-	sd.threads = int(opts.Threads)
-
-	modelFile := C.CString(opts.ModelFile)
-	defer C.free(unsafe.Pointer(modelFile))
-
-	var options **C.char
-	// prepare the options array to pass to C
-
-	size := C.size_t(unsafe.Sizeof((*C.char)(nil)))
-	length := C.size_t(len(opts.Options))
-	options = (**C.char)(C.malloc(length * size))
-	view := (*[1 << 30]*C.char)(unsafe.Pointer(options))[0:len(opts.Options):len(opts.Options)]
-
-	var diffusionModel int
-
-	var oo []string
-	for _, op := range opts.Options {
-		if op == "diffusion_model" {
-			diffusionModel = 1
-			continue
-		}
-
-		// If it's an option path, we resolve absolute path from the model path
-		if strings.Contains(op, ":") && strings.Contains(op, "path") {
-			data := strings.Split(op, ":")
-			data[1] = filepath.Join(opts.ModelPath, data[1])
-			if err := utils.VerifyPath(data[1], opts.ModelPath); err == nil {
-				oo = append(oo, strings.Join(data, ":"))
-			}
-		} else {
-			oo = append(oo, op)
-		}
-	}
-
-	fmt.Fprintf(os.Stderr, "Options: %+v\n", oo)
-
-	for i, x := range oo {
-		view[i] = C.CString(x)
-	}
-
-	sd.cfgScale = opts.CFGScale
-
-	ret := C.load_model(modelFile, options, C.int(opts.Threads), C.int(diffusionModel))
-	if ret != 0 {
-		return fmt.Errorf("could not load model")
-	}
-
-	return nil
-}
-
-func (sd *SDGGML) GenerateImage(opts *pb.GenerateImageRequest) error {
-	t := C.CString(opts.PositivePrompt)
-	defer C.free(unsafe.Pointer(t))
-
-	dst := C.CString(opts.Dst)
-	defer C.free(unsafe.Pointer(dst))
-
-	negative := C.CString(opts.NegativePrompt)
-	defer C.free(unsafe.Pointer(negative))
-
-	ret := C.gen_image(t, negative, C.int(opts.Width), C.int(opts.Height), C.int(opts.Step), C.int(opts.Seed), dst, C.float(sd.cfgScale))
-	if ret != 0 {
-		return fmt.Errorf("inference failed")
-	}
-
-	return nil
-}
--- a/backend/go/image/stablediffusion-ggml/gosd.h
+++ b/backend/go/image/stablediffusion-ggml/gosd.h
@@ -1,8 +0,0 @@
-#ifdef __cplusplus
-extern "C" {
-#endif
-int load_model(char *model, char* options[], int threads, int diffusionModel);
-int gen_image(char *text, char *negativeText, int width, int height, int steps, int seed, char *dst, float cfg_scale);
-#ifdef __cplusplus
-}
-#endif
--- a/backend/go/image/stablediffusion-ggml/main.go
+++ b/backend/go/image/stablediffusion-ggml/main.go
@@ -1,20 +0,0 @@
-package main
-
-// Note: this is started internally by LocalAI and a server is allocated for each model
-import (
-	"flag"
-
-	grpc "github.com/mudler/LocalAI/pkg/grpc"
-)
-
-var (
-	addr = flag.String("addr", "localhost:50051", "the address to connect to")
-)
-
-func main() {
-	flag.Parse()
-
-	if err := grpc.StartServer(*addr, &SDGGML{}); err != nil {
-		panic(err)
-	}
-}
--- a/backend/go/llm/bert/bert.go
+++ b/backend/go/llm/bert/bert.go
@@ -0,0 +1,34 @@
+package main
+
+// This is a wrapper to satisfy the GRPC service interface
+// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
+import (
+	bert "github.com/go-skynet/go-bert.cpp"
+
+	"github.com/mudler/LocalAI/pkg/grpc/base"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+)
+
+type Embeddings struct { // backend type handed to grpc.StartServer; wraps a go-bert.cpp model set by Load
+	base.SingleThread
+	bert *bert.Bert
+}
+
+func (llm *Embeddings) Load(opts *pb.ModelOptions) error {
+	var err error // handed back to the caller unchanged
+	llm.bert, err = bert.New(opts.ModelFile) // the result is stored even when err is non-nil
+	return err
+}
+
+func (llm *Embeddings) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
+	// Pre-tokenized input (opts.EmbeddingTokens) takes precedence over the raw opts.Embeddings text.
+	threads := bert.SetThreads(int(opts.Threads))
+	if len(opts.EmbeddingTokens) == 0 {
+		return llm.bert.Embeddings(opts.Embeddings, threads)
+	}
+	ids := make([]int, 0, len(opts.EmbeddingTokens))
+	for _, tok := range opts.EmbeddingTokens {
+		ids = append(ids, int(tok))
+	}
+	return llm.bert.TokenEmbeddings(ids, threads)
+}
--- a/backend/go/llm/bert/main.go
+++ b/backend/go/llm/bert/main.go
@@ -1,6 +1,7 @@
 package main

 // Note: this is started internally by LocalAI and a server is allocated for each model
+
 import (
 	"flag"

@@ -14,7 +15,7 @@ var (
 func main() {
 	flag.Parse()

-	if err := grpc.StartServer(*addr, &Bark{}); err != nil {
+	if err := grpc.StartServer(*addr, &Embeddings{}); err != nil {
 		panic(err)
 	}
 }
--- a/backend/go/vad/silero/main.go
+++ b/backend/go/vad/silero/main.go
@@ -15,7 +15,7 @@ var (
 func main() {
 	flag.Parse()

-	if err := grpc.StartServer(*addr, &VAD{}); err != nil {
+	if err := grpc.StartServer(*addr, &LLM{}); err != nil {
 		panic(err)
 	}
 }
--- a/backend/go/llm/rwkv/rwkv.go
+++ b/backend/go/llm/rwkv/rwkv.go
@@ -0,0 +1,95 @@
+package main
+
+// This is a wrapper to satisfy the GRPC service interface
+// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
+import (
+	"fmt"
+	"path/filepath"
+
+	"github.com/donomii/go-rwkv.cpp"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+)
+
+const tokenizerSuffix = ".tokenizer.json"
+
+type LLM struct { // rwkv backend; the rwkv field is populated by Load and used by the Predict/Tokenize methods
+	base.SingleThread
+
+	rwkv *rwkv.RwkvState
+}
+
+func (llm *LLM) Load(opts *pb.ModelOptions) error {
+	// The tokenizer is resolved next to the model file; when opts.Tokenizer is
+	// empty its name defaults to the model's base name plus tokenizerSuffix.
+	tokName := opts.Tokenizer
+	if tokName == "" {
+		tokName = filepath.Base(opts.ModelFile) + tokenizerSuffix
+	}
+	tokPath := filepath.Join(filepath.Dir(opts.ModelFile), tokName)
+
+	state := rwkv.LoadFiles(opts.ModelFile, tokPath, uint32(opts.GetThreads()))
+	if state == nil {
+		// LoadFiles returns only the state here, so nil is treated as the failure signal.
+		return fmt.Errorf("rwkv could not load model")
+	}
+	llm.rwkv = state
+	return nil
+}
+
+func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
+	// Feed the prompt first; generation only runs when that succeeds.
+	if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
+		return "", err
+	}
+
+	// Only the first stop prompt is used; a newline is the fallback.
+	stop := "\n"
+	if len(opts.StopPrompts) > 0 {
+		stop = opts.StopPrompts[0]
+	}
+
+	return llm.rwkv.GenerateResponse(int(opts.Tokens), stop, float32(opts.Temperature), float32(opts.TopP), nil), nil
+}
+
+// PredictStream generates in a goroutine, sending each piece to results; results is closed on every exit path, including a ProcessInput failure.
+func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
+	go func() {
+		defer close(results) // also covers the early error return below, so a reader ranging over results never blocks forever
+		stopWord := "\n"
+		if len(opts.StopPrompts) > 0 {
+			stopWord = opts.StopPrompts[0]
+		}
+
+		if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
+			fmt.Println("Error processing input: ", err)
+			return
+		}
+
+		llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), func(s string) bool {
+			results <- s
+			return true
+		})
+	}()
+
+	return nil
+}
+
+func (llm *LLM) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationResponse, error) {
+	// Encode the prompt with the tokenizer held by the loaded rwkv state.
+	encoded, err := llm.rwkv.Tokenizer.Encode(opts.Prompt)
+	if err != nil {
+		return pb.TokenizationResponse{}, err
+	}
+
+	// The response carries int32 IDs, so convert each token ID individually.
+	ids := make([]int32, len(encoded))
+	for pos, tok := range encoded {
+		ids[pos] = int32(tok.ID)
+	}
+
+	return pb.TokenizationResponse{
+		Tokens: ids,
+		Length: int32(len(ids)),
+	}, nil
+}
--- a/backend/go/vad/silero/vad.go
+++ b/backend/go/vad/silero/vad.go
@@ -1,54 +0,0 @@
-package main
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"fmt"
-
-	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
-	"github.com/streamer45/silero-vad-go/speech"
-)
-
-type VAD struct {
-	base.SingleThread
-	detector *speech.Detector
-}
-
-func (vad *VAD) Load(opts *pb.ModelOptions) error {
-	v, err := speech.NewDetector(speech.DetectorConfig{
-		ModelPath:  opts.ModelFile,
-		SampleRate: 16000,
-		//WindowSize:           1024,
-		Threshold:            0.5,
-		MinSilenceDurationMs: 0,
-		SpeechPadMs:          0,
-	})
-	if err != nil {
-		return fmt.Errorf("create silero detector: %w", err)
-	}
-
-	vad.detector = v
-	return err
-}
-
-func (vad *VAD) VAD(req *pb.VADRequest) (pb.VADResponse, error) {
-	audio := req.Audio
-
-	segments, err := vad.detector.Detect(audio)
-	if err != nil {
-		return pb.VADResponse{}, fmt.Errorf("detect: %w", err)
-	}
-
-	vadSegments := []*pb.VADSegment{}
-	for _, s := range segments {
-		vadSegments = append(vadSegments, &pb.VADSegment{
-			Start: float32(s.SpeechStartAt),
-			End:   float32(s.SpeechEndAt),
-		})
-	}
-
-	return pb.VADResponse{
-		Segments: vadSegments,
-	}, nil
-}
--- a/backend/python/autogptq/requirements.txt
+++ b/backend/python/autogptq/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 auto-gptq==0.7.1
-grpcio==1.68.1
+grpcio==1.67.0
 protobuf
 certifi
 transformers
--- a/backend/python/bark/requirements.txt
+++ b/backend/python/bark/requirements.txt
@@ -1,4 +1,4 @@
 bark==0.1.5
-grpcio==1.68.1
+grpcio==1.67.0
 protobuf
 certifi
--- a/backend/python/common/template/Makefile
+++ b/backend/python/common/template/Makefile
@@ -1,9 +1,8 @@
 .DEFAULT_GOAL := install

 .PHONY: install
-install:
+install: protogen
 	bash install.sh
-	$(MAKE) protogen

 .PHONY: protogen
 protogen: backend_pb2_grpc.py backend_pb2.py
@@ -13,7 +12,7 @@ protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py

 backend_pb2_grpc.py backend_pb2.py:
-	bash protogen.sh
+	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto

 .PHONY: clean
 clean: protogen-clean
--- a/backend/python/common/template/protogen.sh
+++ b/backend/python/common/template/protogen.sh
@@ -1,6 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
--- a/backend/python/common/template/requirements.txt
+++ b/backend/python/common/template/requirements.txt
@@ -1,3 +1,2 @@
-grpcio==1.68.1
-protobuf
-grpcio-tools
+grpcio==1.67.0
+protobuf
--- a/backend/python/coqui/requirements.txt
+++ b/backend/python/coqui/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.68.1
+grpcio==1.67.0
 protobuf
 certifi
 packaging==24.1
--- a/backend/python/diffusers/backend.py
+++ b/backend/python/diffusers/backend.py
@@ -247,16 +247,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                        use_safetensors=True,
                        variant=variant)
            elif request.PipelineType == "FluxPipeline":
-                if fromSingleFile:
-                    self.pipe = FluxPipeline.from_single_file(modelFile,
-                                                              torch_dtype=torchType,
-                                                              use_safetensors=True)
-                else:
                    self.pipe = FluxPipeline.from_pretrained(
                        request.Model,
                        torch_dtype=torch.bfloat16)
-                if request.LowVRAM:
-                    self.pipe.enable_model_cpu_offload()
+                    if request.LowVRAM:
+                        self.pipe.enable_model_cpu_offload()
            elif request.PipelineType == "FluxTransformer2DModel":
                    dtype = torch.bfloat16
                    # specify from environment or default to "ChuckMcSneed/FLUX.1-dev"
@@ -301,34 +296,22 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                self.pipe.controlnet = self.controlnet
            else:
                self.controlnet = None
-
-            if request.LoraAdapter and not os.path.isabs(request.LoraAdapter):
+            # Assume directory from request.ModelFile.
+            # Only if request.LoraAdapter is not an absolute path
+            if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter:
+                # get base path of modelFile
+                modelFileBase = os.path.dirname(request.ModelFile)
                # modify LoraAdapter to be relative to modelFileBase
-                request.LoraAdapter = os.path.join(request.ModelPath, request.LoraAdapter)
-
+                request.LoraAdapter = os.path.join(modelFileBase, request.LoraAdapter)
            device = "cpu" if not request.CUDA else "cuda"
            self.device = device
            if request.LoraAdapter:
                # Check if its a local file and not a directory ( we load lora differently for a safetensor file )
                if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter):
+                    # self.load_lora_weights(request.LoraAdapter, 1, device, torchType)
                    self.pipe.load_lora_weights(request.LoraAdapter)
                else:
                    self.pipe.unet.load_attn_procs(request.LoraAdapter)
-            if len(request.LoraAdapters) > 0:
-                i = 0
-                adapters_name = []
-                adapters_weights = []
-                for adapter in request.LoraAdapters:
-                    if not os.path.isabs(adapter):
-                        adapter = os.path.join(request.ModelPath, adapter)
-                    self.pipe.load_lora_weights(adapter, adapter_name=f"adapter_{i}")
-                    adapters_name.append(f"adapter_{i}")
-                    i += 1
-
-                for adapters_weight in request.LoraScales:
-                    adapters_weights.append(adapters_weight)
-
-                self.pipe.set_adapters(adapters_name, adapter_weights=adapters_weights)

            if request.CUDA:
                self.pipe.to('cuda')
@@ -409,6 +392,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
        # create a dictionary of values for the parameters
        options = {
            "negative_prompt": request.negative_prompt,
+            "width": request.width,
+            "height": request.height,
            "num_inference_steps": steps,
        }

@@ -426,13 +411,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
        keys = options.keys()

        if request.EnableParameters != "":
-            keys = [key.strip() for key in request.EnableParameters.split(",")]
+            keys = request.EnableParameters.split(",")

        if request.EnableParameters == "none":
            keys = []

        # create a dictionary of parameters by using the keys from EnableParameters and the values from defaults
-        kwargs = {key: options.get(key) for key in keys if key in options}
+        kwargs = {key: options[key] for key in keys}

        # Set seed
        if request.seed > 0:
@@ -443,12 +428,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
        if self.PipelineType == "FluxPipeline":
            kwargs["max_sequence_length"] = 256

-        if request.width:
-            kwargs["width"] = request.width
-
-        if request.height:
-            kwargs["height"] = request.height
-
        if self.PipelineType == "FluxTransformer2DModel":
            kwargs["output_type"] = "pil"
            kwargs["generator"] = torch.Generator("cpu").manual_seed(0)
@@ -468,7 +447,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
            export_to_video(video_frames, request.dst)
            return backend_pb2.Result(message="Media generated successfully", success=True)

-        print(f"Generating image with {kwargs=}", file=sys.stderr)
        image = {}
        if COMPEL:
            conditioning, pooled = self.compel.build_conditioning_tensor(prompt)
--- a/backend/python/diffusers/requirements.txt
+++ b/backend/python/diffusers/requirements.txt
@@ -1,5 +1,5 @@
 setuptools
-grpcio==1.68.1
+grpcio==1.67.0
 pillow
 protobuf
 certifi
--- a/backend/python/exllama2/requirements.txt
+++ b/backend/python/exllama2/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.68.1
+grpcio==1.67.0
 protobuf
 certifi
 wheel
--- a/backend/python/mamba/requirements.txt
+++ b/backend/python/mamba/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.68.1
+grpcio==1.67.0
 protobuf
 certifi
--- a/backend/python/openvoice/requirements-intel.txt
+++ b/backend/python/openvoice/requirements-intel.txt
@@ -2,7 +2,7 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-grpcio==1.68.1
+grpcio==1.67.0
 protobuf
 librosa==0.9.1
 faster-whisper==0.9.0
--- a/backend/python/openvoice/requirements.txt
+++ b/backend/python/openvoice/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.68.1
+grpcio==1.67.0
 protobuf
 librosa
 faster-whisper
--- a/backend/python/parler-tts/Makefile
+++ b/backend/python/parler-tts/Makefile
@@ -12,10 +12,9 @@ export SKIP_CONDA=1
 endif

 .PHONY: parler-tts
-parler-tts:
+parler-tts: protogen
 	@echo "Installing $(CONDA_ENV_PATH)..."
 	bash install.sh $(CONDA_ENV_PATH)
-	$(MAKE) protogen

 .PHONY: run
 run: protogen
@@ -37,7 +36,7 @@ protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py

 backend_pb2_grpc.py backend_pb2.py:
-	bash protogen.sh
+	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto

 .PHONY: clean
 clean: protogen-clean
--- a/backend/python/parler-tts/install.sh
+++ b/backend/python/parler-tts/install.sh
@@ -11,18 +11,16 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
 fi

-
 installRequirements

-
 # https://github.com/descriptinc/audiotools/issues/101
 # incompatible protobuf versions.
-PYDIR=python3.10
-pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"
+# PYDIR=python3.10
+# pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"

-if [ ! -d ${pyenv} ]; then
-    echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
-    exit 1
-fi
+# if [ ! -d ${pyenv} ]; then
+#     echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
+#     exit 1
+# fi

-curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py
+# curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py
--- a/backend/python/parler-tts/protogen.sh
+++ b/backend/python/parler-tts/protogen.sh
@@ -1,6 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
--- a/backend/python/parler-tts/requirements-after.txt
+++ b/backend/python/parler-tts/requirements-after.txt
@@ -1,4 +1,4 @@
 git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17
 llvmlite==0.43.0
 numba==0.60.0
-grpcio-tools==1.42.0
+git+https://github.com/descriptinc/audiotools
--- a/backend/python/parler-tts/requirements.txt
+++ b/backend/python/parler-tts/requirements.txt
@@ -1,3 +1,4 @@
-grpcio==1.68.1
+grpcio==1.67.0
+protobuf
 certifi
-llvmlite==0.43.0
+llvmlite==0.43.0
--- a/backend/python/rerankers/requirements.txt
+++ b/backend/python/rerankers/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.68.1
+grpcio==1.67.0
 protobuf
 certifi
--- a/backend/python/sentencetransformers/requirements-cpu.txt
+++ b/backend/python/sentencetransformers/requirements-cpu.txt
@@ -2,5 +2,5 @@ torch==2.4.1
 accelerate
 transformers
 bitsandbytes
-sentence-transformers==3.3.1
+sentence-transformers==3.2.0
 transformers
--- a/backend/python/sentencetransformers/requirements-cublas11.txt
+++ b/backend/python/sentencetransformers/requirements-cublas11.txt
@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
 torch==2.4.1+cu118
 accelerate
-sentence-transformers==3.3.1
+sentence-transformers==3.2.0
 transformers
--- a/backend/python/sentencetransformers/requirements-cublas12.txt
+++ b/backend/python/sentencetransformers/requirements-cublas12.txt
@@ -1,4 +1,4 @@
 torch==2.4.1
 accelerate
-sentence-transformers==3.3.1
+sentence-transformers==3.2.0
 transformers
--- a/backend/python/sentencetransformers/requirements-hipblas.txt
+++ b/backend/python/sentencetransformers/requirements-hipblas.txt
@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 torch==2.4.1+rocm6.0
 accelerate
-sentence-transformers==3.3.1
+sentence-transformers==3.2.0
 transformers
--- a/backend/python/sentencetransformers/requirements-intel.txt
+++ b/backend/python/sentencetransformers/requirements-intel.txt
@@ -4,5 +4,5 @@ torch
 optimum[openvino]
 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
 accelerate
-sentence-transformers==3.3.1
+sentence-transformers==3.2.0
 transformers
--- a/backend/python/sentencetransformers/requirements.txt
+++ b/backend/python/sentencetransformers/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.68.1
+grpcio==1.67.0
 protobuf
 certifi
 datasets
--- a/backend/python/transformers-musicgen/requirements.txt
+++ b/backend/python/transformers-musicgen/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.68.1
+grpcio==1.67.0
 protobuf
 scipy==1.14.0
 certifi
--- a/backend/python/transformers/requirements.txt
+++ b/backend/python/transformers/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.68.1
+grpcio==1.67.0
 protobuf
 certifi
 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
--- a/backend/python/vall-e-x/requirements.txt
+++ b/backend/python/vall-e-x/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.68.1
+grpcio==1.67.0
 protobuf
 certifi
--- a/backend/python/vllm/install.sh
+++ b/backend/python/vllm/install.sh
@@ -22,7 +22,7 @@ if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE}" == "xtrue" ]; then
            git clone https://github.com/vllm-project/vllm
        fi
        pushd vllm
-            uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.68.1 protobuf bitsandbytes
+            uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.67.0 protobuf bitsandbytes
            uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
            VLLM_TARGET_DEVICE=cpu python setup.py install
        popd
--- a/backend/python/vllm/requirements.txt
+++ b/backend/python/vllm/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.68.1
+grpcio==1.67.0
 protobuf
 certifi
 setuptools
--- a/core/backend/embeddings.go
+++ b/core/backend/embeddings.go
@@ -11,9 +11,17 @@ import (

 func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {

-	opts := ModelOptions(backendConfig, appConfig)
+	var inferenceModel interface{}
+	var err error

-	inferenceModel, err := loader.Load(opts...)
+	opts := ModelOptions(backendConfig, appConfig, []model.Option{})
+
+	if backendConfig.Backend == "" {
+		inferenceModel, err = loader.GreedyLoader(opts...)
+	} else {
+		opts = append(opts, model.WithBackendString(backendConfig.Backend))
+		inferenceModel, err = loader.BackendLoader(opts...)
+	}
 	if err != nil {
 		return nil, err
 	}
--- a/core/backend/image.go
+++ b/core/backend/image.go
@@ -9,8 +9,9 @@ import (

 func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {

-	opts := ModelOptions(backendConfig, appConfig)
-	inferenceModel, err := loader.Load(
+	opts := ModelOptions(backendConfig, appConfig, []model.Option{})
+
+	inferenceModel, err := loader.BackendLoader(
 		opts...,
 	)
 	if err != nil {
--- a/core/backend/llm.go
+++ b/core/backend/llm.go
@@ -16,6 +16,7 @@ import (
 	"github.com/mudler/LocalAI/core/schema"

 	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/pkg/grpc"
 	"github.com/mudler/LocalAI/pkg/grpc/proto"
 	model "github.com/mudler/LocalAI/pkg/model"
 	"github.com/mudler/LocalAI/pkg/utils"
@@ -34,6 +35,15 @@ type TokenUsage struct {
 func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
 	modelFile := c.Model

+	var inferenceModel grpc.Backend
+	var err error
+
+	opts := ModelOptions(c, o, []model.Option{})
+
+	if c.Backend != "" {
+		opts = append(opts, model.WithBackendString(c.Backend))
+	}
+
 	// Check if the modelFile exists, if it doesn't try to load it from the gallery
 	if o.AutoloadGalleries { // experimental
 		if _, err := os.Stat(modelFile); os.IsNotExist(err) {
@@ -46,8 +56,12 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
 		}
 	}

-	opts := ModelOptions(c, o)
-	inferenceModel, err := loader.Load(opts...)
+	if c.Backend == "" {
+		inferenceModel, err = loader.GreedyLoader(opts...)
+	} else {
+		inferenceModel, err = loader.BackendLoader(opts...)
+	}
+
 	if err != nil {
 		return nil, err
 	}
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -11,7 +11,7 @@ import (
 	"github.com/rs/zerolog/log"
 )

-func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts ...model.Option) []model.Option {
+func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
 	name := c.Name
 	if name == "" {
 		name = c.Model
@@ -122,17 +122,14 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
 		CUDA:                 c.CUDA || c.Diffusers.CUDA,
 		SchedulerType:        c.Diffusers.SchedulerType,
 		PipelineType:         c.Diffusers.PipelineType,
-		CFGScale:             c.CFGScale,
+		CFGScale:             c.Diffusers.CFGScale,
 		LoraAdapter:          c.LoraAdapter,
 		LoraScale:            c.LoraScale,
-		LoraAdapters:         c.LoraAdapters,
-		LoraScales:           c.LoraScales,
 		F16Memory:            f16,
 		LoraBase:             c.LoraBase,
 		IMG2IMG:              c.Diffusers.IMG2IMG,
 		CLIPModel:            c.Diffusers.ClipModel,
 		CLIPSubfolder:        c.Diffusers.ClipSubFolder,
-		Options:              c.Options,
 		CLIPSkip:             int32(c.Diffusers.ClipSkip),
 		ControlNet:           c.Diffusers.ControlNet,
 		ContextSize:          int32(ctxSize),
--- a/core/backend/rerank.go
+++ b/core/backend/rerank.go
@@ -11,8 +11,8 @@ import (

 func Rerank(modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {

-	opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
-	rerankModel, err := loader.Load(opts...)
+	opts := ModelOptions(backendConfig, appConfig, []model.Option{model.WithModel(modelFile)})
+	rerankModel, err := loader.BackendLoader(opts...)
 	if err != nil {
 		return nil, err
 	}
--- a/core/backend/soundgeneration.go
+++ b/core/backend/soundgeneration.go
@@ -25,8 +25,9 @@ func SoundGeneration(
 	backendConfig config.BackendConfig,
 ) (string, *proto.Result, error) {

-	opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
-	soundGenModel, err := loader.Load(opts...)
+	opts := ModelOptions(backendConfig, appConfig, []model.Option{model.WithModel(modelFile)})
+
+	soundGenModel, err := loader.BackendLoader(opts...)
 	if err != nil {
 		return "", nil, err
 	}
--- a/core/backend/stores.go
+++ b/core/backend/stores.go
@@ -8,15 +8,16 @@ import (
 )

 func StoreBackend(sl *model.ModelLoader, appConfig *config.ApplicationConfig, storeName string) (grpc.Backend, error) {
-	if storeName == "" {
-		storeName = "default"
-	}
+    if storeName == "" {
+      storeName = "default"
+    }

-	sc := []model.Option{
-		model.WithBackendString(model.LocalStoreBackend),
-		model.WithAssetDir(appConfig.AssetsDestination),
-		model.WithModel(storeName),
-	}
+    sc := []model.Option{
+      model.WithBackendString(model.LocalStoreBackend),
+      model.WithAssetDir(appConfig.AssetsDestination),
+      model.WithModel(storeName),
+    }

-	return sl.Load(sc...)
+    return sl.BackendLoader(sc...)
 }
+
--- a/core/backend/token_metrics.go
+++ b/core/backend/token_metrics.go
@@ -15,8 +15,10 @@ func TokenMetrics(
 	appConfig *config.ApplicationConfig,
 	backendConfig config.BackendConfig) (*proto.MetricsResponse, error) {

-	opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
-	model, err := loader.Load(opts...)
+	opts := ModelOptions(backendConfig, appConfig, []model.Option{
+		model.WithModel(modelFile),
+	})
+	model, err := loader.BackendLoader(opts...)
 	if err != nil {
 		return nil, err
 	}
--- a/core/backend/tokenize.go
+++ b/core/backend/tokenize.go
@@ -14,13 +14,15 @@ func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.Bac
 	var inferenceModel grpc.Backend
 	var err error

-	opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))
+	opts := ModelOptions(backendConfig, appConfig, []model.Option{
+		model.WithModel(modelFile),
+	})

 	if backendConfig.Backend == "" {
-		inferenceModel, err = loader.Load(opts...)
+		inferenceModel, err = loader.GreedyLoader(opts...)
 	} else {
 		opts = append(opts, model.WithBackendString(backendConfig.Backend))
-		inferenceModel, err = loader.Load(opts...)
+		inferenceModel, err = loader.BackendLoader(opts...)
 	}
 	if err != nil {
 		return schema.TokenizeResponse{}, err
--- a/core/backend/transcript.go
+++ b/core/backend/transcript.go
@@ -18,9 +18,9 @@ func ModelTranscription(audio, language string, translate bool, ml *model.ModelL
 		backendConfig.Backend = model.WhisperBackend
 	}

-	opts := ModelOptions(backendConfig, appConfig)
+	opts := ModelOptions(backendConfig, appConfig, []model.Option{})

-	transcriptionModel, err := ml.Load(opts...)
+	transcriptionModel, err := ml.BackendLoader(opts...)
 	if err != nil {
 		return nil, err
 	}
--- a/core/backend/tts.go
+++ b/core/backend/tts.go
@@ -28,8 +28,11 @@ func ModelTTS(
 		bb = model.PiperBackend
 	}

-	opts := ModelOptions(backendConfig, appConfig, model.WithBackendString(bb), model.WithModel(modelFile))
-	ttsModel, err := loader.Load(opts...)
+	opts := ModelOptions(config.BackendConfig{}, appConfig, []model.Option{
+		model.WithBackendString(bb),
+		model.WithModel(modelFile),
+	})
+	ttsModel, err := loader.BackendLoader(opts...)
 	if err != nil {
 		return "", nil, err
 	}
--- a/core/cli/worker/worker_p2p.go
+++ b/core/cli/worker/worker_p2p.go
@@ -76,14 +76,8 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
 					"util",
 					"llama-cpp-rpc-server",
 				)
-				var extraArgs []string
-
-				if r.ExtraLLamaCPPArgs != "" {
-					extraArgs = strings.Split(r.ExtraLLamaCPPArgs, " ")
-				}
+				extraArgs := strings.Split(r.ExtraLLamaCPPArgs, " ")
 				args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, extraArgs...)
-				log.Debug().Msgf("Starting llama-cpp-rpc-server on '%s:%d' with args: %+v (%d)", address, port, args, len(args))
-
 				args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)

 				cmd := exec.Command(
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -72,8 +72,6 @@ type BackendConfig struct {

 	Description string `yaml:"description"`
 	Usage       string `yaml:"usage"`
-
-	Options []string `yaml:"options"`
 }

 type File struct {
@@ -99,15 +97,16 @@ type GRPC struct {
 }

 type Diffusers struct {
-	CUDA             bool   `yaml:"cuda"`
-	PipelineType     string `yaml:"pipeline_type"`
-	SchedulerType    string `yaml:"scheduler_type"`
-	EnableParameters string `yaml:"enable_parameters"` // A list of comma separated parameters to specify
-	IMG2IMG          bool   `yaml:"img2img"`           // Image to Image Diffuser
-	ClipSkip         int    `yaml:"clip_skip"`         // Skip every N frames
-	ClipModel        string `yaml:"clip_model"`        // Clip model to use
-	ClipSubFolder    string `yaml:"clip_subfolder"`    // Subfolder to use for clip model
-	ControlNet       string `yaml:"control_net"`
+	CUDA             bool    `yaml:"cuda"`
+	PipelineType     string  `yaml:"pipeline_type"`
+	SchedulerType    string  `yaml:"scheduler_type"`
+	EnableParameters string  `yaml:"enable_parameters"` // A list of comma separated parameters to specify
+	CFGScale         float32 `yaml:"cfg_scale"`         // Classifier-Free Guidance Scale
+	IMG2IMG          bool    `yaml:"img2img"`           // Image to Image Diffuser
+	ClipSkip         int     `yaml:"clip_skip"`         // Skip every N frames
+	ClipModel        string  `yaml:"clip_model"`        // Clip model to use
+	ClipSubFolder    string  `yaml:"clip_subfolder"`    // Subfolder to use for clip model
+	ControlNet       string  `yaml:"control_net"`
 }

 // LLMConfig is a struct that holds the configuration that are
@@ -135,25 +134,23 @@ type LLMConfig struct {
 	TrimSpace       []string `yaml:"trimspace"`
 	TrimSuffix      []string `yaml:"trimsuffix"`

-	ContextSize          *int      `yaml:"context_size"`
-	NUMA                 bool      `yaml:"numa"`
-	LoraAdapter          string    `yaml:"lora_adapter"`
-	LoraBase             string    `yaml:"lora_base"`
-	LoraAdapters         []string  `yaml:"lora_adapters"`
-	LoraScales           []float32 `yaml:"lora_scales"`
-	LoraScale            float32   `yaml:"lora_scale"`
-	NoMulMatQ            bool      `yaml:"no_mulmatq"`
-	DraftModel           string    `yaml:"draft_model"`
-	NDraft               int32     `yaml:"n_draft"`
-	Quantization         string    `yaml:"quantization"`
-	LoadFormat           string    `yaml:"load_format"`
-	GPUMemoryUtilization float32   `yaml:"gpu_memory_utilization"` // vLLM
-	TrustRemoteCode      bool      `yaml:"trust_remote_code"`      // vLLM
-	EnforceEager         bool      `yaml:"enforce_eager"`          // vLLM
-	SwapSpace            int       `yaml:"swap_space"`             // vLLM
-	MaxModelLen          int       `yaml:"max_model_len"`          // vLLM
-	TensorParallelSize   int       `yaml:"tensor_parallel_size"`   // vLLM
-	MMProj               string    `yaml:"mmproj"`
+	ContextSize          *int    `yaml:"context_size"`
+	NUMA                 bool    `yaml:"numa"`
+	LoraAdapter          string  `yaml:"lora_adapter"`
+	LoraBase             string  `yaml:"lora_base"`
+	LoraScale            float32 `yaml:"lora_scale"`
+	NoMulMatQ            bool    `yaml:"no_mulmatq"`
+	DraftModel           string  `yaml:"draft_model"`
+	NDraft               int32   `yaml:"n_draft"`
+	Quantization         string  `yaml:"quantization"`
+	LoadFormat           string  `yaml:"load_format"`
+	GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
+	TrustRemoteCode      bool    `yaml:"trust_remote_code"`      // vLLM
+	EnforceEager         bool    `yaml:"enforce_eager"`          // vLLM
+	SwapSpace            int     `yaml:"swap_space"`             // vLLM
+	MaxModelLen          int     `yaml:"max_model_len"`          // vLLM
+	TensorParallelSize   int     `yaml:"tensor_parallel_size"`   // vLLM
+	MMProj               string  `yaml:"mmproj"`

 	FlashAttention bool `yaml:"flash_attention"`
 	NoKVOffloading bool `yaml:"no_kv_offloading"`
@@ -165,8 +162,6 @@ type LLMConfig struct {
 	YarnAttnFactor float32 `yaml:"yarn_attn_factor"`
 	YarnBetaFast   float32 `yaml:"yarn_beta_fast"`
 	YarnBetaSlow   float32 `yaml:"yarn_beta_slow"`
-
-	CFGScale float32 `yaml:"cfg_scale"` // Classifier-Free Guidance Scale
 }

 // AutoGPTQ is a struct that holds the configuration specific to the AutoGPTQ backend
--- a/core/config/backend_config_loader.go
+++ b/core/config/backend_config_loader.go
@@ -140,7 +140,7 @@ func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
 		}
 	}

-	cfg.SetDefaults(append(opts, ModelPath(modelPath))...)
+	cfg.SetDefaults(opts...)

 	return cfg, nil
 }
--- a/core/gallery/models_test.go
+++ b/core/gallery/models_test.go
@@ -12,8 +12,6 @@ import (
 	"gopkg.in/yaml.v3"
 )

-const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`
-
 var _ = Describe("Model test", func() {

 	Context("Downloading", func() {
@@ -49,7 +47,7 @@ var _ = Describe("Model test", func() {

 			gallery := []GalleryModel{{
 				Name: "bert",
-				URL:  bertEmbeddingsURL,
+				URL:  "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
 			}}
 			out, err := yaml.Marshal(gallery)
 			Expect(err).ToNot(HaveOccurred())
@@ -68,7 +66,7 @@ var _ = Describe("Model test", func() {
 			Expect(err).ToNot(HaveOccurred())
 			Expect(len(models)).To(Equal(1))
 			Expect(models[0].Name).To(Equal("bert"))
-			Expect(models[0].URL).To(Equal(bertEmbeddingsURL))
+			Expect(models[0].URL).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"))
 			Expect(models[0].Installed).To(BeFalse())

 			err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {}, true)
@@ -80,7 +78,7 @@ var _ = Describe("Model test", func() {
 			content := map[string]interface{}{}
 			err = yaml.Unmarshal(dat, &content)
 			Expect(err).ToNot(HaveOccurred())
-			Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
+			Expect(content["backend"]).To(Equal("bert-embeddings"))

 			models, err = AvailableGalleryModels(galleries, tempdir)
 			Expect(err).ToNot(HaveOccurred())
--- a/core/http/app_test.go
+++ b/core/http/app_test.go
@@ -5,12 +5,14 @@ import (
 	"context"
 	"embed"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"net/http"
 	"os"
 	"path/filepath"
 	"runtime"
+	"strings"

 	"github.com/mudler/LocalAI/core/config"
 	. "github.com/mudler/LocalAI/core/http"
@@ -238,8 +240,6 @@ func postInvalidRequest(url string) (error, int) {
 	return nil, resp.StatusCode
 }

-const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`
-
 //go:embed backend-assets/*
 var backendAssets embed.FS

@@ -279,13 +279,13 @@ var _ = Describe("API test", func() {
 			g := []gallery.GalleryModel{
 				{
 					Name: "bert",
-					URL:  bertEmbeddingsURL,
+					URL:  "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
 				},
 				{
 					Name:            "bert2",
-					URL:             bertEmbeddingsURL,
+					URL:             "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
 					Overrides:       map[string]interface{}{"foo": "bar"},
-					AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
+					AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"}},
 				},
 			}
 			out, err := yaml.Marshal(g)
@@ -345,7 +345,7 @@ var _ = Describe("API test", func() {
 			It("Should fail if the api key is missing", func() {
 				err, sc := postInvalidRequest("http://127.0.0.1:9090/models/available")
 				Expect(err).ToNot(BeNil())
-				Expect(sc).To(Equal(401))
+				Expect(sc).To(Equal(403))
 			})
 		})

@@ -383,7 +383,7 @@ var _ = Describe("API test", func() {
 				content := map[string]interface{}{}
 				err = yaml.Unmarshal(dat, &content)
 				Expect(err).ToNot(HaveOccurred())
-				Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
+				Expect(content["backend"]).To(Equal("bert-embeddings"))
 				Expect(content["foo"]).To(Equal("bar"))

 				models, err = getModels("http://127.0.0.1:9090/models/available")
@@ -402,7 +402,7 @@ var _ = Describe("API test", func() {
 			It("overrides models", func() {

 				response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
-					URL:  bertEmbeddingsURL,
+					URL:  "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
 					Name: "bert",
 					Overrides: map[string]interface{}{
 						"backend": "llama",
@@ -438,7 +438,7 @@ var _ = Describe("API test", func() {
 				Eventually(func() bool {
 					response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
 					return response["processed"].(bool)
-				}, "900s", "10s").Should(Equal(true))
+				}, "360s", "10s").Should(Equal(true))

 				Eventually(func() []string {
 					models, _ := client.ListModels(context.TODO())
@@ -451,7 +451,7 @@ var _ = Describe("API test", func() {
 			})
 			It("apply models without overrides", func() {
 				response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
-					URL:       bertEmbeddingsURL,
+					URL:       "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
 					Name:      "bert",
 					Overrides: map[string]interface{}{},
 				})
@@ -471,7 +471,7 @@ var _ = Describe("API test", func() {
 				content := map[string]interface{}{}
 				err = yaml.Unmarshal(dat, &content)
 				Expect(err).ToNot(HaveOccurred())
-				Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
+				Expect(content["backend"]).To(Equal("bert-embeddings"))
 			})

 			It("runs openllama(llama-ggml backend)", Label("llama"), func() {
@@ -562,7 +562,7 @@ var _ = Describe("API test", func() {
 				Eventually(func() bool {
 					response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
 					return response["processed"].(bool)
-				}, "900s", "10s").Should(Equal(true))
+				}, "360s", "10s").Should(Equal(true))

 				By("testing chat")
 				resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: modelName, Messages: []openai.ChatCompletionMessage{
@@ -806,7 +806,7 @@ var _ = Describe("API test", func() {
 		It("returns the models list", func() {
 			models, err := client.ListModels(context.TODO())
 			Expect(err).ToNot(HaveOccurred())
-			Expect(len(models.Models)).To(Equal(7)) // If "config.yaml" should be included, this should be 8?
+			Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8?
 		})
 		It("can generate completions via ggml", func() {
 			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt})
@@ -866,8 +866,8 @@ var _ = Describe("API test", func() {
 				},
 			)
 			Expect(err).ToNot(HaveOccurred(), err)
-			Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 2048))
-			Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 2048))
+			Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
+			Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))

 			sunEmbedding := resp.Data[0].Embedding
 			resp2, err := client.CreateEmbeddings(
@@ -911,6 +911,71 @@ var _ = Describe("API test", func() {
 			})
 		})

+		Context("backends", func() {
+			It("runs rwkv completion", func() {
+				if runtime.GOOS != "linux" {
+					Skip("test supported only on linux")
+				}
+				resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,"})
+				Expect(err).ToNot(HaveOccurred())
+				Expect(len(resp.Choices) > 0).To(BeTrue())
+				Expect(resp.Choices[0].Text).To(ContainSubstring("five"))
+
+				stream, err := client.CreateCompletionStream(context.TODO(), openai.CompletionRequest{
+					Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,", Stream: true,
+				})
+				Expect(err).ToNot(HaveOccurred())
+				defer stream.Close()
+
+				tokens := 0
+				text := ""
+				for {
+					response, err := stream.Recv()
+					if errors.Is(err, io.EOF) {
+						break
+					}
+
+					Expect(err).ToNot(HaveOccurred())
+					text += response.Choices[0].Text
+					tokens++
+				}
+				Expect(text).ToNot(BeEmpty())
+				Expect(text).To(ContainSubstring("five"))
+				Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
+			})
+			It("runs rwkv chat completion", func() {
+				if runtime.GOOS != "linux" {
+					Skip("test supported only on linux")
+				}
+				resp, err := client.CreateChatCompletion(context.TODO(),
+					openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
+				Expect(err).ToNot(HaveOccurred())
+				Expect(len(resp.Choices) > 0).To(BeTrue())
+				Expect(strings.ToLower(resp.Choices[0].Message.Content)).To(Or(ContainSubstring("sure"), ContainSubstring("five")))
+
+				stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
+				Expect(err).ToNot(HaveOccurred())
+				defer stream.Close()
+
+				tokens := 0
+				text := ""
+				for {
+					response, err := stream.Recv()
+					if errors.Is(err, io.EOF) {
+						break
+					}
+
+					Expect(err).ToNot(HaveOccurred())
+					text += response.Choices[0].Delta.Content
+					tokens++
+				}
+				Expect(text).ToNot(BeEmpty())
+				Expect(strings.ToLower(text)).To(Or(ContainSubstring("sure"), ContainSubstring("five")))
+
+				Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
+			})
+		})
+
 		// See tests/integration/stores_test
 		Context("Stores", Label("stores"), func() {

--- a/core/http/elements/buttons.go
+++ b/core/http/elements/buttons.go
@@ -1,97 +0,0 @@
-package elements
-
-import (
-	"strings"
-
-	"github.com/chasefleming/elem-go"
-	"github.com/chasefleming/elem-go/attrs"
-	"github.com/mudler/LocalAI/core/gallery"
-)
-
-func installButton(galleryName string) elem.Node {
-	return elem.Button(
-		attrs.Props{
-			"data-twe-ripple-init":  "",
-			"data-twe-ripple-color": "light",
-			"class":                 "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
-			"hx-swap":               "outerHTML",
-			// post the Model ID as param
-			"hx-post": "/browse/install/model/" + galleryName,
-		},
-		elem.I(
-			attrs.Props{
-				"class": "fa-solid fa-download pr-2",
-			},
-		),
-		elem.Text("Install"),
-	)
-}
-
-func reInstallButton(galleryName string) elem.Node {
-	return elem.Button(
-		attrs.Props{
-			"data-twe-ripple-init":  "",
-			"data-twe-ripple-color": "light",
-			"class":                 "float-right inline-block rounded bg-primary ml-2 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
-			"hx-target":             "#action-div-" + dropBadChars(galleryName),
-			"hx-swap":               "outerHTML",
-			// post the Model ID as param
-			"hx-post": "/browse/install/model/" + galleryName,
-		},
-		elem.I(
-			attrs.Props{
-				"class": "fa-solid fa-arrow-rotate-right pr-2",
-			},
-		),
-		elem.Text("Reinstall"),
-	)
-}
-
-func infoButton(m *gallery.GalleryModel) elem.Node {
-	return elem.Button(
-		attrs.Props{
-			"data-twe-ripple-init":  "",
-			"data-twe-ripple-color": "light",
-			"class":                 "float-left inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
-			"data-modal-target":     modalName(m),
-			"data-modal-toggle":     modalName(m),
-		},
-		elem.P(
-			attrs.Props{
-				"class": "flex items-center",
-			},
-			elem.I(
-				attrs.Props{
-					"class": "fas fa-info-circle pr-2",
-				},
-			),
-			elem.Text("Info"),
-		),
-	)
-}
-
-func deleteButton(galleryID string) elem.Node {
-	return elem.Button(
-		attrs.Props{
-			"data-twe-ripple-init":  "",
-			"data-twe-ripple-color": "light",
-			"hx-confirm":            "Are you sure you wish to delete the model?",
-			"class":                 "float-right inline-block rounded bg-red-800 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-red-accent-300 hover:shadow-red-2 focus:bg-red-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-red-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
-			"hx-target":             "#action-div-" + dropBadChars(galleryID),
-			"hx-swap":               "outerHTML",
-			// post the Model ID as param
-			"hx-post": "/browse/delete/model/" + galleryID,
-		},
-		elem.I(
-			attrs.Props{
-				"class": "fa-solid fa-cancel pr-2",
-			},
-		),
-		elem.Text("Delete"),
-	)
-}
-
-// Javascript/HTMX doesn't like weird IDs
-func dropBadChars(s string) string {
-	return strings.ReplaceAll(s, "@", "__")
-}
--- a/core/http/elements/gallery.go
+++ b/core/http/elements/gallery.go
@@ -2,11 +2,13 @@ package elements

 import (
 	"fmt"
+	"strings"

 	"github.com/chasefleming/elem-go"
 	"github.com/chasefleming/elem-go/attrs"
 	"github.com/microcosm-cc/bluemonday"
 	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/core/p2p"
 	"github.com/mudler/LocalAI/core/services"
 )

@@ -14,6 +16,231 @@ const (
 	noImage = "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg"
 )

+// renderElements renders every node in n, in slice order, and returns the
+// concatenated markup. A nil or empty slice yields the empty string.
+func renderElements(n []elem.Node) string {
+	// A Builder avoids re-copying the accumulated string on every iteration.
+	var out strings.Builder
+	for _, node := range n {
+		out.WriteString(node.Render())
+	}
+	return out.String()
+}
+
+// DoneProgress renders the block shown when a gallery job has finished: a
+// div with id "action-div-<galleryID>" (after dropBadChars) that holds a
+// status heading with the sanitized text and one action button, which is the
+// delete button when showDelete is true and the reinstall button otherwise.
+func DoneProgress(galleryID, text string, showDelete bool) string {
+	// For IDs containing "@", the second "@"-separated field is taken as the
+	// model name; any other ID is used whole.
+	modelName := galleryID
+	if parts := strings.Split(galleryID, "@"); len(parts) > 1 {
+		modelName = parts[1]
+	}
+
+	heading := elem.H3(
+		attrs.Props{
+			"role":      "status",
+			"id":        "pblabel",
+			"tabindex":  "-1",
+			"autofocus": "",
+		},
+		elem.Text(bluemonday.StrictPolicy().Sanitize(text)),
+	)
+	action := elem.If(showDelete, deleteButton(galleryID, modelName), reInstallButton(galleryID))
+
+	wrapperProps := attrs.Props{"id": "action-div-" + dropBadChars(galleryID)}
+	return elem.Div(wrapperProps, heading, action).Render()
+}
+
+// ErrorProgress renders the block shown when a gallery job has failed: a
+// status heading reading "Error " followed by the sanitized err text, then
+// an install button for galleryName.
+func ErrorProgress(err, galleryName string) string {
+	message := "Error " + bluemonday.StrictPolicy().Sanitize(err)
+	statusProps := attrs.Props{
+		"role":      "status",
+		"id":        "pblabel",
+		"tabindex":  "-1",
+		"autofocus": "",
+	}
+
+	return elem.Div(
+		attrs.Props{},
+		elem.H3(statusProps, elem.Text(message)),
+		installButton(galleryName),
+	).Render()
+}
+
+// ProgressBar renders a div with role "progressbar" (range 0-100, labelled by
+// the "pblabel" element) wrapping an inner bar whose CSS width is progress
+// percent. progress is the percentage as text, e.g. "42"; an empty value is
+// treated as "0", mirroring StartProgressBar.
+//
+// NOTE(review): progress is placed into attribute values unmodified; this
+// assumes callers pass a plain numeric string — confirm against the callers.
+func ProgressBar(progress string) string {
+	if progress == "" {
+		progress = "0"
+	}
+	// aria-valuenow carries the real value rather than a constant "0", so the
+	// state exposed to assistive technology matches the visible bar width.
+	return elem.Div(attrs.Props{
+		"class":           "progress",
+		"role":            "progressbar",
+		"aria-valuemin":   "0",
+		"aria-valuemax":   "100",
+		"aria-valuenow":   progress,
+		"aria-labelledby": "pblabel",
+	},
+		elem.Div(attrs.Props{
+			"id":    "pb",
+			"class": "progress-bar",
+			"style": "width:" + progress + "%",
+		}),
+	).Render()
+}
+
+// P2PNodeStats renders the "<online>/<total>" worker counter as two spans.
+// The first holds a pulsing circle icon and the number of nodes reporting
+// IsOnline; it is green when at least one node is online and red when none
+// are. The second holds "/<total>" in gray.
+func P2PNodeStats(nodes []p2p.NodeData) string {
+	// Tally the nodes that report themselves online.
+	onlineCount := 0
+	for _, node := range nodes {
+		if node.IsOnline() {
+			onlineCount++
+		}
+	}
+
+	// Red while nobody is online, green as soon as one node is.
+	color := "text-red-500"
+	if onlineCount > 0 {
+		color = "text-green-500"
+	}
+
+	onlineSpan := elem.Span(
+		attrs.Props{"class": color},
+		elem.I(attrs.Props{"class": "fas fa-circle animate-pulse " + color + " ml-2 mr-1"}),
+		elem.Text(fmt.Sprintf("%d", onlineCount)),
+	)
+	totalSpan := elem.Span(
+		attrs.Props{"class": "text-gray-200"},
+		elem.Text(fmt.Sprintf("/%d", len(nodes))),
+	)
+
+	return renderElements([]elem.Node{onlineSpan, totalSpan})
+}
+
+// P2PNodeBoxes renders one card per node and returns the cards concatenated
+// in slice order. Each card shows a desktop icon, the node's sanitized ID,
+// and a status made of a pulsing dot plus an "Online"/"Offline" label, green
+// when the node reports IsOnline and red otherwise.
+func P2PNodeBoxes(nodes []p2p.NodeData) string {
+	boxes := make([]elem.Node, 0, len(nodes))
+
+	for _, node := range nodes {
+		hostIcon := elem.I(attrs.Props{"class": "fas fa-desktop text-gray-400 mr-2"})
+		nameSpan := elem.Span(
+			attrs.Props{"class": "text-gray-200 font-semibold ml-2 mr-1"},
+			elem.Text(bluemonday.StrictPolicy().Sanitize(node.ID)),
+		)
+		// Pulsing dot: green for online nodes, red otherwise.
+		statusDot := elem.If(
+			node.IsOnline(),
+			elem.I(attrs.Props{"class": "fas fa-circle animate-pulse text-green-500 ml-2 mr-1"}),
+			elem.I(attrs.Props{"class": "fas fa-circle animate-pulse text-red-500 ml-2 mr-1"}),
+		)
+		// Text label that goes with the dot.
+		statusLabel := elem.If(
+			node.IsOnline(),
+			elem.Span(attrs.Props{"class": "text-green-400"}, elem.Text("Online")),
+			elem.Span(attrs.Props{"class": "text-red-400"}, elem.Text("Offline")),
+		)
+
+		details := elem.P(
+			attrs.Props{"class": "text-sm text-gray-400 mt-2 flex"},
+			hostIcon,
+			elem.Text("Name: "),
+			nameSpan,
+			elem.Text("Status: "),
+			statusDot,
+			statusLabel,
+		)
+		boxes = append(boxes, elem.Div(
+			attrs.Props{"class": "bg-gray-700 p-6 rounded-lg shadow-lg text-left"},
+			details,
+		))
+	}
+
+	return renderElements(boxes)
+}
+
+// StartProgressBar renders the live progress widget for the job uid. The
+// outer div carries htmx attributes that fetch /browse/job/<uid> on a "done"
+// event and swap the result over itself. Inside it, a status heading shows
+// the sanitized text and wraps a region that re-fetches
+// /browse/job/progress/<uid> every 600ms, seeded with ProgressBar(progress).
+// An empty progress is treated as "0".
+func StartProgressBar(uid, progress, text string) string {
+	if len(progress) == 0 {
+		progress = "0"
+	}
+
+	// Polling region: its inner HTML is replaced on every refresh.
+	poller := elem.Div(
+		attrs.Props{
+			"hx-get":     "/browse/job/progress/" + uid,
+			"hx-trigger": "every 600ms",
+			"hx-target":  "this",
+			"hx-swap":    "innerHTML",
+		},
+		elem.Raw(ProgressBar(progress)),
+	)
+
+	// Status heading; the polling region is nested inside it.
+	status := elem.H3(
+		attrs.Props{
+			"role":      "status",
+			"id":        "pblabel",
+			"tabindex":  "-1",
+			"autofocus": "",
+		},
+		elem.Text(bluemonday.StrictPolicy().Sanitize(text)), // text is sanitized even though it may already be safe
+		poller,
+	)
+
+	containerProps := attrs.Props{
+		"hx-trigger": "done",
+		"hx-get":     "/browse/job/" + uid,
+		"hx-swap":    "outerHTML",
+		"hx-target":  "this",
+	}
+	return elem.Div(containerProps, status).Render()
+}
+
 func cardSpan(text, icon string) elem.Node {
 	return elem.Span(
 		attrs.Props{
@@ -41,6 +268,7 @@ func searchableElement(text, icon string) elem.Node {
 			attrs.Props{
 				"class": "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2",
 			},
+
 			elem.A(
 				attrs.Props{
 					//	"name":      "search",
@@ -62,8 +290,7 @@ func searchableElement(text, icon string) elem.Node {
 	)
 }

-/*
-func buttonLink(text, url string) elem.Node {
+func link(text, url string) elem.Node {
 	return elem.A(
 		attrs.Props{
 			"class":  "inline-block bg-gray-200 rounded-full px-3 py-1 text-sm font-semibold text-gray-700 mr-2 mb-2 hover:bg-gray-300 hover:shadow-gray-2",
@@ -76,255 +303,163 @@ func buttonLink(text, url string) elem.Node {
 		elem.Text(bluemonday.StrictPolicy().Sanitize(text)),
 	)
 }
-*/
-
-func link(text, url string) elem.Node {
-	return elem.A(
+func installButton(galleryName string) elem.Node {
+	return elem.Button(
 		attrs.Props{
-			"class":  "text-base leading-relaxed text-gray-500 dark:text-gray-400",
-			"href":   url,
-			"target": "_blank",
+			"data-twe-ripple-init":  "",
+			"data-twe-ripple-color": "light",
+			"class":                 "float-right inline-block rounded bg-primary px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong",
+			"hx-swap":               "outerHTML",
+			// post the Model ID as param
+			"hx-post": "/browse/install/model/" + galleryName,
 		},
-		elem.I(attrs.Props{
-			"class": "fas fa-link pr-2",
-		}),
-		elem.Text(bluemonday.StrictPolicy().Sanitize(text)),
+		elem.I(
+			attrs.Props{
+				"class": "fa-solid fa-download pr-2",
+			},
+		),
+		elem.Text("Install"),
 	)
 }

+// reInstallButton builds the "Reinstall" button for a gallery model. Clicking
+// it posts to /browse/install/model/<galleryName> and swaps the response over
+// the element "#action-div-<galleryName>" (id passed through dropBadChars).
+func reInstallButton(galleryName string) elem.Node {
+	const buttonClass = "float-right inline-block rounded bg-primary ml-2 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-primary-accent-300 hover:shadow-primary-2 focus:bg-primary-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-primary-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
+
+	target := "#action-div-" + dropBadChars(galleryName)
+	// The model ID travels as the last path segment of the POST URL.
+	endpoint := "/browse/install/model/" + galleryName
+
+	icon := elem.I(attrs.Props{"class": "fa-solid fa-arrow-rotate-right pr-2"})
+	props := attrs.Props{
+		"data-twe-ripple-init":  "",
+		"data-twe-ripple-color": "light",
+		"class":                 buttonClass,
+		"hx-target":             target,
+		"hx-swap":               "outerHTML",
+		"hx-post":               endpoint,
+	}
+	return elem.Button(props, icon, elem.Text("Reinstall"))
+}
+
+// deleteButton builds the red "Delete" button for a gallery model. After a
+// confirmation prompt it posts to /browse/delete/model/<galleryID> and swaps
+// the response over the element "#action-div-<galleryID>" (id passed through
+// dropBadChars). modelName is accepted but not used by this function.
+func deleteButton(galleryID, modelName string) elem.Node {
+	const buttonClass = "float-right inline-block rounded bg-red-800 px-6 pb-2.5 mb-3 pt-2.5 text-xs font-medium uppercase leading-normal text-white shadow-primary-3 transition duration-150 ease-in-out hover:bg-red-accent-300 hover:shadow-red-2 focus:bg-red-accent-300 focus:shadow-primary-2 focus:outline-none focus:ring-0 active:bg-red-600 active:shadow-primary-2 dark:shadow-black/30 dark:hover:shadow-dark-strong dark:focus:shadow-dark-strong dark:active:shadow-dark-strong"
+
+	target := "#action-div-" + dropBadChars(galleryID)
+	// The model ID travels as the last path segment of the POST URL.
+	endpoint := "/browse/delete/model/" + galleryID
+
+	icon := elem.I(attrs.Props{"class": "fa-solid fa-cancel pr-2"})
+	props := attrs.Props{
+		"data-twe-ripple-init":  "",
+		"data-twe-ripple-color": "light",
+		"hx-confirm":            "Are you sure you wish to delete the model?",
+		"class":                 buttonClass,
+		"hx-target":             target,
+		"hx-swap":               "outerHTML",
+		"hx-post":               endpoint,
+	}
+	return elem.Button(props, icon, elem.Text("Delete"))
+}
+
+// dropBadChars makes s usable as an element ID for JavaScript/HTMX by
+// replacing every "@" with "__". All other characters are left untouched.
+func dropBadChars(s string) string {
+	const (
+		unsafe      = "@"
+		replacement = "__"
+	)
+	// A negative count replaces every occurrence.
+	return strings.Replace(s, unsafe, replacement, -1)
+}
+
 type ProcessTracker interface {
 	Exists(string) bool
 	Get(string) string
 }

-func modalName(m *gallery.GalleryModel) string {
-	return m.Name + "-modal"
-}
-
-func modelDescription(m *gallery.GalleryModel) elem.Node {
-	urls := []elem.Node{}
-	for _, url := range m.URLs {
-		urls = append(urls,
-			elem.Li(attrs.Props{}, link(url, url)),
-		)
-	}
-
-	tagsNodes := []elem.Node{}
-	for _, tag := range m.Tags {
-		tagsNodes = append(tagsNodes,
-			searchableElement(tag, "fas fa-tag"),
-		)
-	}
-
-	return elem.Div(
-		attrs.Props{
-			"class": "p-6 text-surface dark:text-white",
-		},
-		elem.H5(
+func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, galleryService *services.GalleryService) string {
+	modelsElements := []elem.Node{}
+	descriptionDiv := func(m *gallery.GalleryModel) elem.Node {
+		return elem.Div(
 			attrs.Props{
-				"class": "mb-2 text-xl font-bold leading-tight",
+				"class": "p-6 text-surface dark:text-white",
 			},
-			elem.Text(bluemonday.StrictPolicy().Sanitize(m.Name)),
-		),
-		elem.Div( // small description
-			attrs.Props{
-				"class": "mb-4 text-sm truncate text-base",
-			},
-			elem.Text(bluemonday.StrictPolicy().Sanitize(m.Description)),
-		),
-
-		elem.Div(
-			attrs.Props{
-				"id":          modalName(m),
-				"tabindex":    "-1",
-				"aria-hidden": "true",
-				"class":       "hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full",
-			},
-			elem.Div(
+			elem.H5(
 				attrs.Props{
-					"class": "relative p-4 w-full max-w-2xl max-h-full",
+					"class": "mb-2 text-xl font-bold leading-tight",
 				},
-				elem.Div(
-					attrs.Props{
-						"class": "relative p-4 w-full max-w-2xl max-h-full bg-white rounded-lg shadow dark:bg-gray-700",
-					},
-					// header
-					elem.Div(
-						attrs.Props{
-							"class": "flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600",
-						},
-						elem.H3(
-							attrs.Props{
-								"class": "text-xl font-semibold text-gray-900 dark:text-white",
-							},
-							elem.Text(bluemonday.StrictPolicy().Sanitize(m.Name)),
-						),
-						elem.Button( // close button
-							attrs.Props{
-								"class":           "text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white",
-								"data-modal-hide": modalName(m),
-							},
-							elem.Raw(
-								`<svg class="w-3 h-3" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 14 14">
-									<path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6"/>
-								</svg>`,
-							),
-							elem.Span(
-								attrs.Props{
-									"class": "sr-only",
-								},
-								elem.Text("Close modal"),
-							),
-						),
-					),
-					// body
-					elem.Div(
-						attrs.Props{
-							"class": "p-4 md:p-5 space-y-4",
-						},
-						elem.Div(
-							attrs.Props{
-								"class": "flex justify-center items-center",
-							},
-							elem.Img(attrs.Props{
-								//	"class": "rounded-t-lg object-fit object-center h-96",
-								"class":   "lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3 entered loaded",
-								"src":     m.Icon,
-								"loading": "lazy",
-							}),
-						),
-						elem.P(
-							attrs.Props{
-								"class": "text-base leading-relaxed text-gray-500 dark:text-gray-400",
-							},
-							elem.Text(bluemonday.StrictPolicy().Sanitize(m.Description)),
-						),
-						elem.Hr(
-							attrs.Props{},
-						),
-						elem.P(
-							attrs.Props{
-								"class": "text-sm font-semibold text-gray-900 dark:text-white",
-							},
-							elem.Text("Links"),
-						),
-						elem.Ul(
-							attrs.Props{},
-							urls...,
-						),
-						elem.If(
-							len(m.Tags) > 0,
-							elem.Div(
-								attrs.Props{},
-								elem.P(
-									attrs.Props{
-										"class": "text-sm mb-5 font-semibold text-gray-900 dark:text-white",
-									},
-									elem.Text("Tags"),
-								),
-								elem.Div(
-									attrs.Props{
-										"class": "flex flex-row flex-wrap content-center",
-									},
-									tagsNodes...,
-								),
-							),
-							elem.Div(attrs.Props{}),
-						),
-					),
-					// Footer
-					elem.Div(
-						attrs.Props{
-							"class": "flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600",
-						},
-						elem.Button(
-							attrs.Props{
-								"data-modal-hide": modalName(m),
-								"class":           "py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700",
-							},
-							elem.Text("Close"),
-						),
-					),
-				),
+				elem.Text(bluemonday.StrictPolicy().Sanitize(m.Name)),
+			),
+			elem.P(
+				attrs.Props{
+					"class": "mb-4 text-sm [&:not(:hover)]:truncate text-base",
+				},
+				elem.Text(bluemonday.StrictPolicy().Sanitize(m.Description)),
 			),
-		),
-	)
-}
-
-func modelActionItems(m *gallery.GalleryModel, processTracker ProcessTracker, galleryService *services.GalleryService) elem.Node {
-	galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name)
-	currentlyProcessing := processTracker.Exists(galleryID)
-	jobID := ""
-	isDeletionOp := false
-	if currentlyProcessing {
-		status := galleryService.GetStatus(galleryID)
-		if status != nil && status.Deletion {
-			isDeletionOp = true
-		}
-		jobID = processTracker.Get(galleryID)
-		// TODO:
-		// case not handled, if status == nil : "Waiting"
-	}
-
-	nodes := []elem.Node{
-		cardSpan("Repository: "+m.Gallery.Name, "fa-brands fa-git-alt"),
-	}
-
-	if m.License != "" {
-		nodes = append(nodes,
-			cardSpan("License: "+m.License, "fas fa-book"),
 		)
 	}
-	/*
-		tagsNodes := []elem.Node{}

-			for _, tag := range m.Tags {
-				tagsNodes = append(tagsNodes,
-					searchableElement(tag, "fas fa-tag"),
-				)
+	actionDiv := func(m *gallery.GalleryModel) elem.Node {
+		galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name)
+		currentlyProcessing := processTracker.Exists(galleryID)
+		jobID := ""
+		isDeletionOp := false
+		if currentlyProcessing {
+			status := galleryService.GetStatus(galleryID)
+			if status != nil && status.Deletion {
+				isDeletionOp = true
 			}
+			jobID = processTracker.Get(galleryID)
+			// TODO:
+			// case not handled, if status == nil : "Waiting"
+		}

+		nodes := []elem.Node{
+			cardSpan("Repository: "+m.Gallery.Name, "fa-brands fa-git-alt"),
+		}

-				nodes = append(nodes,
-					elem.Div(
-						attrs.Props{
-							"class": "flex flex-row flex-wrap content-center",
-						},
-						tagsNodes...,
-					),
-				)
+		if m.License != "" {
+			nodes = append(nodes,
+				cardSpan("License: "+m.License, "fas fa-book"),
+			)
+		}

-				for i, url := range m.URLs {
-					nodes = append(nodes,
-						buttonLink("Link #"+fmt.Sprintf("%d", i+1), url),
-					)
-				}
-	*/
+		tagsNodes := []elem.Node{}
+		for _, tag := range m.Tags {
+			tagsNodes = append(tagsNodes,
+				searchableElement(tag, "fas fa-tag"),
+			)
+		}

-	progressMessage := "Installation"
-	if isDeletionOp {
-		progressMessage = "Deletion"
-	}
-
-	return elem.Div(
-		attrs.Props{
-			"class": "px-6 pt-4 pb-2",
-		},
-		elem.P(
-			attrs.Props{
-				"class": "mb-4 text-base",
-			},
-			nodes...,
-		),
-		elem.Div(
-			attrs.Props{
-				"id":    "action-div-" + dropBadChars(galleryID),
-				"class": "flow-root", // To order buttons left and right
-			},
-			infoButton(m),
+		nodes = append(nodes,
 			elem.Div(
 				attrs.Props{
-					"class": "float-right",
+					"class": "flex flex-row flex-wrap content-center",
+				},
+				tagsNodes...,
+			),
+		)
+
+		for i, url := range m.URLs {
+			nodes = append(nodes,
+				link("Link #"+fmt.Sprintf("%d", i+1), url),
+			)
+		}
+
+		progressMessage := "Installation"
+		if isDeletionOp {
+			progressMessage = "Deletion"
+		}
+
+		return elem.Div(
+			attrs.Props{
+				"class": "px-6 pt-4 pb-2",
+			},
+			elem.P(
+				attrs.Props{
+					"class": "mb-4 text-base",
+				},
+				nodes...,
+			),
+			elem.Div(
+				attrs.Props{
+					"id": "action-div-" + dropBadChars(galleryID),
 				},
 				elem.If(
 					currentlyProcessing,
@@ -335,18 +470,14 @@ func modelActionItems(m *gallery.GalleryModel, processTracker ProcessTracker, ga
 						elem.Node(elem.Div(
 							attrs.Props{},
 							reInstallButton(m.ID()),
-							deleteButton(m.ID()),
+							deleteButton(m.ID(), m.Name),
 						)),
 						installButton(m.ID()),
 					),
 				),
 			),
-		),
-	)
-}
-
-func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, galleryService *services.GalleryService) string {
-	modelsElements := []elem.Node{}
+		)
+	}

 	for _, m := range models {
 		elems := []elem.Node{}
@@ -390,10 +521,7 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g
 			))
 		}

-		elems = append(elems,
-			modelDescription(m),
-			modelActionItems(m, processTracker, galleryService),
-		)
+		elems = append(elems, descriptionDiv(m), actionDiv(m))
 		modelsElements = append(modelsElements,
 			elem.Div(
 				attrs.Props{
--- a/core/http/elements/p2p.go
+++ b/core/http/elements/p2p.go
@@ -1,147 +0,0 @@
-package elements
-
-import (
-	"fmt"
-
-	"github.com/chasefleming/elem-go"
-	"github.com/chasefleming/elem-go/attrs"
-	"github.com/microcosm-cc/bluemonday"
-	"github.com/mudler/LocalAI/core/p2p"
-)
-
-func renderElements(n []elem.Node) string {
-	render := ""
-	for _, r := range n {
-		render += r.Render()
-	}
-	return render
-}
-
-func P2PNodeStats(nodes []p2p.NodeData) string {
-	/*
-	   <div class="bg-gray-800 p-6 rounded-lg shadow-lg text-left">
-	                       <p class="text-xl font-semibold text-gray-200">Total Workers Detected: {{ len .Nodes }}</p>
-	                       {{ $online := 0 }}
-	                       {{ range .Nodes }}
-	                           {{ if .IsOnline }}
-	                               {{ $online = add $online 1 }}
-	                           {{ end }}
-	                       {{ end }}
-	                       <p class="text-xl font-semibold text-gray-200">Total Online Workers: {{$online}}</p>
-	                   </div>
-	*/
-
-	online := 0
-	for _, n := range nodes {
-		if n.IsOnline() {
-			online++
-		}
-	}
-
-	class := "text-green-500"
-	if online == 0 {
-		class = "text-red-500"
-	}
-	/*
-	   <i class="fas fa-circle animate-pulse text-green-500 ml-2 mr-1"></i>
-	*/
-	circle := elem.I(attrs.Props{
-		"class": "fas fa-circle animate-pulse " + class + " ml-2 mr-1",
-	})
-	nodesElements := []elem.Node{
-		elem.Span(
-			attrs.Props{
-				"class": class,
-			},
-			circle,
-			elem.Text(fmt.Sprintf("%d", online)),
-		),
-		elem.Span(
-			attrs.Props{
-				"class": "text-gray-200",
-			},
-			elem.Text(fmt.Sprintf("/%d", len(nodes))),
-		),
-	}
-
-	return renderElements(nodesElements)
-}
-
-func P2PNodeBoxes(nodes []p2p.NodeData) string {
-	/*
-			<div class="bg-gray-800 p-4 rounded-lg shadow-lg text-left">
-			<div class="flex items-center mb-2">
-				<i class="fas fa-desktop text-gray-400 mr-2"></i>
-				<span class="text-gray-200 font-semibold">{{.ID}}</span>
-			</div>
-			<p class="text-sm text-gray-400 mt-2 flex items-center">
-				Status:
-				<i class="fas fa-circle {{ if .IsOnline }}text-green-500{{ else }}text-red-500{{ end }} ml-2 mr-1"></i>
-				<span class="{{ if .IsOnline }}text-green-400{{ else }}text-red-400{{ end }}">
-					{{ if .IsOnline }}Online{{ else }}Offline{{ end }}
-				</span>
-			</p>
-		</div>
-	*/
-
-	nodesElements := []elem.Node{}
-
-	for _, n := range nodes {
-
-		nodesElements = append(nodesElements,
-			elem.Div(
-				attrs.Props{
-					"class": "bg-gray-700 p-6 rounded-lg shadow-lg text-left",
-				},
-				elem.P(
-					attrs.Props{
-						"class": "text-sm text-gray-400 mt-2 flex",
-					},
-					elem.I(
-						attrs.Props{
-							"class": "fas fa-desktop text-gray-400 mr-2",
-						},
-					),
-					elem.Text("Name: "),
-					elem.Span(
-						attrs.Props{
-							"class": "text-gray-200 font-semibold ml-2 mr-1",
-						},
-						elem.Text(bluemonday.StrictPolicy().Sanitize(n.ID)),
-					),
-					elem.Text("Status: "),
-					elem.If(
-						n.IsOnline(),
-						elem.I(
-							attrs.Props{
-								"class": "fas fa-circle animate-pulse text-green-500 ml-2 mr-1",
-							},
-						),
-						elem.I(
-							attrs.Props{
-								"class": "fas fa-circle animate-pulse text-red-500 ml-2 mr-1",
-							},
-						),
-					),
-					elem.If(
-						n.IsOnline(),
-						elem.Span(
-							attrs.Props{
-								"class": "text-green-400",
-							},
-
-							elem.Text("Online"),
-						),
-						elem.Span(
-							attrs.Props{
-								"class": "text-red-400",
-							},
-							elem.Text("Offline"),
-						),
-					),
-				),
-			))
-	}
-
-	return renderElements(nodesElements)
-}
--- a/core/http/elements/progressbar.go
+++ b/core/http/elements/progressbar.go
@@ -1,89 +0,0 @@
-package elements
-
-import (
-	"github.com/chasefleming/elem-go"
-	"github.com/chasefleming/elem-go/attrs"
-	"github.com/microcosm-cc/bluemonday"
-)
-
-func DoneProgress(galleryID, text string, showDelete bool) string {
-	return elem.Div(
-		attrs.Props{
-			"id": "action-div-" + dropBadChars(galleryID),
-		},
-		elem.H3(
-			attrs.Props{
-				"role":      "status",
-				"id":        "pblabel",
-				"tabindex":  "-1",
-				"autofocus": "",
-			},
-			elem.Text(bluemonday.StrictPolicy().Sanitize(text)),
-		),
-		elem.If(showDelete, deleteButton(galleryID), reInstallButton(galleryID)),
-	).Render()
-}
-
-func ErrorProgress(err, galleryName string) string {
-	return elem.Div(
-		attrs.Props{},
-		elem.H3(
-			attrs.Props{
-				"role":      "status",
-				"id":        "pblabel",
-				"tabindex":  "-1",
-				"autofocus": "",
-			},
-			elem.Text("Error "+bluemonday.StrictPolicy().Sanitize(err)),
-		),
-		installButton(galleryName),
-	).Render()
-}
-
-func ProgressBar(progress string) string {
-	return elem.Div(attrs.Props{
-		"class":           "progress",
-		"role":            "progressbar",
-		"aria-valuemin":   "0",
-		"aria-valuemax":   "100",
-		"aria-valuenow":   "0",
-		"aria-labelledby": "pblabel",
-	},
-		elem.Div(attrs.Props{
-			"id":    "pb",
-			"class": "progress-bar",
-			"style": "width:" + progress + "%",
-		}),
-	).Render()
-}
-
-func StartProgressBar(uid, progress, text string) string {
-	if progress == "" {
-		progress = "0"
-	}
-	return elem.Div(
-		attrs.Props{
-			"hx-trigger": "done",
-			"hx-get":     "/browse/job/" + uid,
-			"hx-swap":    "outerHTML",
-			"hx-target":  "this",
-		},
-		elem.H3(
-			attrs.Props{
-				"role":      "status",
-				"id":        "pblabel",
-				"tabindex":  "-1",
-				"autofocus": "",
-			},
-			elem.Text(bluemonday.StrictPolicy().Sanitize(text)), //Perhaps overly defensive
-			elem.Div(attrs.Props{
-				"hx-get":     "/browse/job/progress/" + uid,
-				"hx-trigger": "every 600ms",
-				"hx-target":  "this",
-				"hx-swap":    "innerHTML",
-			},
-				elem.Raw(ProgressBar(progress)),
-			),
-		),
-	).Render()
-}
--- a/core/http/endpoints/localai/system.go
+++ b/core/http/endpoints/localai/system.go
@@ -21,15 +21,10 @@ func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConf
 		for b := range appConfig.ExternalGRPCBackends {
 			availableBackends = append(availableBackends, b)
 		}
-
-		sysmodels := []schema.SysInfoModel{}
-		for _, m := range loadedModels {
-			sysmodels = append(sysmodels, schema.SysInfoModel{ID: m.ID})
-		}
 		return c.JSON(
 			schema.SystemInformationResponse{
 				Backends: availableBackends,
-				Models:   sysmodels,
+				Models:   loadedModels,
 			},
 		)
 	}
--- a/core/http/endpoints/localai/tts.go
+++ b/core/http/endpoints/localai/tts.go
@@ -9,19 +9,16 @@ import (
 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/rs/zerolog/log"
-
-	"github.com/mudler/LocalAI/pkg/utils"
 )

 // TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech
-//
-//		@Summary	Generates audio from the input text.
-//	 	@Accept json
-//	 	@Produce audio/x-wav
-//		@Param		request	body		schema.TTSRequest	true	"query params"
-//		@Success	200		{string}	binary				"generated audio/wav file"
-//		@Router		/v1/audio/speech [post]
-//		@Router		/tts [post]
+//	@Summary	Generates audio from the input text.
+//  @Accept json
+//  @Produce audio/x-wav
+//	@Param		request	body		schema.TTSRequest	true	"query params"
+//	@Success	200		{string}	binary				"generated audio/wav file"
+//	@Router		/v1/audio/speech [post]
+//	@Router		/tts [post]
 func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {

@@ -70,13 +67,6 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
 		if err != nil {
 			return err
 		}
-
-		// Convert generated file to target format
-		filePath, err = utils.AudioConvert(filePath, input.Format)
-		if err != nil {
-			return err
-		}
-
 		return c.Download(filePath)
 	}
 }
--- a/core/http/endpoints/localai/vad.go
+++ b/core/http/endpoints/localai/vad.go
@@ -1,68 +0,0 @@
-package localai
-
-import (
-	"github.com/gofiber/fiber/v2"
-	"github.com/mudler/LocalAI/core/backend"
-	"github.com/mudler/LocalAI/core/config"
-	fiberContext "github.com/mudler/LocalAI/core/http/ctx"
-	"github.com/mudler/LocalAI/core/schema"
-	"github.com/mudler/LocalAI/pkg/grpc/proto"
-	"github.com/mudler/LocalAI/pkg/model"
-	"github.com/rs/zerolog/log"
-)
-
-// VADEndpoint is Voice-Activation-Detection endpoint
-// @Summary	Detect voice fragments in an audio stream
-// @Accept json
-// @Param		request	body		schema.VADRequest	true	"query params"
-// @Success 200 {object} proto.VADResponse "Response"
-// @Router		/vad [post]
-func VADEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-
-		input := new(schema.VADRequest)
-
-		// Get input data from the request body
-		if err := c.BodyParser(input); err != nil {
-			return err
-		}
-
-		modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
-		if err != nil {
-			modelFile = input.Model
-			log.Warn().Msgf("Model not found in context: %s", input.Model)
-		}
-
-		cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
-			config.LoadOptionDebug(appConfig.Debug),
-			config.LoadOptionThreads(appConfig.Threads),
-			config.LoadOptionContextSize(appConfig.ContextSize),
-			config.LoadOptionF16(appConfig.F16),
-		)
-
-		if err != nil {
-			log.Err(err)
-			modelFile = input.Model
-			log.Warn().Msgf("Model not found in context: %s", input.Model)
-		} else {
-			modelFile = cfg.Model
-		}
-		log.Debug().Msgf("Request for model: %s", modelFile)
-
-		opts := backend.ModelOptions(*cfg, appConfig, model.WithBackendString(cfg.Backend), model.WithModel(modelFile))
-
-		vadModel, err := ml.Load(opts...)
-		if err != nil {
-			return err
-		}
-		req := proto.VADRequest{
-			Audio: input.Audio,
-		}
-		resp, err := vadModel.VAD(c.Context(), &req)
-		if err != nil {
-			return err
-		}
-
-		return c.JSON(resp)
-	}
-}
--- a/core/http/endpoints/openai/image.go
+++ b/core/http/endpoints/openai/image.go
@@ -136,11 +136,6 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon
 			config.Backend = model.StableDiffusionBackend
 		}

-		if !strings.Contains(input.Size, "x") {
-			input.Size = "512x512"
-			log.Warn().Msgf("Invalid size, using default 512x512")
-		}
-
 		sizeParts := strings.Split(input.Size, "x")
 		if len(sizeParts) != 2 {
 			return fmt.Errorf("invalid value for 'size'")
--- a/core/http/endpoints/openai/request.go
+++ b/core/http/endpoints/openai/request.go
@@ -304,6 +304,7 @@ func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *c
 		config.LoadOptionThreads(threads),
 		config.LoadOptionContextSize(ctx),
 		config.LoadOptionF16(f16),
+		config.ModelPath(loader.ModelPath),
 	)

 	// Set the parameters for the language model prediction
--- a/core/http/middleware/auth.go
+++ b/core/http/middleware/auth.go
@@ -1,95 +1,95 @@
-package middleware
-
-import (
-	"crypto/subtle"
-	"errors"
-
-	"github.com/dave-gray101/v2keyauth"
-	"github.com/gofiber/fiber/v2"
-	"github.com/gofiber/fiber/v2/middleware/keyauth"
-	"github.com/mudler/LocalAI/core/config"
-)
-
-// This file contains the configuration generators and handler functions that are used along with the fiber/keyauth middleware
-// Currently this requires an upstream patch - and feature patches are no longer accepted to v2
-// Therefore `dave-gray101/v2keyauth` contains the v2 backport of the middleware until v3 stabilizes and we migrate.
-
-func GetKeyAuthConfig(applicationConfig *config.ApplicationConfig) (*v2keyauth.Config, error) {
-	customLookup, err := v2keyauth.MultipleKeySourceLookup([]string{"header:Authorization", "header:x-api-key", "header:xi-api-key", "cookie:token"}, keyauth.ConfigDefault.AuthScheme)
-	if err != nil {
-		return nil, err
-	}
-
-	return &v2keyauth.Config{
-		CustomKeyLookup: customLookup,
-		Next:            getApiKeyRequiredFilterFunction(applicationConfig),
-		Validator:       getApiKeyValidationFunction(applicationConfig),
-		ErrorHandler:    getApiKeyErrorHandler(applicationConfig),
-		AuthScheme:      "Bearer",
-	}, nil
-}
-
-func getApiKeyErrorHandler(applicationConfig *config.ApplicationConfig) fiber.ErrorHandler {
-	return func(ctx *fiber.Ctx, err error) error {
-		if errors.Is(err, v2keyauth.ErrMissingOrMalformedAPIKey) {
-			if len(applicationConfig.ApiKeys) == 0 {
-				return ctx.Next() // if no keys are set up, any error we get here is not an error.
-			}
-			ctx.Set("WWW-Authenticate", "Bearer")
-			if applicationConfig.OpaqueErrors {
-				return ctx.SendStatus(401)
-			}
-			return ctx.Status(401).Render("views/login", nil)
-		}
-		if applicationConfig.OpaqueErrors {
-			return ctx.SendStatus(500)
-		}
-		return err
-	}
-}
-
-func getApiKeyValidationFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx, string) (bool, error) {
-
-	if applicationConfig.UseSubtleKeyComparison {
-		return func(ctx *fiber.Ctx, apiKey string) (bool, error) {
-			if len(applicationConfig.ApiKeys) == 0 {
-				return true, nil // If no keys are setup, accept everything
-			}
-			for _, validKey := range applicationConfig.ApiKeys {
-				if subtle.ConstantTimeCompare([]byte(apiKey), []byte(validKey)) == 1 {
-					return true, nil
-				}
-			}
-			return false, v2keyauth.ErrMissingOrMalformedAPIKey
-		}
-	}
-
-	return func(ctx *fiber.Ctx, apiKey string) (bool, error) {
-		if len(applicationConfig.ApiKeys) == 0 {
-			return true, nil // If no keys are setup, accept everything
-		}
-		for _, validKey := range applicationConfig.ApiKeys {
-			if apiKey == validKey {
-				return true, nil
-			}
-		}
-		return false, v2keyauth.ErrMissingOrMalformedAPIKey
-	}
-}
-
-func getApiKeyRequiredFilterFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx) bool {
-	if applicationConfig.DisableApiKeyRequirementForHttpGet {
-		return func(c *fiber.Ctx) bool {
-			if c.Method() != "GET" {
-				return false
-			}
-			for _, rx := range applicationConfig.HttpGetExemptedEndpoints {
-				if rx.MatchString(c.Path()) {
-					return true
-				}
-			}
-			return false
-		}
-	}
-	return func(c *fiber.Ctx) bool { return false }
-}
+package middleware
+
+import (
+	"crypto/subtle"
+	"errors"
+
+	"github.com/dave-gray101/v2keyauth"
+	"github.com/gofiber/fiber/v2"
+	"github.com/gofiber/fiber/v2/middleware/keyauth"
+	"github.com/microcosm-cc/bluemonday"
+	"github.com/mudler/LocalAI/core/config"
+)
+
+// This file contains the configuration generators and handler functions that are used along with the fiber/keyauth middleware
+// Currently this requires an upstream patch - and feature patches are no longer accepted to v2
+// Therefore `dave-gray101/v2keyauth` contains the v2 backport of the middleware until v3 stabilizes and we migrate.
+
+func GetKeyAuthConfig(applicationConfig *config.ApplicationConfig) (*v2keyauth.Config, error) {
+	customLookup, err := v2keyauth.MultipleKeySourceLookup([]string{"header:Authorization", "header:x-api-key", "header:xi-api-key"}, keyauth.ConfigDefault.AuthScheme)
+	if err != nil {
+		return nil, err
+	}
+
+	return &v2keyauth.Config{
+		CustomKeyLookup: customLookup,
+		Next:            getApiKeyRequiredFilterFunction(applicationConfig),
+		Validator:       getApiKeyValidationFunction(applicationConfig),
+		ErrorHandler:    getApiKeyErrorHandler(applicationConfig),
+		AuthScheme:      "Bearer",
+	}, nil
+}
+
+func getApiKeyErrorHandler(applicationConfig *config.ApplicationConfig) fiber.ErrorHandler {
+	return func(ctx *fiber.Ctx, err error) error {
+		if errors.Is(err, v2keyauth.ErrMissingOrMalformedAPIKey) {
+			if len(applicationConfig.ApiKeys) == 0 {
+				return ctx.Next() // if no keys are set up, any error we get here is not an error.
+			}
+			if applicationConfig.OpaqueErrors {
+				return ctx.SendStatus(403)
+			}
+			return ctx.Status(403).SendString(bluemonday.StrictPolicy().Sanitize(err.Error()))
+		}
+		if applicationConfig.OpaqueErrors {
+			return ctx.SendStatus(500)
+		}
+		return err
+	}
+}
+
+func getApiKeyValidationFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx, string) (bool, error) {
+
+	if applicationConfig.UseSubtleKeyComparison {
+		return func(ctx *fiber.Ctx, apiKey string) (bool, error) {
+			if len(applicationConfig.ApiKeys) == 0 {
+				return true, nil // If no keys are setup, accept everything
+			}
+			for _, validKey := range applicationConfig.ApiKeys {
+				if subtle.ConstantTimeCompare([]byte(apiKey), []byte(validKey)) == 1 {
+					return true, nil
+				}
+			}
+			return false, v2keyauth.ErrMissingOrMalformedAPIKey
+		}
+	}
+
+	return func(ctx *fiber.Ctx, apiKey string) (bool, error) {
+		if len(applicationConfig.ApiKeys) == 0 {
+			return true, nil // If no keys are setup, accept everything
+		}
+		for _, validKey := range applicationConfig.ApiKeys {
+			if apiKey == validKey {
+				return true, nil
+			}
+		}
+		return false, v2keyauth.ErrMissingOrMalformedAPIKey
+	}
+}
+
+func getApiKeyRequiredFilterFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx) bool {
+	if applicationConfig.DisableApiKeyRequirementForHttpGet {
+		return func(c *fiber.Ctx) bool {
+			if c.Method() != "GET" {
+				return false
+			}
+			for _, rx := range applicationConfig.HttpGetExemptedEndpoints {
+				if rx.MatchString(c.Path()) {
+					return true
+				}
+			}
+			return false
+		}
+	}
+	return func(c *fiber.Ctx) bool { return false }
+}
--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -34,7 +34,6 @@ func RegisterLocalAIRoutes(app *fiber.App,
 	}

 	app.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig))
-	app.Post("/vad", localai.VADEndpoint(cl, ml, appConfig))

 	// Stores
 	sl := model.NewModelLoader("")
--- a/core/http/static/assets/flowbite.min.js
+++ b/core/http/static/assets/flowbite.min.js
--- a/core/http/views/login.html
+++ b/core/http/views/login.html
@@ -1,23 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Open Authenticated Website</title>
-</head>
-<body>
-    <h1>Authorization is required</h1>
-    <input type="text" id="token" placeholder="Token" />
-    <button onclick="login()">Login</button>
-    <script>
-        function login() {
-            const token = document.getElementById('token').value;
-            var date = new Date();
-            date.setTime(date.getTime() + (24*60*60*1000));
-            document.cookie = `token=${token}; expires=${date.toGMTString()}`;
-
-            window.location.reload();
-        }
-    </script>
-</body>
-</html>
--- a/core/http/views/partials/footer.html
+++ b/core/http/views/partials/footer.html
@@ -1,5 +1,5 @@
 <footer class="text-center py-8">
    LocalAI Version {{.Version}}<br>
-    <a href='https://github.com/mudler/LocalAI' class="text-blue-400 hover:text-blue-600" target="_blank">LocalAI</a> © 2023-2024 <a href='https://mudler.pm' class="text-blue-400 hover:text-blue-600" target="_blank">Ettore Di Giacinto</a>
+    <a href='https://localai.io' class="text-blue-400 hover:text-blue-600" target="_blank">LocalAI</a> © 2023-2024 <a href='https://mudler.pm' class="text-blue-400 hover:text-blue-600" target="_blank">Ettore Di Giacinto</a>
 </footer>
 <script src="/static/assets/tw-elements.js"></script>
--- a/core/http/views/partials/head.html
+++ b/core/http/views/partials/head.html
@@ -57,7 +57,6 @@
  <link href="/static/assets/fontawesome/css/fontawesome.css" rel="stylesheet" />
  <link href="/static/assets/fontawesome/css/brands.css" rel="stylesheet" />
  <link href="/static/assets/fontawesome/css/solid.css" rel="stylesheet" />
-  <script src="/static/assets/flowbite.min.js"></script>
  <script src="/static/assets/htmx.js" crossorigin="anonymous"></script>
  <!-- P2P Animation START -->
  <style>
@@ -119,11 +118,4 @@
            100% { transform: rotate(0deg); } /* Return to center */
        }
  </style>
-
-  <!-- https://stackoverflow.com/questions/76051980/flowbite-component-not-working-when-loaded-via-htmx-django-project -->
-  <script>
-      htmx.onLoad(function(content) {
-          initFlowbite();
-      })
-  </script>
 </head>
--- a/core/p2p/p2p.go
+++ b/core/p2p/p2p.go
@@ -10,7 +10,6 @@ import (
 	"io"
 	"net"
 	"os"
-	"strings"
 	"sync"
 	"time"

@@ -23,7 +22,6 @@ import (
 	"github.com/mudler/edgevpn/pkg/services"
 	"github.com/mudler/edgevpn/pkg/types"
 	eutils "github.com/mudler/edgevpn/pkg/utils"
-	"github.com/multiformats/go-multiaddr"
 	"github.com/phayes/freeport"
 	zlog "github.com/rs/zerolog/log"

@@ -233,14 +231,10 @@ func discoveryTunnels(ctx context.Context, n *node.Node, token, servicesID strin

 				data := ledger.LastBlock().Storage[servicesID]

-				if logLevel == logLevelDebug {
-					// We want to surface this debugging data only if p2p logging is set to debug
-					// (and not generally the whole application, as this can be really noisy)
-					zlog.Debug().Any("data", ledger.LastBlock().Storage).Msg("Ledger data")
-				}
+				zlog.Debug().Any("data", ledger.LastBlock().Storage).Msg("Ledger data")

 				for k, v := range data {
-					// New worker found in the ledger data as k (worker id)
+					zlog.Debug().Msgf("New worker found in the ledger data '%s'", k)
 					nd := &NodeData{}
 					if err := v.Unmarshal(nd); err != nil {
 						zlog.Error().Msg("cannot unmarshal node data")
@@ -275,7 +269,7 @@ func ensureService(ctx context.Context, n *node.Node, nd *NodeData, sserv string
 	if ndService, found := service[nd.Name]; !found {
 		if !nd.IsOnline() {
 			// if node is offline and not present, do nothing
-			// Node nd.ID is offline
+			zlog.Debug().Msgf("Node %s is offline", nd.ID)
 			return
 		}

@@ -387,35 +381,22 @@ func newNodeOpts(token string) ([]node.Option, error) {
 	noDHT := os.Getenv("LOCALAI_P2P_DISABLE_DHT") == "true"
 	noLimits := os.Getenv("LOCALAI_P2P_ENABLE_LIMITS") == "true"

-	var listenMaddrs []string
-	var bootstrapPeers []string
-
-	laddrs := os.Getenv("LOCALAI_P2P_LISTEN_MADDRS")
-	if laddrs != "" {
-		listenMaddrs = strings.Split(laddrs, ",")
+	loglevel := os.Getenv("LOCALAI_P2P_LOGLEVEL")
+	if loglevel == "" {
+		loglevel = "info"
 	}
-
-	bootmaddr := os.Getenv("LOCALAI_P2P_BOOTSTRAP_PEERS_MADDRS")
-	if bootmaddr != "" {
-		bootstrapPeers = strings.Split(bootmaddr, ",")
-	}
-
-	dhtAnnounceMaddrs := stringsToMultiAddr(strings.Split(os.Getenv("LOCALAI_P2P_DHT_ANNOUNCE_MADDRS"), ","))
-
-	libp2ploglevel := os.Getenv("LOCALAI_P2P_LIB_LOGLEVEL")
+	libp2ploglevel := os.Getenv("LOCALAI_LIBP2P_LOGLEVEL")
 	if libp2ploglevel == "" {
 		libp2ploglevel = "fatal"
 	}
 	c := config.Config{
-		ListenMaddrs:      listenMaddrs,
-		DHTAnnounceMaddrs: dhtAnnounceMaddrs,
 		Limit: config.ResourceLimit{
 			Enable:   noLimits,
 			MaxConns: 100,
 		},
 		NetworkToken:   token,
 		LowProfile:     false,
-		LogLevel:       logLevel,
+		LogLevel:       loglevel,
 		Libp2pLogLevel: libp2ploglevel,
 		Ledger: config.Ledger{
 			SyncInterval:     defaultInterval,
@@ -430,10 +411,9 @@ func newNodeOpts(token string) ([]node.Option, error) {
 			RateLimitInterval: defaultInterval,
 		},
 		Discovery: config.Discovery{
-			DHT:            !noDHT,
-			MDNS:           true,
-			Interval:       10 * time.Second,
-			BootstrapPeers: bootstrapPeers,
+			DHT:      !noDHT,
+			MDNS:     true,
+			Interval: 10 * time.Second,
 		},
 		Connection: config.Connection{
 			HolePunch:      true,
@@ -452,18 +432,6 @@ func newNodeOpts(token string) ([]node.Option, error) {
 	return nodeOpts, nil
 }

-func stringsToMultiAddr(peers []string) []multiaddr.Multiaddr {
-	res := []multiaddr.Multiaddr{}
-	for _, p := range peers {
-		addr, err := multiaddr.NewMultiaddr(p)
-		if err != nil {
-			continue
-		}
-		res = append(res, addr)
-	}
-	return res
-}
-
 func copyStream(closer chan struct{}, dst io.Writer, src io.Reader) {
 	defer func() { closer <- struct{}{} }() // connection is closed, send signal to stop proxy
 	io.Copy(dst, src)
--- a/core/p2p/p2p_common.go
+++ b/core/p2p/p2p_common.go
@@ -1,19 +0,0 @@
-package p2p
-
-import (
-	"os"
-	"strings"
-)
-
-var logLevel = strings.ToLower(os.Getenv("LOCALAI_P2P_LOGLEVEL"))
-
-const (
-	logLevelDebug = "debug"
-	logLevelInfo  = "info"
-)
-
-func init() {
-	if logLevel == "" {
-		logLevel = logLevelInfo
-	}
-}
--- a/core/schema/localai.go
+++ b/core/schema/localai.go
@@ -2,6 +2,7 @@ package schema

 import (
 	"github.com/mudler/LocalAI/core/p2p"
+	"github.com/mudler/LocalAI/pkg/model"
 	gopsutil "github.com/shirou/gopsutil/v3/process"
 )

@@ -30,14 +31,7 @@ type TTSRequest struct {
 	Input    string `json:"input" yaml:"input"` // text input
 	Voice    string `json:"voice" yaml:"voice"` // voice audio file or speaker id
 	Backend  string `json:"backend" yaml:"backend"`
-	Language string `json:"language,omitempty" yaml:"language,omitempty"`               // (optional) language to use with TTS model
-	Format   string `json:"response_format,omitempty" yaml:"response_format,omitempty"` // (optional) output format
-}
-
-// @Description VAD request body
-type VADRequest struct {
-	Model string    `json:"model" yaml:"model"` // model name or full path
-	Audio []float32 `json:"audio" yaml:"audio"` // model name or full path
+	Language string `json:"language,omitempty" yaml:"language,omitempty"` // (optional) language to use with TTS model
 }

 type StoresSet struct {
@@ -82,11 +76,7 @@ type P2PNodesResponse struct {
 	FederatedNodes []p2p.NodeData `json:"federated_nodes" yaml:"federated_nodes"`
 }

-type SysInfoModel struct {
-	ID string `json:"id"`
-}
-
 type SystemInformationResponse struct {
-	Backends []string       `json:"backends"`
-	Models   []SysInfoModel `json:"loaded_models"`
+	Backends []string      `json:"backends"`
+	Models   []model.Model `json:"loaded_models"`
 }
--- a/core/startup/startup.go
+++ b/core/startup/startup.go
@@ -160,10 +160,15 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode

 			log.Debug().Msgf("Auto loading model %s into memory from file: %s", m, cfg.Model)

-			o := backend.ModelOptions(*cfg, options)
+			o := backend.ModelOptions(*cfg, options, []model.Option{})

 			var backendErr error
-			_, backendErr = ml.Load(o...)
+			if cfg.Backend != "" {
+				o = append(o, model.WithBackendString(cfg.Backend))
+				_, backendErr = ml.BackendLoader(o...)
+			} else {
+				_, backendErr = ml.GreedyLoader(o...)
+			}
 			if backendErr != nil {
 				return nil, nil, nil, err
 			}
--- a/docs/content/docs/advanced/run-other-models.md
+++ b/docs/content/docs/advanced/run-other-models.md
@@ -18,7 +18,7 @@ There are different categories of models: [LLMs]({{%relref "docs/features/text-g

 {{% alert icon="💡" %}}

-To customize the models, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}). For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI-examples/tree/main/configurations) and the configurations for the models below is available [here](https://github.com/mudler/LocalAI/tree/master/embedded/models).
+To customize the models, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}). For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI/tree/master/examples/configurations); the configurations for the models below are available [here](https://github.com/mudler/LocalAI/tree/master/embedded/models).
 {{% /alert %}}

 {{< tabs tabTotal="3" >}}
--- a/docs/content/docs/features/distributed_inferencing.md
+++ b/docs/content/docs/features/distributed_inferencing.md
@@ -131,13 +131,9 @@ There are options that can be tweaked or parameters that can be set using enviro
 |----------------------|-------------|
 | **LOCALAI_P2P_DISABLE_DHT** | Set to "true" to disable DHT and enable p2p layer to be local only (mDNS) |
 | **LOCALAI_P2P_ENABLE_LIMITS** | Set to "true" to enable connection limits and resources management (useful when running with poor connectivity or want to limit resources consumption) |
-| **LOCALAI_P2P_LISTEN_MADDRS** | Set to comma separated list of multiaddresses to override default libp2p 0.0.0.0 multiaddresses |
-| **LOCALAI_P2P_DHT_ANNOUNCE_MADDRS** | Set to comma separated list of multiaddresses to override announcing of listen multiaddresses (useful when external address:port is remapped) |
-| **LOCALAI_P2P_BOOTSTRAP_PEERS_MADDRS** | Set to comma separated list of multiaddresses to specify custom DHT bootstrap nodes |
 | **LOCALAI_P2P_TOKEN** | Set the token for the p2p network |
 | **LOCALAI_P2P_LOGLEVEL** | Set the loglevel for the LocalAI p2p stack (default: info) |
-| **LOCALAI_P2P_LIB_LOGLEVEL** | Set the loglevel for the underlying libp2p stack (default: fatal) |
-
+| **LOCALAI_LIBP2P_LOGLEVEL** | Set the loglevel for the underlying libp2p stack (default: fatal) |

 ## Architecture

--- a/docs/content/docs/features/embeddings.md
+++ b/docs/content/docs/features/embeddings.md
@@ -27,6 +27,39 @@ embeddings: true
 # .. other parameters
 ```

+## Bert embeddings
+
+To use `bert.cpp` models, you can use the `bert-embeddings` backend.
+
+An example model config file:
+
+```yaml
+name: text-embedding-ada-002
+parameters:
+  model: bert
+backend: bert-embeddings
+embeddings: true
+# .. other parameters
+```
+
+The `bert-embeddings` backend uses [bert.cpp](https://github.com/skeskinen/bert.cpp) and works with `ggml` models.
+
+For instance, you can download the `ggml` quantized version of `all-MiniLM-L6-v2` from <https://huggingface.co/skeskinen/ggml>:
+
+```bash
+wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
+```
+
+To test locally (LocalAI server running on `localhost`),
+you can use `curl` (and `jq` at the end to prettify):
+
+```bash
+curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
+  "input": "Your text string goes here",
+  "model": "text-embedding-ada-002"
+}' | jq "."
+```
+
 ## Huggingface embeddings

 To use `sentence-transformers` and models in `huggingface` you can use the `sentencetransformers` embedding backend.
@@ -54,26 +87,17 @@ The `sentencetransformers` backend uses Python [sentence-transformers](https://g

 ## Llama.cpp embeddings

-Embeddings with `llama.cpp` are supported with the `llama-cpp` backend, it needs to be enabled with `embeddings` set to `true`.
+Embeddings with `llama.cpp` are supported by the `llama` backend; enable them by setting `embeddings: true` in the model configuration, as in the example below.

 ```yaml
 name: my-awesome-model
-backend: llama-cpp
+backend: llama
 embeddings: true
 parameters:
  model: ggml-file.bin
 # ...
 ```

-Then you can use the API to generate embeddings:
-
-```bash
-curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
-  "input": "My text",
-  "model": "my-awesome-model"
-}' | jq "."
-```
-
 ## 💡 Examples

 - Example that uses LLamaIndex and LocalAI as embedding: [here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/).
--- a/Show More
+++ b/Show More