mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-04 11:42:57 -05:00
Compare commits
113 Commits
v2.23.0
...
silero-vad
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a00bbfe3eb | ||
|
|
2b62260b6d | ||
|
|
03800ccceb | ||
|
|
f1b86d6e7f | ||
|
|
404ca3cc23 | ||
|
|
7492179c67 | ||
|
|
eeb22317b5 | ||
|
|
9b46dcf006 | ||
|
|
6c8e870812 | ||
|
|
0f8f249465 | ||
|
|
720ffc1d9d | ||
|
|
5c4e4c1cbc | ||
|
|
32ca4a51e5 | ||
|
|
dbe98229e8 | ||
|
|
1de20331ca | ||
|
|
7d2f213dc8 | ||
|
|
76c8d0b868 | ||
|
|
aae7e5fe99 | ||
|
|
9cb30bedeb | ||
|
|
f1a72f3a16 | ||
|
|
a04cf9543d | ||
|
|
7a973c8c16 | ||
|
|
66b06f43af | ||
|
|
74134ef99a | ||
|
|
f5fdef72e3 | ||
|
|
cfc45dff37 | ||
|
|
30f641fe12 | ||
|
|
76f5ba1412 | ||
|
|
b601535cdd | ||
|
|
c9c58a24a8 | ||
|
|
4e3df95737 | ||
|
|
f028ee8a26 | ||
|
|
47dc4337ba | ||
|
|
fa6fcdf53e | ||
|
|
163ca74590 | ||
|
|
961a993b88 | ||
|
|
46847f3bd4 | ||
|
|
f03bbf3188 | ||
|
|
73ab921391 | ||
|
|
eaf0e3022a | ||
|
|
7adbc16bae | ||
|
|
76d813ed1c | ||
|
|
4f1ab2366d | ||
|
|
51e0db367a | ||
|
|
c20e145aa5 | ||
|
|
b1ea9318e6 | ||
|
|
9892d7d584 | ||
|
|
96377feff6 | ||
|
|
eeeedaf5c6 | ||
|
|
de148cb2ad | ||
|
|
8a4df3af99 | ||
|
|
cfb0ac3992 | ||
|
|
57de92e727 | ||
|
|
ccf64cd7e2 | ||
|
|
47c4248703 | ||
|
|
faf203eeb3 | ||
|
|
534cdf1306 | ||
|
|
569171ae97 | ||
|
|
b10c4ad90f | ||
|
|
a7db97e033 | ||
|
|
e0acc149fe | ||
|
|
61e14ad10b | ||
|
|
a028d97888 | ||
|
|
e898e0bdc2 | ||
|
|
8b0b326875 | ||
|
|
57e793482a | ||
|
|
9b1d53f109 | ||
|
|
f6adcd49fb | ||
|
|
65bcc01a34 | ||
|
|
3200090901 | ||
|
|
6516c093cb | ||
|
|
f69afb457c | ||
|
|
c53c0b068b | ||
|
|
939fbe59cc | ||
|
|
62d0d004fa | ||
|
|
de2b5748c3 | ||
|
|
065215341f | ||
|
|
1770b92fb6 | ||
|
|
a73c660fee | ||
|
|
b7d757186c | ||
|
|
1ef379854e | ||
|
|
216838b5da | ||
|
|
6ce0c0e4df | ||
|
|
8ab7517294 | ||
|
|
8a89aafc8c | ||
|
|
c222b2b7c0 | ||
|
|
5b166df96a | ||
|
|
489cb90322 | ||
|
|
c1d76290dc | ||
|
|
668ec2fadc | ||
|
|
ee4f1210bb | ||
|
|
aebaf71be6 | ||
|
|
1db504353c | ||
|
|
b36ced8681 | ||
|
|
5de277cc78 | ||
|
|
daf1b25476 | ||
|
|
e76bdaf61b | ||
|
|
f3aeec6a4d | ||
|
|
4e2a5719e7 | ||
|
|
fe7ffdbc63 | ||
|
|
8079ffee25 | ||
|
|
9688f516e0 | ||
|
|
7903e1f6fa | ||
|
|
1ec64bf683 | ||
|
|
2daf638ef8 | ||
|
|
bc25890a65 | ||
|
|
066fcce57b | ||
|
|
94fe07d073 | ||
|
|
8252a66034 | ||
|
|
5ab0ae9de5 | ||
|
|
7e2ef630aa | ||
|
|
e7d3efec14 | ||
|
|
4f5ec946ac |
1
.gitattributes
vendored
1
.gitattributes
vendored
@@ -1 +1,2 @@
|
||||
*.sh text eol=lf
|
||||
backend/cpp/llama/*.hpp linguist-vendored
|
||||
2
.github/workflows/checksum_checker.yaml
vendored
2
.github/workflows/checksum_checker.yaml
vendored
@@ -23,7 +23,7 @@ jobs:
|
||||
sudo pip install --upgrade pip
|
||||
pip install huggingface_hub
|
||||
- name: 'Setup yq'
|
||||
uses: dcarbone/install-yq-action@v1.1.1
|
||||
uses: dcarbone/install-yq-action@v1.3.1
|
||||
with:
|
||||
version: 'v4.44.2'
|
||||
download-compressed: true
|
||||
|
||||
4
.github/workflows/deploy-explorer.yaml
vendored
4
.github/workflows/deploy-explorer.yaml
vendored
@@ -33,7 +33,7 @@ jobs:
|
||||
run: |
|
||||
CGO_ENABLED=0 make build-api
|
||||
- name: rm
|
||||
uses: appleboy/ssh-action@v1.1.0
|
||||
uses: appleboy/ssh-action@v1.2.0
|
||||
with:
|
||||
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
||||
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
||||
@@ -53,7 +53,7 @@ jobs:
|
||||
rm: true
|
||||
target: ./local-ai
|
||||
- name: restarting
|
||||
uses: appleboy/ssh-action@v1.1.0
|
||||
uses: appleboy/ssh-action@v1.2.0
|
||||
with:
|
||||
host: ${{ secrets.EXPLORER_SSH_HOST }}
|
||||
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
|
||||
|
||||
2
.github/workflows/generate_intel_image.yaml
vendored
2
.github/workflows/generate_intel_image.yaml
vendored
@@ -15,7 +15,7 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- base-image: intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04
|
||||
- base-image: intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04
|
||||
runs-on: 'ubuntu-latest'
|
||||
platforms: 'linux/amd64'
|
||||
runs-on: ${{matrix.runs-on}}
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -12,7 +12,6 @@ prepare-sources
|
||||
|
||||
go-ggml-transformers
|
||||
go-gpt2
|
||||
go-rwkv
|
||||
whisper.cpp
|
||||
/bloomz
|
||||
go-bert
|
||||
|
||||
94
Makefile
94
Makefile
@@ -8,15 +8,11 @@ DETECT_LIBS?=true
|
||||
# llama.cpp versions
|
||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||
CPPLLAMA_VERSION?=6423c65aa8be1b98f990cf207422505ac5a441a1
|
||||
|
||||
# go-rwkv version
|
||||
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
|
||||
CPPLLAMA_VERSION?=47f931c8f9a26c072d71224bc8013cc66ea9e445
|
||||
|
||||
# whisper.cpp version
|
||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||
WHISPER_CPP_VERSION?=31aea563a83803c710691fed3e8d700e06ae6788
|
||||
WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
|
||||
|
||||
# bert.cpp version
|
||||
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
||||
@@ -34,6 +30,10 @@ STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
|
||||
TINYDREAM_REPO?=https://github.com/M0Rf30/go-tiny-dream
|
||||
TINYDREAM_VERSION?=c04fa463ace9d9a6464313aa5f9cd0f953b6c057
|
||||
|
||||
ONNX_VERSION?=1.20.0
|
||||
ONNX_ARCH?=x64
|
||||
ONNX_OS?=linux
|
||||
|
||||
export BUILD_TYPE?=
|
||||
export STABLE_BUILD_TYPE?=$(BUILD_TYPE)
|
||||
export CMAKE_ARGS?=
|
||||
@@ -45,6 +45,7 @@ CGO_LDFLAGS_WHISPER+=-lggml
|
||||
CUDA_LIBPATH?=/usr/local/cuda/lib64/
|
||||
GO_TAGS?=
|
||||
BUILD_ID?=
|
||||
NATIVE?=false
|
||||
|
||||
TEST_DIR=/tmp/test
|
||||
|
||||
@@ -83,7 +84,25 @@ ifndef UNAME_S
|
||||
UNAME_S := $(shell uname -s)
|
||||
endif
|
||||
|
||||
# IF native is false, we add -DGGML_NATIVE=OFF to CMAKE_ARGS
|
||||
ifeq ($(NATIVE),false)
|
||||
CMAKE_ARGS+=-DGGML_NATIVE=OFF
|
||||
endif
|
||||
|
||||
# Detect if we are running on arm64
|
||||
ifneq (,$(findstring aarch64,$(shell uname -m)))
|
||||
ONNX_ARCH=aarch64
|
||||
endif
|
||||
|
||||
ifeq ($(OS),Darwin)
|
||||
ONNX_OS=osx
|
||||
ifneq (,$(findstring aarch64,$(shell uname -m)))
|
||||
ONNX_ARCH=arm64
|
||||
else ifneq (,$(findstring arm64,$(shell uname -m)))
|
||||
ONNX_ARCH=arm64
|
||||
else
|
||||
ONNX_ARCH=x86_64
|
||||
endif
|
||||
|
||||
ifeq ($(OSX_SIGNING_IDENTITY),)
|
||||
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
|
||||
@@ -138,10 +157,10 @@ ifeq ($(BUILD_TYPE),hipblas)
|
||||
export CC=$(ROCM_HOME)/llvm/bin/clang
|
||||
# llama-ggml has no hipblas support, so override it here.
|
||||
export STABLE_BUILD_TYPE=
|
||||
export GGML_HIPBLAS=1
|
||||
export GGML_HIP=1
|
||||
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
|
||||
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
|
||||
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
||||
CMAKE_ARGS+=-DGGML_HIP=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
|
||||
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
|
||||
endif
|
||||
|
||||
@@ -186,9 +205,9 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
|
||||
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad
|
||||
ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
|
||||
# Use filter-out to remove the specified backends
|
||||
ALL_GRPC_BACKENDS := $(filter-out $(SKIP_GRPC_BACKEND),$(ALL_GRPC_BACKENDS))
|
||||
@@ -248,20 +267,6 @@ sources/go-piper:
|
||||
sources/go-piper/libpiper_binding.a: sources/go-piper
|
||||
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
|
||||
|
||||
|
||||
## RWKV
|
||||
sources/go-rwkv.cpp:
|
||||
mkdir -p sources/go-rwkv.cpp
|
||||
cd sources/go-rwkv.cpp && \
|
||||
git init && \
|
||||
git remote add origin $(RWKV_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout $(RWKV_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
|
||||
cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
|
||||
|
||||
## stable diffusion
|
||||
sources/go-stable-diffusion:
|
||||
mkdir -p sources/go-stable-diffusion
|
||||
@@ -275,6 +280,20 @@ sources/go-stable-diffusion:
|
||||
sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
|
||||
CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
|
||||
|
||||
sources/onnxruntime:
|
||||
mkdir -p sources/onnxruntime
|
||||
curl -L https://github.com/microsoft/onnxruntime/releases/download/v$(ONNX_VERSION)/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz -o sources/onnxruntime/onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
|
||||
cd sources/onnxruntime && tar -xvf onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz && rm onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION).tgz
|
||||
cd sources/onnxruntime && mv onnxruntime-$(ONNX_OS)-$(ONNX_ARCH)-$(ONNX_VERSION)/* ./
|
||||
|
||||
backend-assets/lib/libonnxruntime.so.1: backend-assets/lib sources/onnxruntime
|
||||
cp -rfv sources/onnxruntime/lib/* backend-assets/lib/
|
||||
ifeq ($(OS),Darwin)
|
||||
mv backend-assets/lib/libonnxruntime.$(ONNX_VERSION).dylib backend-assets/lib/libonnxruntime.dylib
|
||||
else
|
||||
mv backend-assets/lib/libonnxruntime.so.$(ONNX_VERSION) backend-assets/lib/libonnxruntime.so.1
|
||||
endif
|
||||
|
||||
## tiny-dream
|
||||
sources/go-tiny-dream:
|
||||
mkdir -p sources/go-tiny-dream
|
||||
@@ -301,10 +320,9 @@ sources/whisper.cpp:
|
||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
||||
|
||||
get-sources: sources/go-llama.cpp sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
|
||||
get-sources: sources/go-llama.cpp sources/go-piper sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
|
||||
|
||||
replace:
|
||||
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
|
||||
@@ -314,7 +332,6 @@ replace:
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
|
||||
|
||||
dropreplace:
|
||||
$(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp
|
||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
||||
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
|
||||
@@ -330,7 +347,6 @@ prepare-sources: get-sources replace
|
||||
rebuild: ## Rebuilds the project
|
||||
$(GOCMD) clean -cache
|
||||
$(MAKE) -C sources/go-llama.cpp clean
|
||||
$(MAKE) -C sources/go-rwkv.cpp clean
|
||||
$(MAKE) -C sources/whisper.cpp clean
|
||||
$(MAKE) -C sources/go-stable-diffusion clean
|
||||
$(MAKE) -C sources/go-bert.cpp clean
|
||||
@@ -439,8 +455,6 @@ test-models/testmodel.ggml:
|
||||
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
||||
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
|
||||
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
||||
wget -q https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv
|
||||
wget -q https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
|
||||
cp tests/models_fixtures/* test-models
|
||||
|
||||
prepare-test: grpcs
|
||||
@@ -761,7 +775,7 @@ backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/ll
|
||||
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
|
||||
# TODO: every binary should have its own folder instead, so can have different metal implementations
|
||||
ifeq ($(BUILD_TYPE),metal)
|
||||
cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
|
||||
cp backend/cpp/llama-fallback/llama.cpp/build/bin/ggml-metal.metal backend-assets/grpc/
|
||||
endif
|
||||
|
||||
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
@@ -775,7 +789,7 @@ backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/lla
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
|
||||
$(MAKE) -C backend/cpp/llama-hipblas purge
|
||||
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
|
||||
BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
|
||||
|
||||
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||
@@ -817,13 +831,6 @@ ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/piper
|
||||
endif
|
||||
|
||||
backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/rwkv
|
||||
endif
|
||||
|
||||
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
|
||||
@@ -831,6 +838,13 @@ ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/stablediffusion
|
||||
endif
|
||||
|
||||
backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/silero-vad
|
||||
endif
|
||||
|
||||
backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
|
||||
@@ -891,7 +905,7 @@ docker-aio-all:
|
||||
|
||||
docker-image-intel:
|
||||
docker build \
|
||||
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
|
||||
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
|
||||
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
||||
--build-arg GO_TAGS="none" \
|
||||
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
||||
@@ -899,7 +913,7 @@ docker-image-intel:
|
||||
|
||||
docker-image-intel-xpu:
|
||||
docker build \
|
||||
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
|
||||
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04 \
|
||||
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
||||
--build-arg GO_TAGS="none" \
|
||||
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
||||
|
||||
26
README.md
26
README.md
@@ -38,6 +38,10 @@
|
||||
</a>
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://trendshift.io/repositories/1484" target="_blank"><img src="https://trendshift.io/api/badge/repositories/1484" alt="go-skynet%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
||||
</p>
|
||||
|
||||
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
||||
>
|
||||
> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/mudler/LocalAI-examples)
|
||||
@@ -56,14 +60,17 @@ curl https://localai.io/install.sh | sh
|
||||
|
||||
Or run with docker:
|
||||
```bash
|
||||
# CPU only image:
|
||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-cpu
|
||||
|
||||
# Nvidia GPU:
|
||||
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
|
||||
|
||||
# CPU and GPU image (bigger size):
|
||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
|
||||
|
||||
# AIO images (it will pre-download a set of models ready for use, see https://localai.io/basics/container/)
|
||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
||||
# Alternative images:
|
||||
# - if you have an Nvidia GPU:
|
||||
# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
|
||||
# - without preconfigured models
|
||||
# docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
|
||||
# - without preconfigured models for Nvidia GPUs
|
||||
# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
|
||||
```
|
||||
|
||||
To load models:
|
||||
@@ -85,6 +92,7 @@ local-ai run oci://localai/phi-2:latest
|
||||
|
||||
## 📰 Latest project news
|
||||
|
||||
- Nov 2024: Voice activity detection models (**VAD**) added to the API: https://github.com/mudler/LocalAI/pull/4204
|
||||
- Oct 2024: examples moved to [LocalAI-examples](https://github.com/mudler/LocalAI-examples)
|
||||
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
|
||||
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
|
||||
@@ -156,6 +164,9 @@ Other:
|
||||
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
|
||||
- Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
|
||||
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
|
||||
- Another Telegram Bot https://github.com/JackBekket/Hellper
|
||||
- Auto-documentation https://github.com/JackBekket/Reflexia
|
||||
- Github bot which answer on issues, with code and documentation as context https://github.com/JackBekket/GitHelper
|
||||
- Github Actions: https://github.com/marketplace/actions/start-localai
|
||||
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/
|
||||
|
||||
@@ -230,7 +241,6 @@ LocalAI couldn't have been built without the help of great software already avai
|
||||
- https://github.com/antimatter15/alpaca.cpp
|
||||
- https://github.com/EdVince/Stable-Diffusion-NCNN
|
||||
- https://github.com/ggerganov/whisper.cpp
|
||||
- https://github.com/saharNooby/rwkv.cpp
|
||||
- https://github.com/rhasspy/piper
|
||||
|
||||
## 🤗 Contributors
|
||||
|
||||
@@ -28,6 +28,8 @@ service Backend {
|
||||
rpc Rerank(RerankRequest) returns (RerankResult) {}
|
||||
|
||||
rpc GetMetrics(MetricsRequest) returns (MetricsResponse);
|
||||
|
||||
rpc VAD(VADRequest) returns (VADResponse) {}
|
||||
}
|
||||
|
||||
// Define the empty request
|
||||
@@ -293,6 +295,19 @@ message TTSRequest {
|
||||
optional string language = 5;
|
||||
}
|
||||
|
||||
message VADRequest {
|
||||
repeated float audio = 1;
|
||||
}
|
||||
|
||||
message VADSegment {
|
||||
float start = 1;
|
||||
float end = 2;
|
||||
}
|
||||
|
||||
message VADResponse {
|
||||
repeated VADSegment segments = 1;
|
||||
}
|
||||
|
||||
message SoundGenerationRequest {
|
||||
string text = 1;
|
||||
string model = 2;
|
||||
|
||||
@@ -22,7 +22,7 @@ else ifeq ($(BUILD_TYPE),clblas)
|
||||
CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path
|
||||
# If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++
|
||||
else ifeq ($(BUILD_TYPE),hipblas)
|
||||
CMAKE_ARGS+=-DGGML_HIPBLAS=ON
|
||||
CMAKE_ARGS+=-DGGML_HIP=ON
|
||||
# If it's OSX, DO NOT embed the metal library - -DGGML_METAL_EMBED_LIBRARY=ON requires further investigation
|
||||
# But if it's OSX without metal, disable it here
|
||||
else ifeq ($(OS),Darwin)
|
||||
|
||||
@@ -203,7 +203,7 @@ struct llama_client_slot
|
||||
std::string stopping_word;
|
||||
|
||||
// sampling
|
||||
struct common_sampler_params sparams;
|
||||
struct common_params_sampling sparams;
|
||||
common_sampler *ctx_sampling = nullptr;
|
||||
|
||||
int32_t ga_i = 0; // group-attention state
|
||||
@@ -662,7 +662,7 @@ struct llama_server_context
|
||||
|
||||
bool launch_slot_with_data(llama_client_slot* &slot, json data) {
|
||||
slot_params default_params;
|
||||
common_sampler_params default_sparams;
|
||||
common_params_sampling default_sparams;
|
||||
|
||||
slot->params.stream = json_value(data, "stream", false);
|
||||
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
|
||||
@@ -2299,6 +2299,7 @@ static void params_parse(const backend::ModelOptions* request,
|
||||
params.use_mmap = request->mmap();
|
||||
params.flash_attn = request->flashattention();
|
||||
params.no_kv_offload = request->nokvoffload();
|
||||
params.ctx_shift = false; // We control context-shifting in any case (and we disable it as it could just lead to infinite loops)
|
||||
|
||||
params.embedding = request->embeddings();
|
||||
|
||||
|
||||
@@ -1,95 +0,0 @@
|
||||
package main
|
||||
|
||||
// This is a wrapper to statisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/donomii/go-rwkv.cpp"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
)
|
||||
|
||||
const tokenizerSuffix = ".tokenizer.json"
|
||||
|
||||
type LLM struct {
|
||||
base.SingleThread
|
||||
|
||||
rwkv *rwkv.RwkvState
|
||||
}
|
||||
|
||||
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
||||
tokenizerFile := opts.Tokenizer
|
||||
if tokenizerFile == "" {
|
||||
modelFile := filepath.Base(opts.ModelFile)
|
||||
tokenizerFile = modelFile + tokenizerSuffix
|
||||
}
|
||||
modelPath := filepath.Dir(opts.ModelFile)
|
||||
tokenizerPath := filepath.Join(modelPath, tokenizerFile)
|
||||
|
||||
model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads()))
|
||||
|
||||
if model == nil {
|
||||
return fmt.Errorf("rwkv could not load model")
|
||||
}
|
||||
llm.rwkv = model
|
||||
return nil
|
||||
}
|
||||
|
||||
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
||||
stopWord := "\n"
|
||||
if len(opts.StopPrompts) > 0 {
|
||||
stopWord = opts.StopPrompts[0]
|
||||
}
|
||||
|
||||
if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
response := llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), nil)
|
||||
|
||||
return response, nil
|
||||
}
|
||||
|
||||
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
||||
go func() {
|
||||
|
||||
stopWord := "\n"
|
||||
if len(opts.StopPrompts) > 0 {
|
||||
stopWord = opts.StopPrompts[0]
|
||||
}
|
||||
|
||||
if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
|
||||
fmt.Println("Error processing input: ", err)
|
||||
return
|
||||
}
|
||||
|
||||
llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), func(s string) bool {
|
||||
results <- s
|
||||
return true
|
||||
})
|
||||
close(results)
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (llm *LLM) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationResponse, error) {
|
||||
tokens, err := llm.rwkv.Tokenizer.Encode(opts.Prompt)
|
||||
if err != nil {
|
||||
return pb.TokenizationResponse{}, err
|
||||
}
|
||||
|
||||
l := len(tokens)
|
||||
i32Tokens := make([]int32, l)
|
||||
|
||||
for i, t := range tokens {
|
||||
i32Tokens[i] = int32(t.ID)
|
||||
}
|
||||
|
||||
return pb.TokenizationResponse{
|
||||
Length: int32(l),
|
||||
Tokens: i32Tokens,
|
||||
}, nil
|
||||
}
|
||||
@@ -15,7 +15,7 @@ var (
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
|
||||
if err := grpc.StartServer(*addr, &VAD{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
54
backend/go/vad/silero/vad.go
Normal file
54
backend/go/vad/silero/vad.go
Normal file
@@ -0,0 +1,54 @@
|
||||
package main
|
||||
|
||||
// This is a wrapper to statisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/streamer45/silero-vad-go/speech"
|
||||
)
|
||||
|
||||
type VAD struct {
|
||||
base.SingleThread
|
||||
detector *speech.Detector
|
||||
}
|
||||
|
||||
func (vad *VAD) Load(opts *pb.ModelOptions) error {
|
||||
v, err := speech.NewDetector(speech.DetectorConfig{
|
||||
ModelPath: opts.ModelFile,
|
||||
SampleRate: 16000,
|
||||
//WindowSize: 1024,
|
||||
Threshold: 0.5,
|
||||
MinSilenceDurationMs: 0,
|
||||
SpeechPadMs: 0,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("create silero detector: %w", err)
|
||||
}
|
||||
|
||||
vad.detector = v
|
||||
return err
|
||||
}
|
||||
|
||||
func (vad *VAD) VAD(req *pb.VADRequest) (pb.VADResponse, error) {
|
||||
audio := req.Audio
|
||||
|
||||
segments, err := vad.detector.Detect(audio)
|
||||
if err != nil {
|
||||
return pb.VADResponse{}, fmt.Errorf("detect: %w", err)
|
||||
}
|
||||
|
||||
vadSegments := []*pb.VADSegment{}
|
||||
for _, s := range segments {
|
||||
vadSegments = append(vadSegments, &pb.VADSegment{
|
||||
Start: float32(s.SpeechStartAt),
|
||||
End: float32(s.SpeechEndAt),
|
||||
})
|
||||
}
|
||||
|
||||
return pb.VADResponse{
|
||||
Segments: vadSegments,
|
||||
}, nil
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
accelerate
|
||||
auto-gptq==0.7.1
|
||||
grpcio==1.67.1
|
||||
grpcio==1.68.0
|
||||
protobuf
|
||||
certifi
|
||||
transformers
|
||||
@@ -1,4 +1,4 @@
|
||||
bark==0.1.5
|
||||
grpcio==1.67.1
|
||||
grpcio==1.68.0
|
||||
protobuf
|
||||
certifi
|
||||
@@ -1,3 +1,3 @@
|
||||
grpcio==1.67.1
|
||||
grpcio==1.68.0
|
||||
protobuf
|
||||
grpcio-tools
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.67.1
|
||||
grpcio==1.68.0
|
||||
protobuf
|
||||
certifi
|
||||
packaging==24.1
|
||||
@@ -1,5 +1,5 @@
|
||||
setuptools
|
||||
grpcio==1.67.1
|
||||
grpcio==1.68.0
|
||||
pillow
|
||||
protobuf
|
||||
certifi
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.67.1
|
||||
grpcio==1.68.0
|
||||
protobuf
|
||||
certifi
|
||||
wheel
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
grpcio==1.67.1
|
||||
grpcio==1.68.0
|
||||
protobuf
|
||||
certifi
|
||||
@@ -2,7 +2,7 @@
|
||||
intel-extension-for-pytorch
|
||||
torch
|
||||
optimum[openvino]
|
||||
grpcio==1.67.1
|
||||
grpcio==1.68.0
|
||||
protobuf
|
||||
librosa==0.9.1
|
||||
faster-whisper==0.9.0
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.67.1
|
||||
grpcio==1.68.0
|
||||
protobuf
|
||||
librosa
|
||||
faster-whisper
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
grpcio==1.67.1
|
||||
grpcio==1.68.0
|
||||
certifi
|
||||
llvmlite==0.43.0
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
grpcio==1.67.1
|
||||
grpcio==1.68.0
|
||||
protobuf
|
||||
certifi
|
||||
@@ -2,5 +2,5 @@ torch==2.4.1
|
||||
accelerate
|
||||
transformers
|
||||
bitsandbytes
|
||||
sentence-transformers==3.2.0
|
||||
sentence-transformers==3.3.1
|
||||
transformers
|
||||
@@ -1,5 +1,5 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||
torch==2.4.1+cu118
|
||||
accelerate
|
||||
sentence-transformers==3.2.0
|
||||
sentence-transformers==3.3.1
|
||||
transformers
|
||||
@@ -1,4 +1,4 @@
|
||||
torch==2.4.1
|
||||
accelerate
|
||||
sentence-transformers==3.2.0
|
||||
sentence-transformers==3.3.1
|
||||
transformers
|
||||
@@ -1,5 +1,5 @@
|
||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||
torch==2.4.1+rocm6.0
|
||||
accelerate
|
||||
sentence-transformers==3.2.0
|
||||
sentence-transformers==3.3.1
|
||||
transformers
|
||||
@@ -4,5 +4,5 @@ torch
|
||||
optimum[openvino]
|
||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||
accelerate
|
||||
sentence-transformers==3.2.0
|
||||
sentence-transformers==3.3.1
|
||||
transformers
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.67.1
|
||||
grpcio==1.68.0
|
||||
protobuf
|
||||
certifi
|
||||
datasets
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.67.1
|
||||
grpcio==1.68.0
|
||||
protobuf
|
||||
scipy==1.14.0
|
||||
certifi
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.67.1
|
||||
grpcio==1.68.0
|
||||
protobuf
|
||||
certifi
|
||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||
@@ -1,3 +1,3 @@
|
||||
grpcio==1.67.1
|
||||
grpcio==1.68.0
|
||||
protobuf
|
||||
certifi
|
||||
@@ -22,7 +22,7 @@ if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE}" == "xtrue" ]; then
|
||||
git clone https://github.com/vllm-project/vllm
|
||||
fi
|
||||
pushd vllm
|
||||
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.67.1 protobuf bitsandbytes
|
||||
uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.68.0 protobuf bitsandbytes
|
||||
uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
|
||||
VLLM_TARGET_DEVICE=cpu python setup.py install
|
||||
popd
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
grpcio==1.67.1
|
||||
grpcio==1.68.0
|
||||
protobuf
|
||||
certifi
|
||||
setuptools
|
||||
@@ -76,8 +76,14 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
|
||||
"util",
|
||||
"llama-cpp-rpc-server",
|
||||
)
|
||||
extraArgs := strings.Split(r.ExtraLLamaCPPArgs, " ")
|
||||
var extraArgs []string
|
||||
|
||||
if r.ExtraLLamaCPPArgs != "" {
|
||||
extraArgs = strings.Split(r.ExtraLLamaCPPArgs, " ")
|
||||
}
|
||||
args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, extraArgs...)
|
||||
log.Debug().Msgf("Starting llama-cpp-rpc-server on '%s:%d' with args: %+v (%d)", address, port, args, len(args))
|
||||
|
||||
args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
|
||||
|
||||
cmd := exec.Command(
|
||||
|
||||
@@ -140,7 +140,7 @@ func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath
|
||||
}
|
||||
}
|
||||
|
||||
cfg.SetDefaults(opts...)
|
||||
cfg.SetDefaults(append(opts, ModelPath(modelPath))...)
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
@@ -345,7 +345,7 @@ var _ = Describe("API test", func() {
|
||||
It("Should fail if the api key is missing", func() {
|
||||
err, sc := postInvalidRequest("http://127.0.0.1:9090/models/available")
|
||||
Expect(err).ToNot(BeNil())
|
||||
Expect(sc).To(Equal(403))
|
||||
Expect(sc).To(Equal(401))
|
||||
})
|
||||
})
|
||||
|
||||
|
||||
@@ -21,10 +21,15 @@ func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConf
|
||||
for b := range appConfig.ExternalGRPCBackends {
|
||||
availableBackends = append(availableBackends, b)
|
||||
}
|
||||
|
||||
sysmodels := []schema.SysInfoModel{}
|
||||
for _, m := range loadedModels {
|
||||
sysmodels = append(sysmodels, schema.SysInfoModel{ID: m.ID})
|
||||
}
|
||||
return c.JSON(
|
||||
schema.SystemInformationResponse{
|
||||
Backends: availableBackends,
|
||||
Models: loadedModels,
|
||||
Models: sysmodels,
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
68
core/http/endpoints/localai/vad.go
Normal file
68
core/http/endpoints/localai/vad.go
Normal file
@@ -0,0 +1,68 @@
|
||||
package localai
|
||||
|
||||
import (
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/mudler/LocalAI/core/backend"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||
"github.com/mudler/LocalAI/core/schema"
|
||||
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// VADEndpoint is Voice-Activation-Detection endpoint
|
||||
// @Summary Detect voice fragments in an audio stream
|
||||
// @Accept json
|
||||
// @Param request body schema.VADRequest true "query params"
|
||||
// @Success 200 {object} proto.VADResponse "Response"
|
||||
// @Router /vad [post]
|
||||
func VADEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
|
||||
input := new(schema.VADRequest)
|
||||
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||
if err != nil {
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
}
|
||||
|
||||
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||
config.LoadOptionDebug(appConfig.Debug),
|
||||
config.LoadOptionThreads(appConfig.Threads),
|
||||
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||
config.LoadOptionF16(appConfig.F16),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
log.Err(err)
|
||||
modelFile = input.Model
|
||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||
} else {
|
||||
modelFile = cfg.Model
|
||||
}
|
||||
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||
|
||||
opts := backend.ModelOptions(*cfg, appConfig, model.WithBackendString(cfg.Backend), model.WithModel(modelFile))
|
||||
|
||||
vadModel, err := ml.Load(opts...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
req := proto.VADRequest{
|
||||
Audio: input.Audio,
|
||||
}
|
||||
resp, err := vadModel.VAD(c.Context(), &req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return c.JSON(resp)
|
||||
}
|
||||
}
|
||||
@@ -304,7 +304,6 @@ func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *c
|
||||
config.LoadOptionThreads(threads),
|
||||
config.LoadOptionContextSize(ctx),
|
||||
config.LoadOptionF16(f16),
|
||||
config.ModelPath(loader.ModelPath),
|
||||
)
|
||||
|
||||
// Set the parameters for the language model prediction
|
||||
|
||||
@@ -1,95 +1,95 @@
|
||||
package middleware
|
||||
|
||||
import (
|
||||
"crypto/subtle"
|
||||
"errors"
|
||||
|
||||
"github.com/dave-gray101/v2keyauth"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/gofiber/fiber/v2/middleware/keyauth"
|
||||
"github.com/microcosm-cc/bluemonday"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
)
|
||||
|
||||
// This file contains the configuration generators and handler functions that are used along with the fiber/keyauth middleware
|
||||
// Currently this requires an upstream patch - and feature patches are no longer accepted to v2
|
||||
// Therefore `dave-gray101/v2keyauth` contains the v2 backport of the middleware until v3 stabilizes and we migrate.
|
||||
|
||||
func GetKeyAuthConfig(applicationConfig *config.ApplicationConfig) (*v2keyauth.Config, error) {
|
||||
customLookup, err := v2keyauth.MultipleKeySourceLookup([]string{"header:Authorization", "header:x-api-key", "header:xi-api-key"}, keyauth.ConfigDefault.AuthScheme)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &v2keyauth.Config{
|
||||
CustomKeyLookup: customLookup,
|
||||
Next: getApiKeyRequiredFilterFunction(applicationConfig),
|
||||
Validator: getApiKeyValidationFunction(applicationConfig),
|
||||
ErrorHandler: getApiKeyErrorHandler(applicationConfig),
|
||||
AuthScheme: "Bearer",
|
||||
}, nil
|
||||
}
|
||||
|
||||
func getApiKeyErrorHandler(applicationConfig *config.ApplicationConfig) fiber.ErrorHandler {
|
||||
return func(ctx *fiber.Ctx, err error) error {
|
||||
if errors.Is(err, v2keyauth.ErrMissingOrMalformedAPIKey) {
|
||||
if len(applicationConfig.ApiKeys) == 0 {
|
||||
return ctx.Next() // if no keys are set up, any error we get here is not an error.
|
||||
}
|
||||
if applicationConfig.OpaqueErrors {
|
||||
return ctx.SendStatus(403)
|
||||
}
|
||||
return ctx.Status(403).SendString(bluemonday.StrictPolicy().Sanitize(err.Error()))
|
||||
}
|
||||
if applicationConfig.OpaqueErrors {
|
||||
return ctx.SendStatus(500)
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
func getApiKeyValidationFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx, string) (bool, error) {
|
||||
|
||||
if applicationConfig.UseSubtleKeyComparison {
|
||||
return func(ctx *fiber.Ctx, apiKey string) (bool, error) {
|
||||
if len(applicationConfig.ApiKeys) == 0 {
|
||||
return true, nil // If no keys are setup, accept everything
|
||||
}
|
||||
for _, validKey := range applicationConfig.ApiKeys {
|
||||
if subtle.ConstantTimeCompare([]byte(apiKey), []byte(validKey)) == 1 {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
return false, v2keyauth.ErrMissingOrMalformedAPIKey
|
||||
}
|
||||
}
|
||||
|
||||
return func(ctx *fiber.Ctx, apiKey string) (bool, error) {
|
||||
if len(applicationConfig.ApiKeys) == 0 {
|
||||
return true, nil // If no keys are setup, accept everything
|
||||
}
|
||||
for _, validKey := range applicationConfig.ApiKeys {
|
||||
if apiKey == validKey {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
return false, v2keyauth.ErrMissingOrMalformedAPIKey
|
||||
}
|
||||
}
|
||||
|
||||
func getApiKeyRequiredFilterFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx) bool {
|
||||
if applicationConfig.DisableApiKeyRequirementForHttpGet {
|
||||
return func(c *fiber.Ctx) bool {
|
||||
if c.Method() != "GET" {
|
||||
return false
|
||||
}
|
||||
for _, rx := range applicationConfig.HttpGetExemptedEndpoints {
|
||||
if rx.MatchString(c.Path()) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
return func(c *fiber.Ctx) bool { return false }
|
||||
}
|
||||
package middleware
|
||||
|
||||
import (
|
||||
"crypto/subtle"
|
||||
"errors"
|
||||
|
||||
"github.com/dave-gray101/v2keyauth"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/gofiber/fiber/v2/middleware/keyauth"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
)
|
||||
|
||||
// This file contains the configuration generators and handler functions that are used along with the fiber/keyauth middleware
|
||||
// Currently this requires an upstream patch - and feature patches are no longer accepted to v2
|
||||
// Therefore `dave-gray101/v2keyauth` contains the v2 backport of the middleware until v3 stabilizes and we migrate.
|
||||
|
||||
func GetKeyAuthConfig(applicationConfig *config.ApplicationConfig) (*v2keyauth.Config, error) {
|
||||
customLookup, err := v2keyauth.MultipleKeySourceLookup([]string{"header:Authorization", "header:x-api-key", "header:xi-api-key", "cookie:token"}, keyauth.ConfigDefault.AuthScheme)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &v2keyauth.Config{
|
||||
CustomKeyLookup: customLookup,
|
||||
Next: getApiKeyRequiredFilterFunction(applicationConfig),
|
||||
Validator: getApiKeyValidationFunction(applicationConfig),
|
||||
ErrorHandler: getApiKeyErrorHandler(applicationConfig),
|
||||
AuthScheme: "Bearer",
|
||||
}, nil
|
||||
}
|
||||
|
||||
func getApiKeyErrorHandler(applicationConfig *config.ApplicationConfig) fiber.ErrorHandler {
|
||||
return func(ctx *fiber.Ctx, err error) error {
|
||||
if errors.Is(err, v2keyauth.ErrMissingOrMalformedAPIKey) {
|
||||
if len(applicationConfig.ApiKeys) == 0 {
|
||||
return ctx.Next() // if no keys are set up, any error we get here is not an error.
|
||||
}
|
||||
ctx.Set("WWW-Authenticate", "Bearer")
|
||||
if applicationConfig.OpaqueErrors {
|
||||
return ctx.SendStatus(401)
|
||||
}
|
||||
return ctx.Status(401).Render("views/login", nil)
|
||||
}
|
||||
if applicationConfig.OpaqueErrors {
|
||||
return ctx.SendStatus(500)
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
func getApiKeyValidationFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx, string) (bool, error) {
|
||||
|
||||
if applicationConfig.UseSubtleKeyComparison {
|
||||
return func(ctx *fiber.Ctx, apiKey string) (bool, error) {
|
||||
if len(applicationConfig.ApiKeys) == 0 {
|
||||
return true, nil // If no keys are setup, accept everything
|
||||
}
|
||||
for _, validKey := range applicationConfig.ApiKeys {
|
||||
if subtle.ConstantTimeCompare([]byte(apiKey), []byte(validKey)) == 1 {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
return false, v2keyauth.ErrMissingOrMalformedAPIKey
|
||||
}
|
||||
}
|
||||
|
||||
return func(ctx *fiber.Ctx, apiKey string) (bool, error) {
|
||||
if len(applicationConfig.ApiKeys) == 0 {
|
||||
return true, nil // If no keys are setup, accept everything
|
||||
}
|
||||
for _, validKey := range applicationConfig.ApiKeys {
|
||||
if apiKey == validKey {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
return false, v2keyauth.ErrMissingOrMalformedAPIKey
|
||||
}
|
||||
}
|
||||
|
||||
func getApiKeyRequiredFilterFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx) bool {
|
||||
if applicationConfig.DisableApiKeyRequirementForHttpGet {
|
||||
return func(c *fiber.Ctx) bool {
|
||||
if c.Method() != "GET" {
|
||||
return false
|
||||
}
|
||||
for _, rx := range applicationConfig.HttpGetExemptedEndpoints {
|
||||
if rx.MatchString(c.Path()) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
return func(c *fiber.Ctx) bool { return false }
|
||||
}
|
||||
|
||||
@@ -34,6 +34,7 @@ func RegisterLocalAIRoutes(app *fiber.App,
|
||||
}
|
||||
|
||||
app.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig))
|
||||
app.Post("/vad", localai.VADEndpoint(cl, ml, appConfig))
|
||||
|
||||
// Stores
|
||||
sl := model.NewModelLoader("")
|
||||
|
||||
23
core/http/views/login.html
Normal file
23
core/http/views/login.html
Normal file
@@ -0,0 +1,23 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Open Authenticated Website</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Authorization is required</h1>
|
||||
<input type="text" id="token" placeholder="Token" />
|
||||
<button onclick="login()">Login</button>
|
||||
<script>
|
||||
function login() {
|
||||
const token = document.getElementById('token').value;
|
||||
var date = new Date();
|
||||
date.setTime(date.getTime() + (24*60*60*1000));
|
||||
document.cookie = `token=${token}; expires=${date.toGMTString()}`;
|
||||
|
||||
window.location.reload();
|
||||
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"io"
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -22,6 +23,7 @@ import (
|
||||
"github.com/mudler/edgevpn/pkg/services"
|
||||
"github.com/mudler/edgevpn/pkg/types"
|
||||
eutils "github.com/mudler/edgevpn/pkg/utils"
|
||||
"github.com/multiformats/go-multiaddr"
|
||||
"github.com/phayes/freeport"
|
||||
zlog "github.com/rs/zerolog/log"
|
||||
|
||||
@@ -385,11 +387,28 @@ func newNodeOpts(token string) ([]node.Option, error) {
|
||||
noDHT := os.Getenv("LOCALAI_P2P_DISABLE_DHT") == "true"
|
||||
noLimits := os.Getenv("LOCALAI_P2P_ENABLE_LIMITS") == "true"
|
||||
|
||||
libp2ploglevel := os.Getenv("LOCALAI_LIBP2P_LOGLEVEL")
|
||||
var listenMaddrs []string
|
||||
var bootstrapPeers []string
|
||||
|
||||
laddrs := os.Getenv("LOCALAI_P2P_LISTEN_MADDRS")
|
||||
if laddrs != "" {
|
||||
listenMaddrs = strings.Split(laddrs, ",")
|
||||
}
|
||||
|
||||
bootmaddr := os.Getenv("LOCALAI_P2P_BOOTSTRAP_PEERS_MADDRS")
|
||||
if bootmaddr != "" {
|
||||
bootstrapPeers = strings.Split(bootmaddr, ",")
|
||||
}
|
||||
|
||||
dhtAnnounceMaddrs := stringsToMultiAddr(strings.Split(os.Getenv("LOCALAI_P2P_DHT_ANNOUNCE_MADDRS"), ","))
|
||||
|
||||
libp2ploglevel := os.Getenv("LOCALAI_P2P_LIB_LOGLEVEL")
|
||||
if libp2ploglevel == "" {
|
||||
libp2ploglevel = "fatal"
|
||||
}
|
||||
c := config.Config{
|
||||
ListenMaddrs: listenMaddrs,
|
||||
DHTAnnounceMaddrs: dhtAnnounceMaddrs,
|
||||
Limit: config.ResourceLimit{
|
||||
Enable: noLimits,
|
||||
MaxConns: 100,
|
||||
@@ -411,9 +430,10 @@ func newNodeOpts(token string) ([]node.Option, error) {
|
||||
RateLimitInterval: defaultInterval,
|
||||
},
|
||||
Discovery: config.Discovery{
|
||||
DHT: !noDHT,
|
||||
MDNS: true,
|
||||
Interval: 10 * time.Second,
|
||||
DHT: !noDHT,
|
||||
MDNS: true,
|
||||
Interval: 10 * time.Second,
|
||||
BootstrapPeers: bootstrapPeers,
|
||||
},
|
||||
Connection: config.Connection{
|
||||
HolePunch: true,
|
||||
@@ -432,6 +452,18 @@ func newNodeOpts(token string) ([]node.Option, error) {
|
||||
return nodeOpts, nil
|
||||
}
|
||||
|
||||
func stringsToMultiAddr(peers []string) []multiaddr.Multiaddr {
|
||||
res := []multiaddr.Multiaddr{}
|
||||
for _, p := range peers {
|
||||
addr, err := multiaddr.NewMultiaddr(p)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
res = append(res, addr)
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
func copyStream(closer chan struct{}, dst io.Writer, src io.Reader) {
|
||||
defer func() { closer <- struct{}{} }() // connection is closed, send signal to stop proxy
|
||||
io.Copy(dst, src)
|
||||
|
||||
@@ -2,7 +2,6 @@ package schema
|
||||
|
||||
import (
|
||||
"github.com/mudler/LocalAI/core/p2p"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
gopsutil "github.com/shirou/gopsutil/v3/process"
|
||||
)
|
||||
|
||||
@@ -31,10 +30,16 @@ type TTSRequest struct {
|
||||
Input string `json:"input" yaml:"input"` // text input
|
||||
Voice string `json:"voice" yaml:"voice"` // voice audio file or speaker id
|
||||
Backend string `json:"backend" yaml:"backend"`
|
||||
Language string `json:"language,omitempty" yaml:"language,omitempty"` // (optional) language to use with TTS model
|
||||
Language string `json:"language,omitempty" yaml:"language,omitempty"` // (optional) language to use with TTS model
|
||||
Format string `json:"response_format,omitempty" yaml:"response_format,omitempty"` // (optional) output format
|
||||
}
|
||||
|
||||
// @Description VAD request body
|
||||
type VADRequest struct {
|
||||
Model string `json:"model" yaml:"model"` // model name or full path
|
||||
Audio []float32 `json:"audio" yaml:"audio"` // model name or full path
|
||||
}
|
||||
|
||||
type StoresSet struct {
|
||||
Store string `json:"store,omitempty" yaml:"store,omitempty"`
|
||||
|
||||
@@ -77,7 +82,11 @@ type P2PNodesResponse struct {
|
||||
FederatedNodes []p2p.NodeData `json:"federated_nodes" yaml:"federated_nodes"`
|
||||
}
|
||||
|
||||
type SystemInformationResponse struct {
|
||||
Backends []string `json:"backends"`
|
||||
Models []model.Model `json:"loaded_models"`
|
||||
type SysInfoModel struct {
|
||||
ID string `json:"id"`
|
||||
}
|
||||
|
||||
type SystemInformationResponse struct {
|
||||
Backends []string `json:"backends"`
|
||||
Models []SysInfoModel `json:"loaded_models"`
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ There are different categories of models: [LLMs]({{%relref "docs/features/text-g
|
||||
|
||||
{{% alert icon="💡" %}}
|
||||
|
||||
To customize the models, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}). For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI/tree/master/examples/configurations) and the configurations for the models below is available [here](https://github.com/mudler/LocalAI/tree/master/embedded/models).
|
||||
To customize the models, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}). For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI-examples/tree/main/configurations) and the configurations for the models below is available [here](https://github.com/mudler/LocalAI/tree/master/embedded/models).
|
||||
{{% /alert %}}
|
||||
|
||||
{{< tabs tabTotal="3" >}}
|
||||
|
||||
@@ -131,9 +131,13 @@ There are options that can be tweaked or parameters that can be set using enviro
|
||||
|----------------------|-------------|
|
||||
| **LOCALAI_P2P_DISABLE_DHT** | Set to "true" to disable DHT and enable p2p layer to be local only (mDNS) |
|
||||
| **LOCALAI_P2P_ENABLE_LIMITS** | Set to "true" to enable connection limits and resources management (useful when running with poor connectivity or want to limit resources consumption) |
|
||||
| **LOCALAI_P2P_LISTEN_MADDRS** | Set to comma separated list of multiaddresses to override default libp2p 0.0.0.0 multiaddresses |
|
||||
| **LOCALAI_P2P_DHT_ANNOUNCE_MADDRS** | Set to comma separated list of multiaddresses to override announcing of listen multiaddresses (useful when external address:port is remapped) |
|
||||
| **LOCALAI_P2P_BOOTSTRAP_PEERS_MADDRS** | Set to comma separated list of multiaddresses to specify custom DHT bootstrap nodes |
|
||||
| **LOCALAI_P2P_TOKEN** | Set the token for the p2p network |
|
||||
| **LOCALAI_P2P_LOGLEVEL** | Set the loglevel for the LocalAI p2p stack (default: info) |
|
||||
| **LOCALAI_LIBP2P_LOGLEVEL** | Set the loglevel for the underlying libp2p stack (default: fatal) |
|
||||
| **LOCALAI_P2P_LIB_LOGLEVEL** | Set the loglevel for the underlying libp2p stack (default: fatal) |
|
||||
|
||||
|
||||
## Architecture
|
||||
|
||||
|
||||
@@ -29,5 +29,7 @@ The list below is a list of software that integrates with LocalAI.
|
||||
- [Big AGI](https://github.com/enricoros/big-agi) is a powerful web interface entirely running in the browser, supporting LocalAI
|
||||
- [Midori AI Subsystem Manager](https://io.midori-ai.xyz/subsystem/manager/) is a powerful docker subsystem for running all types of AI programs
|
||||
- [LLPhant](https://github.com/theodo-group/LLPhant) is a PHP library for interacting with LLMs and Vector Databases
|
||||
- [GPTLocalhost (Word Add-in)](https://gptlocalhost.com/demo#LocalAI) - run LocalAI in Microsoft Word locally
|
||||
- use LocalAI from Nextcloud with the [integration plugin](https://apps.nextcloud.com/apps/integration_openai) and [AI assistant](https://apps.nextcloud.com/apps/assistant)
|
||||
|
||||
Feel free to open up a Pull request (by clicking at the "Edit page" below) to get a page for your project made or if you see a error on one of the pages!
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
{
|
||||
"version": "v2.22.1"
|
||||
"version": "v2.23.0"
|
||||
}
|
||||
|
||||
@@ -1,4 +1,189 @@
|
||||
---
|
||||
- &qwen25coder
|
||||
name: "qwen2.5-coder-14b"
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
license: apache-2.0
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
- gpu
|
||||
- qwen
|
||||
- qwen2.5
|
||||
- cpu
|
||||
urls:
|
||||
- https://huggingface.co/Qwen/Qwen2.5-Coder-14B
|
||||
- https://huggingface.co/mradermacher/Qwen2.5-Coder-14B-GGUF
|
||||
description: |
|
||||
Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). As of now, Qwen2.5-Coder has covered six mainstream model sizes, 0.5, 1.5, 3, 7, 14, 32 billion parameters, to meet the needs of different developers. Qwen2.5-Coder brings the following improvements upon CodeQwen1.5:
|
||||
|
||||
Significantly improvements in code generation, code reasoning and code fixing. Base on the strong Qwen2.5, we scale up the training tokens into 5.5 trillion including source code, text-code grounding, Synthetic data, etc. Qwen2.5-Coder-32B has become the current state-of-the-art open-source codeLLM, with its coding abilities matching those of GPT-4o.
|
||||
A more comprehensive foundation for real-world applications such as Code Agents. Not only enhancing coding capabilities but also maintaining its strengths in mathematics and general competencies.
|
||||
Long-context Support up to 128K tokens.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen2.5-Coder-14B.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen2.5-Coder-14B.Q4_K_M.gguf
|
||||
sha256: 94f277a9ac7caf117140b2fff4e1ccf4bc9f35395b0112f0d0d7c82c6f8d860e
|
||||
uri: huggingface://mradermacher/Qwen2.5-Coder-14B-GGUF/Qwen2.5-Coder-14B.Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25coder
|
||||
name: "qwen2.5-coder-3b-instruct"
|
||||
urls:
|
||||
- https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct
|
||||
- https://huggingface.co/bartowski/Qwen2.5-Coder-3B-Instruct-GGUF
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen2.5-Coder-3B-Instruct-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen2.5-Coder-3B-Instruct-Q4_K_M.gguf
|
||||
sha256: 3da3afe6cf5c674ac195803ea0dd6fee7e1c228c2105c1ce8c66890d1d4ab460
|
||||
uri: huggingface://bartowski/Qwen2.5-Coder-3B-Instruct-GGUF/Qwen2.5-Coder-3B-Instruct-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25coder
|
||||
name: "qwen2.5-coder-32b-instruct"
|
||||
urls:
|
||||
- https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct
|
||||
- https://huggingface.co/bartowski/Qwen2.5-Coder-32B-Instruct-GGUF
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf
|
||||
sha256: 8e2fd78ff55e7cdf577fda257bac2776feb7d73d922613caf35468073807e815
|
||||
uri: huggingface://bartowski/Qwen2.5-Coder-32B-Instruct-GGUF/Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25coder
|
||||
name: "qwen2.5-coder-14b-instruct"
|
||||
urls:
|
||||
- https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct
|
||||
- https://huggingface.co/bartowski/Qwen2.5-Coder-14B-Instruct-GGUF
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf
|
||||
sha256: 2946d28c9e1bb2bcae6d42e8678863a31775df6f740315c7d7e6d6b6411f5937
|
||||
uri: huggingface://bartowski/Qwen2.5-Coder-14B-Instruct-GGUF/Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25coder
|
||||
name: "qwen2.5-coder-1.5b-instruct"
|
||||
urls:
|
||||
- https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct
|
||||
- https://huggingface.co/bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen2.5-Coder-1.5B-Instruct-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen2.5-Coder-1.5B-Instruct-Q4_K_M.gguf
|
||||
sha256: f530705d447660a4336c329981af164b471b60b974b1d808d57e8ec9fe23b239
|
||||
uri: huggingface://bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF/Qwen2.5-Coder-1.5B-Instruct-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25coder
|
||||
name: "qwen2.5-coder-7b-instruct"
|
||||
urls:
|
||||
- https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
- https://huggingface.co/bartowski/Qwen2.5-Coder-7B-Instruct-GGUF
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf
|
||||
sha256: 1664fccab734674a50763490a8c6931b70e3f2f8ec10031b54806d30e5f956b6
|
||||
uri: huggingface://bartowski/Qwen2.5-Coder-7B-Instruct-GGUF/Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25coder
|
||||
name: "qwen2.5-coder-7b-3x-instruct-ties-v1.2-i1"
|
||||
urls:
|
||||
- https://huggingface.co/BenevolenceMessiah/Qwen2.5-Coder-7B-3x-Instruct-TIES-v1.2
|
||||
- https://huggingface.co/mradermacher/Qwen2.5-Coder-7B-3x-Instruct-TIES-v1.2-i1-GGUF
|
||||
description: |
|
||||
The following models were included in the merge:
|
||||
BenevolenceMessiah/Qwen2.5-Coder-7B-Chat-Instruct-TIES-v1.2
|
||||
MadeAgents/Hammer2.0-7b
|
||||
huihui-ai/Qwen2.5-Coder-7B-Instruct-abliterated
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen2.5-Coder-7B-3x-Instruct-TIES-v1.2.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen2.5-Coder-7B-3x-Instruct-TIES-v1.2.i1-Q4_K_M.gguf
|
||||
sha256: c28a4da700f634f1277f02391d81fa3c0ba783fa4b02886bd4bfe5f13b6605ef
|
||||
uri: huggingface://mradermacher/Qwen2.5-Coder-7B-3x-Instruct-TIES-v1.2-i1-GGUF/Qwen2.5-Coder-7B-3x-Instruct-TIES-v1.2.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25coder
|
||||
name: "qwen2.5-coder-7b-instruct-abliterated-i1"
|
||||
urls:
|
||||
- https://huggingface.co/huihui-ai/Qwen2.5-Coder-7B-Instruct-abliterated
|
||||
- https://huggingface.co/mradermacher/Qwen2.5-Coder-7B-Instruct-abliterated-i1-GGUF
|
||||
description: |
|
||||
This is an uncensored version of Qwen2.5-Coder-7B-Instruct created with abliteration (see this article to know more about it).
|
||||
|
||||
Special thanks to @FailSpy for the original code and technique. Please follow him if you're interested in abliterated models.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen2.5-Coder-7B-Instruct-abliterated.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen2.5-Coder-7B-Instruct-abliterated.i1-Q4_K_M.gguf
|
||||
sha256: 9100ccd9e8167cefda98bd1c97d5d765a21e70e124e4d6b89945fd66ebb481b4
|
||||
uri: huggingface://mradermacher/Qwen2.5-Coder-7B-Instruct-abliterated-i1-GGUF/Qwen2.5-Coder-7B-Instruct-abliterated.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25coder
|
||||
name: "rombos-coder-v2.5-qwen-7b"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/QErypCEKD5OZLxUcSmYaR.jpeg
|
||||
urls:
|
||||
- https://huggingface.co/rombodawg/Rombos-Coder-V2.5-Qwen-7b
|
||||
- https://huggingface.co/bartowski/Rombos-Coder-V2.5-Qwen-7b-GGUF
|
||||
- https://docs.google.com/document/d/1OjbjU5AOz4Ftn9xHQrX3oFQGhQ6RDUuXQipnQ9gn6tU/edit?usp=sharing
|
||||
description: |
|
||||
Rombos-Coder-V2.5-Qwen-7b is a continues finetuned version of Qwen2.5-Coder-7B-Instruct. I took it upon myself to merge the instruct model with the base model myself using the * Ties* merge method as demonstrated in my own "Continuous Finetuning" method (link available).
|
||||
This version of the model shows higher performance than the original instruct and base models.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Rombos-Coder-V2.5-Qwen-7b-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Rombos-Coder-V2.5-Qwen-7b-Q4_K_M.gguf
|
||||
sha256: ca16a550f1be00b7e92f94c0c18ea6af1e5c158d5d1cb3994f9f0a0d13922272
|
||||
uri: huggingface://bartowski/Rombos-Coder-V2.5-Qwen-7b-GGUF/Rombos-Coder-V2.5-Qwen-7b-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25coder
|
||||
name: "rombos-coder-v2.5-qwen-32b"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/QErypCEKD5OZLxUcSmYaR.jpeg
|
||||
urls:
|
||||
- https://huggingface.co/rombodawg/Rombos-Coder-V2.5-Qwen-32b
|
||||
- https://huggingface.co/bartowski/Rombos-Coder-V2.5-Qwen-32b-GGUF
|
||||
- https://docs.google.com/document/d/1OjbjU5AOz4Ftn9xHQrX3oFQGhQ6RDUuXQipnQ9gn6tU/edit?usp=sharing
|
||||
description: |
|
||||
Rombos-Coder-V2.5-Qwen-32b is a continues finetuned version of Qwen2.5-Coder-32B-Instruct. I took it upon myself to merge the instruct model with the base model myself using the Ties merge method as demonstrated in my own "Continuous Finetuning" method (link available).
|
||||
This version of the model shows higher performance than the original instruct and base models.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Rombos-Coder-V2.5-Qwen-32b-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Rombos-Coder-V2.5-Qwen-32b-Q4_K_M.gguf
|
||||
sha256: 821ea2a13d96354db1368986700b1189938fbbc56ca6bb9d0c39f752580de71a
|
||||
uri: huggingface://bartowski/Rombos-Coder-V2.5-Qwen-32b-GGUF/Rombos-Coder-V2.5-Qwen-32b-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25coder
|
||||
name: "rombos-coder-v2.5-qwen-14b"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/QErypCEKD5OZLxUcSmYaR.jpeg
|
||||
urls:
|
||||
- https://huggingface.co/rombodawg/Rombos-Coder-V2.5-Qwen-14b
|
||||
- https://huggingface.co/bartowski/Rombos-Coder-V2.5-Qwen-14b-GGUF
|
||||
- https://docs.google.com/document/d/1OjbjU5AOz4Ftn9xHQrX3oFQGhQ6RDUuXQipnQ9gn6tU/edit?usp=sharing
|
||||
description: |
|
||||
Rombos-Coder-V2.5-Qwen-14b is a continues finetuned version of Qwen2.5-Coder-14B-Instruct. I took it upon myself to merge the instruct model with the base model myself using the Ties merge method as demonstrated in my own "Continuous Finetuning" method (link available).
|
||||
This version of the model shows higher performance than the original instruct and base models.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Rombos-Coder-V2.5-Qwen-14b-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Rombos-Coder-V2.5-Qwen-14b-Q4_K_M.gguf
|
||||
sha256: 7ef044e1fee206a039f56538f94332030e99ec63915c74f4d1bdec0e601ee968
|
||||
uri: huggingface://bartowski/Rombos-Coder-V2.5-Qwen-14b-GGUF/Rombos-Coder-V2.5-Qwen-14b-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25coder
|
||||
name: "qwen2.5-coder-32b-instruct-uncensored-i1"
|
||||
urls:
|
||||
- https://huggingface.co/thirdeyeai/Qwen2.5-Coder-32B-Instruct-Uncensored
|
||||
- https://huggingface.co/mradermacher/Qwen2.5-Coder-32B-Instruct-Uncensored-i1-GGUF
|
||||
description: |
|
||||
The LLM model is based on sloshywings/Qwen2.5-Coder-32B-Instruct-Uncensored. It is a large language model with 32B parameters that has been fine-tuned on coding tasks and instructions.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen2.5-Coder-32B-Instruct-Uncensored.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen2.5-Coder-32B-Instruct-Uncensored.i1-Q4_K_M.gguf
|
||||
sha256: 86ac8efb86daf241792ac3d5d35b7da92c54901b4208a6f2829bd03d8f273c9c
|
||||
uri: huggingface://mraWdermacher/Qwen2.5-Coder-32B-Instruct-Uncensored-i1-GGUF/Qwen2.5-Coder-32B-Instruct-Uncensored.i1-Q4_K_M.gguf
|
||||
- &opencoder
|
||||
name: "opencoder-8b-base"
|
||||
icon: https://github.com/OpenCoder-llm/opencoder-llm.github.io/blob/main/static/images/opencoder_icon.jpg?raw=true
|
||||
@@ -368,6 +553,133 @@
|
||||
- filename: Llama-3.2-3B-Instruct-uncensored-Q4_K_M.gguf
|
||||
sha256: 80f532552e3d56e366226f428395de8285a671f2da1d5fd68563741181b77a95
|
||||
uri: huggingface://bartowski/Llama-3.2-3B-Instruct-uncensored-GGUF/Llama-3.2-3B-Instruct-uncensored-Q4_K_M.gguf
|
||||
- !!merge <<: *llama32
|
||||
name: "calme-3.3-llamaloi-3b"
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
icon: https://huggingface.co/MaziyarPanahi/calme-3.3-llamaloi-3b/resolve/main/calme_3.png
|
||||
urls:
|
||||
- https://huggingface.co/MaziyarPanahi/calme-3.3-llamaloi-3b
|
||||
- https://huggingface.co/MaziyarPanahi/calme-3.3-llamaloi-3b-GGUF
|
||||
description: |
|
||||
This model is an advanced iteration of the powerful meta-llama/Llama-3.2-3B, specifically fine-tuned to enhance its capabilities in French Legal domain.
|
||||
overrides:
|
||||
parameters:
|
||||
model: calme-3.3-llamaloi-3b.Q5_K_M.gguf
|
||||
files:
|
||||
- filename: calme-3.3-llamaloi-3b.Q5_K_M.gguf
|
||||
sha256: d3b9d47faa9e968a93a8f52bd4cdc938e5a612facb963088367ca871063ef302
|
||||
uri: huggingface://MaziyarPanahi/calme-3.3-llamaloi-3b-GGUF/calme-3.3-llamaloi-3b.Q5_K_M.gguf
|
||||
- !!merge <<: *llama32
|
||||
name: "calme-3.2-llamaloi-3b"
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
icon: https://huggingface.co/MaziyarPanahi/calme-3.3-llamaloi-3b/resolve/main/calme_3.png
|
||||
urls:
|
||||
- https://huggingface.co/MaziyarPanahi/calme-3.2-llamaloi-3b
|
||||
- https://huggingface.co/MaziyarPanahi/calme-3.2-llamaloi-3b-GGUF
|
||||
description: |
|
||||
This model is an advanced iteration of the powerful meta-llama/Llama-3.2-3B, specifically fine-tuned to enhance its capabilities in French Legal domain.
|
||||
overrides:
|
||||
parameters:
|
||||
model: calme-3.2-llamaloi-3b.Q5_K_M.gguf
|
||||
files:
|
||||
- filename: calme-3.2-llamaloi-3b.Q5_K_M.gguf
|
||||
sha256: bd11e6a717008d0603b6da5faab2fa2ba18b376c5589245735340cfb0a8dabb9
|
||||
uri: huggingface://MaziyarPanahi/calme-3.2-llamaloi-3b-GGUF/calme-3.2-llamaloi-3b.Q5_K_M.gguf
|
||||
- !!merge <<: *llama32
|
||||
name: "calme-3.1-llamaloi-3b"
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
icon: https://huggingface.co/MaziyarPanahi/calme-3.3-llamaloi-3b/resolve/main/calme_3.png
|
||||
urls:
|
||||
- https://huggingface.co/MaziyarPanahi/calme-3.1-llamaloi-3b
|
||||
- https://huggingface.co/MaziyarPanahi/calme-3.1-llamaloi-3b-GGUF
|
||||
description: |
|
||||
This model is an advanced iteration of the powerful meta-llama/Llama-3.2-3B, specifically fine-tuned to enhance its capabilities in French Legal domain.
|
||||
overrides:
|
||||
parameters:
|
||||
model: calme-3.1-llamaloi-3b.Q5_K_M.gguf
|
||||
files:
|
||||
- filename: calme-3.1-llamaloi-3b.Q5_K_M.gguf
|
||||
sha256: 06b900c7252423329ca57a02a8b8d18a1294934709861d09af96e74694c9a3f1
|
||||
uri: huggingface://MaziyarPanahi/calme-3.1-llamaloi-3b-GGUF/calme-3.1-llamaloi-3b.Q5_K_M.gguf
|
||||
- !!merge <<: *llama32
|
||||
name: "llama3.2-3b-enigma"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/64f267a8a4f79a118e0fcc89/it7MY5MyLCLpFQev5dUis.jpeg
|
||||
urls:
|
||||
- https://huggingface.co/ValiantLabs/Llama3.2-3B-Enigma
|
||||
- https://huggingface.co/QuantFactory/Llama3.2-3B-Enigma-GGUF
|
||||
description: |
|
||||
ValiantLabs/Llama3.2-3B-Enigma is an Enigma model built on Llama 3.2 3b. It is a high-quality code-instruct model with the Llama 3.2 Instruct chat format. The model is finetuned on synthetic code-instruct data generated using Llama 3.1 405b and supplemented with generalist synthetic data. This model is suitable for both code-instruct and general chat applications.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Llama3.2-3B-Enigma.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Llama3.2-3B-Enigma.Q4_K_M.gguf
|
||||
sha256: 4304e6ee1e348b228470700ec1e9423f5972333d376295195ce6cd5c70cae5e4
|
||||
uri: huggingface://QuantFactory/Llama3.2-3B-Enigma-GGUF/Llama3.2-3B-Enigma.Q4_K_M.gguf
|
||||
- !!merge <<: *llama32
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/63444f2687964b331809eb55/EXX7TKbB-R6arxww2mk0R.jpeg
|
||||
name: "llama3.2-3b-shiningvaliant2-i1"
|
||||
urls:
|
||||
- https://huggingface.co/ValiantLabs/Llama3.2-3B-ShiningValiant2
|
||||
- https://huggingface.co/mradermacher/Llama3.2-3B-ShiningValiant2-i1-GGUF
|
||||
description: |
|
||||
Shining Valiant 2 is a chat model built on Llama 3.2 3b, finetuned on our data for friendship, insight, knowledge and enthusiasm.
|
||||
|
||||
Finetuned on meta-llama/Llama-3.2-3B-Instruct for best available general performance
|
||||
Trained on a variety of high quality data; focused on science, engineering, technical knowledge, and structured reasoning
|
||||
Also available for Llama 3.1 70b and Llama 3.1 8b!
|
||||
|
||||
Version
|
||||
This is the 2024-09-27 release of Shining Valiant 2 for Llama 3.2 3b.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Llama3.2-3B-ShiningValiant2.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Llama3.2-3B-ShiningValiant2.i1-Q4_K_M.gguf
|
||||
sha256: 700521dc6a8a50e2d0bb5ccde12399209004155f9c68751aeac7feccf2cd4957
|
||||
uri: huggingface://mradermacher/Llama3.2-3B-ShiningValiant2-i1-GGUF/Llama3.2-3B-ShiningValiant2.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *llama32
|
||||
name: "llama-doctor-3.2-3b-instruct"
|
||||
urls:
|
||||
- https://huggingface.co/prithivMLmods/Llama-Doctor-3.2-3B-Instruct
|
||||
- https://huggingface.co/bartowski/Llama-Doctor-3.2-3B-Instruct-GGUF
|
||||
description: |
|
||||
The Llama-Doctor-3.2-3B-Instruct model is designed for text generation tasks, particularly in contexts where instruction-following capabilities are needed. This model is a fine-tuned version of the base Llama-3.2-3B-Instruct model and is optimized for understanding and responding to user-provided instructions or prompts. The model has been trained on a specialized dataset, avaliev/chat_doctor, to enhance its performance in providing conversational or advisory responses, especially in medical or technical fields.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Llama-Doctor-3.2-3B-Instruct-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Llama-Doctor-3.2-3B-Instruct-Q4_K_M.gguf
|
||||
sha256: 38fd1423e055564e9fa3d37003a62bf9db79acd348a90fa0b051a1f2c9d7cb53
|
||||
uri: huggingface://bartowski/Llama-Doctor-3.2-3B-Instruct-GGUF/Llama-Doctor-3.2-3B-Instruct-Q4_K_M.gguf
|
||||
- !!merge <<: *llama32
|
||||
name: "onellm-doey-v1-llama-3.2-3b"
|
||||
urls:
|
||||
- https://huggingface.co/DoeyLLM/OneLLM-Doey-V1-Llama-3.2-3B
|
||||
- https://huggingface.co/QuantFactory/OneLLM-Doey-V1-Llama-3.2-3B-GGUF
|
||||
description: |
|
||||
This model is a fine-tuned version of LLaMA 3.2-3B, optimized using LoRA (Low-Rank Adaptation) on the NVIDIA ChatQA-Training-Data. It is tailored for conversational AI, question answering, and other instruction-following tasks, with support for sequences up to 1024 tokens.
|
||||
overrides:
|
||||
parameters:
|
||||
model: OneLLM-Doey-V1-Llama-3.2-3B.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: OneLLM-Doey-V1-Llama-3.2-3B.Q4_K_M.gguf
|
||||
sha256: 57e93584bfb708a9841edffd70635c21f27955d8a1b4e346a72edc8163394a97
|
||||
uri: huggingface://QuantFactory/OneLLM-Doey-V1-Llama-3.2-3B-GGUF/OneLLM-Doey-V1-Llama-3.2-3B.Q4_K_M.gguf
|
||||
- !!merge <<: *llama32
|
||||
name: "llama-sentient-3.2-3b-instruct"
|
||||
urls:
|
||||
- https://huggingface.co/prithivMLmods/Llama-Sentient-3.2-3B-Instruct
|
||||
- https://huggingface.co/QuantFactory/Llama-Sentient-3.2-3B-Instruct-GGUF
|
||||
description: |
|
||||
The Llama-Sentient-3.2-3B-Instruct model is a fine-tuned version of the Llama-3.2-3B-Instruct model, optimized for text generation tasks, particularly where instruction-following abilities are critical. This model is trained on the mlabonne/lmsys-arena-human-preference-55k-sharegpt dataset, which enhances its performance in conversational and advisory contexts, making it suitable for a wide range of applications.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Llama-Sentient-3.2-3B-Instruct.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Llama-Sentient-3.2-3B-Instruct.Q4_K_M.gguf
|
||||
uri: huggingface://QuantFactory/Llama-Sentient-3.2-3B-Instruct-GGUF/Llama-Sentient-3.2-3B-Instruct.Q4_K_M.gguf
|
||||
sha256: 3f855ce0522bfdc39fc826162ba6d89f15cc3740c5207da10e70baa3348b7812
|
||||
- &qwen25
|
||||
## Qwen2.5
|
||||
name: "qwen2.5-14b-instruct"
|
||||
@@ -980,6 +1292,285 @@
|
||||
- filename: TQ2.5-14B-Sugarquill-v1-Q4_K_M.gguf
|
||||
sha256: a654fe3f41e963d8ea6753fb9a06b9dd76893714ebf02605ef67827944a4025e
|
||||
uri: huggingface://bartowski/TQ2.5-14B-Sugarquill-v1-GGUF/TQ2.5-14B-Sugarquill-v1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "calme-3.3-baguette-3b"
|
||||
icon: https://huggingface.co/MaziyarPanahi/calme-3.1-baguette-3b/resolve/main/calme_3.png
|
||||
urls:
|
||||
- https://huggingface.co/MaziyarPanahi/calme-3.3-baguette-3b
|
||||
- https://huggingface.co/MaziyarPanahi/calme-3.3-baguette-3b-GGUF
|
||||
description: |
|
||||
This model is an advanced iteration of the powerful Qwen/Qwen2.5-3B, fine-tuned specifically to enhance its capabilities across general domains in both French and English.
|
||||
overrides:
|
||||
parameters:
|
||||
model: calme-3.3-baguette-3b.Q5_K_M.gguf
|
||||
files:
|
||||
- filename: calme-3.3-baguette-3b.Q5_K_M.gguf
|
||||
sha256: 9e75b76e8cda215ef5c9ad79edfc6e5deee2f9e01ecf605ee6a557b1b5c9ef85
|
||||
uri: huggingface://MaziyarPanahi/calme-3.3-baguette-3b-GGUF/calme-3.3-baguette-3b.Q5_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "calme-3.2-baguette-3b"
|
||||
icon: https://huggingface.co/MaziyarPanahi/calme-3.1-baguette-3b/resolve/main/calme_3.png
|
||||
urls:
|
||||
- https://huggingface.co/MaziyarPanahi/calme-3.2-baguette-3b
|
||||
- https://huggingface.co/MaziyarPanahi/calme-3.2-baguette-3b-GGUF
|
||||
description: |
|
||||
This model is an advanced iteration of the powerful Qwen/Qwen2.5-3B, fine-tuned specifically to enhance its capabilities across general domains in both French and English.
|
||||
overrides:
|
||||
parameters:
|
||||
model: calme-3.2-baguette-3b.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: calme-3.2-baguette-3b.Q4_K_M.gguf
|
||||
uri: huggingface://MaziyarPanahi/calme-3.2-baguette-3b-GGUF/calme-3.2-baguette-3b.Q4_K_M.gguf
|
||||
sha256: 4e62fe0108643bbfd842add5a1bf199e9b81b0181309b15f483e1f07c2b5fbb2
|
||||
- !!merge <<: *qwen25
|
||||
icon: https://huggingface.co/MaziyarPanahi/calme-3.1-baguette-3b/resolve/main/calme_3.png
|
||||
name: "calme-3.1-baguette-3b"
|
||||
urls:
|
||||
- https://huggingface.co/MaziyarPanahi/calme-3.1-baguette-3b
|
||||
- https://huggingface.co/MaziyarPanahi/calme-3.1-baguette-3b-GGUF
|
||||
description: |
|
||||
This model is an advanced iteration of the powerful Qwen/Qwen2.5-3B, fine-tuned specifically to enhance its capabilities across general domains in both French and English.
|
||||
overrides:
|
||||
parameters:
|
||||
model: calme-3.1-baguette-3b.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: calme-3.1-baguette-3b.Q4_K_M.gguf
|
||||
uri: huggingface://MaziyarPanahi/calme-3.1-baguette-3b-GGUF/calme-3.1-baguette-3b.Q4_K_M.gguf
|
||||
sha256: 351058680d633749fa64efde205bd5f3d942aacada3204c594d9acfab2fc8774
|
||||
- !!merge <<: *qwen25
|
||||
name: "calme-3.3-qwenloi-3b"
|
||||
icon: https://huggingface.co/MaziyarPanahi/calme-3.3-qwenloi-3b/resolve/main/calme_3.png
|
||||
urls:
|
||||
- https://huggingface.co/MaziyarPanahi/calme-3.3-qwenloi-3b
|
||||
- https://huggingface.co/MaziyarPanahi/calme-3.3-qwenloi-3b-GGUF
|
||||
description: |
|
||||
This model is an advanced iteration of the powerful Qwen/Qwen2.5-3B, specifically fine-tuned to enhance its capabilities in French Legal domain.
|
||||
overrides:
|
||||
parameters:
|
||||
model: calme-3.3-qwenloi-3b.Q5_K_M.gguf
|
||||
files:
|
||||
- filename: calme-3.3-qwenloi-3b.Q5_K_M.gguf
|
||||
sha256: 9592e186a00c70552365d85ccabddae87acc8d812634a6145da8d460b57b70f9
|
||||
uri: huggingface://MaziyarPanahi/calme-3.3-qwenloi-3b-GGUF/calme-3.3-qwenloi-3b.Q5_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "calme-3.2-qwenloi-3b"
|
||||
icon: https://huggingface.co/MaziyarPanahi/calme-3.3-qwenloi-3b/resolve/main/calme_3.png
|
||||
urls:
|
||||
- https://huggingface.co/MaziyarPanahi/calme-3.2-qwenloi-3b
|
||||
- https://huggingface.co/MaziyarPanahi/calme-3.2-qwenloi-3b-GGUF
|
||||
description: |
|
||||
This model is an advanced iteration of the powerful Qwen/Qwen2.5-3B, specifically fine-tuned to enhance its capabilities in French Legal domain.
|
||||
overrides:
|
||||
parameters:
|
||||
model: calme-3.2-qwenloi-3b.Q5_K_M.gguf
|
||||
files:
|
||||
- filename: calme-3.2-qwenloi-3b.Q5_K_M.gguf
|
||||
sha256: 61be0c2f221262523dcd00a9147fe590aba797c89a1c5849bd4f66e7df2ad272
|
||||
uri: huggingface://MaziyarPanahi/calme-3.2-qwenloi-3b-GGUF/calme-3.2-qwenloi-3b.Q5_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "calme-3.1-qwenloi-3b"
|
||||
icon: https://huggingface.co/MaziyarPanahi/calme-3.3-qwenloi-3b/resolve/main/calme_3.png
|
||||
urls:
|
||||
- https://huggingface.co/MaziyarPanahi/calme-3.1-qwenloi-3b
|
||||
- https://huggingface.co/MaziyarPanahi/calme-3.1-qwenloi-3b-GGUF
|
||||
description: |
|
||||
This model is an advanced iteration of the powerful Qwen/Qwen2.5-3B, specifically fine-tuned to enhance its capabilities in French Legal domain.
|
||||
overrides:
|
||||
parameters:
|
||||
model: calme-3.1-qwenloi-3b.Q5_K_M.gguf
|
||||
files:
|
||||
- filename: calme-3.1-qwenloi-3b.Q5_K_M.gguf
|
||||
sha256: 8962a8d1704979039063b5c69fafdb38b545c26143419ec4c574f37f2d6dd7b2
|
||||
uri: huggingface://MaziyarPanahi/calme-3.1-qwenloi-3b-GGUF/calme-3.1-qwenloi-3b.Q5_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "eva-qwen2.5-72b-v0.1-i1"
|
||||
urls:
|
||||
- https://huggingface.co/EVA-UNIT-01/EVA-Qwen2.5-72B-v0.1
|
||||
- https://huggingface.co/mradermacher/EVA-Qwen2.5-72B-v0.1-i1-GGUF
|
||||
description: |
|
||||
A RP/storywriting specialist model, full-parameter finetune of Qwen2.5-72B on mixture of synthetic and natural data.
|
||||
It uses Celeste 70B 0.1 data mixture, greatly expanding it to improve versatility, creativity and "flavor" of the resulting model.
|
||||
|
||||
Dedicated to Nev.
|
||||
|
||||
Version notes for 0.1: Reprocessed dataset (via Cahvay for 32B 0.2, used here as well), readjusted training config for 8xH100 SXM. Significant improvements in instruction following, long context understanding and overall coherence over v0.0.
|
||||
overrides:
|
||||
parameters:
|
||||
model: EVA-Qwen2.5-72B-v0.1.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: EVA-Qwen2.5-72B-v0.1.i1-Q4_K_M.gguf
|
||||
sha256: b05dbc02eeb286c41122b103ac31431fc8dcbd80b8979422541a05cda53df61b
|
||||
uri: huggingface://mradermacher/EVA-Qwen2.5-72B-v0.1-i1-GGUF/EVA-Qwen2.5-72B-v0.1.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "celestial-harmony-14b-v1.0-experimental-1016-i1"
|
||||
urls:
|
||||
- https://huggingface.co/ProdeusUnity/Celestial-Harmony-14b-v1.0-Experimental-1016
|
||||
- https://huggingface.co/mradermacher/Celestial-Harmony-14b-v1.0-Experimental-1016-i1-GGUF
|
||||
description: |
|
||||
Yet Another merge, this one for AuriAetherwiing, at their request.
|
||||
This is a merge of pre-trained language models created using mergekit.
|
||||
The following models were included in the merge:
|
||||
EVA-UNIT-01/EVA-Qwen2.5-14B-v0.1
|
||||
v000000/Qwen2.5-Lumen-14B
|
||||
arcee-ai/SuperNova-Medius
|
||||
overrides:
|
||||
parameters:
|
||||
model: Celestial-Harmony-14b-v1.0-Experimental-1016.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Celestial-Harmony-14b-v1.0-Experimental-1016.i1-Q4_K_M.gguf
|
||||
sha256: 536a6d98e30e9d52f91672daf49eeb7efe076e161a5da8beaca204adedd76864
|
||||
uri: huggingface://mradermacher/Celestial-Harmony-14b-v1.0-Experimental-1016-i1-GGUF/Celestial-Harmony-14b-v1.0-Experimental-1016.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "qwen2.5-32b-arliai-rpmax-v1.3"
|
||||
urls:
|
||||
- https://huggingface.co/ArliAI/Qwen2.5-32B-ArliAI-RPMax-v1.3
|
||||
- https://huggingface.co/bartowski/Qwen2.5-32B-ArliAI-RPMax-v1.3-GGUF
|
||||
description: |
|
||||
RPMax is a series of models that are trained on a diverse set of curated creative writing and RP datasets with a focus on variety and deduplication. This model is designed to be highly creative and non-repetitive by making sure no two entries in the dataset have repeated characters or situations, which makes sure the model does not latch on to a certain personality and be capable of understanding and acting appropriately to any characters or situations.
|
||||
Many RPMax users mentioned that these models do not feel like any other RP models, having a different writing style and generally don't feel in-bred.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen2.5-32B-ArliAI-RPMax-v1.3-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen2.5-32B-ArliAI-RPMax-v1.3-Q4_K_M.gguf
|
||||
sha256: 51b369068b124165b1b8c253371b88b573af9dd350e331ce93d7e47b6b710003
|
||||
uri: huggingface://bartowski/Qwen2.5-32B-ArliAI-RPMax-v1.3-GGUF/Qwen2.5-32B-ArliAI-RPMax-v1.3-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "q2.5-ms-mistoria-72b-i1"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/5LOvUFYiMMw6pcEsOhmo2.webp
|
||||
urls:
|
||||
- https://huggingface.co/Steelskull/Q2.5-MS-Mistoria-72b
|
||||
- https://huggingface.co/mradermacher/Q2.5-MS-Mistoria-72b-i1-GGUF
|
||||
description: |
|
||||
This model is my first attempt at a 72b model; as usual my goal is to merge the robust storytelling of multiple models while attempting to maintain intelligence.
|
||||
Merge of:
|
||||
- model: EVA-UNIT-01/EVA-Qwen2.5-72B-v0.1
|
||||
- model: ZeusLabs/Chronos-Platinum-72B
|
||||
- model: shuttleai/shuttle-3
|
||||
overrides:
|
||||
parameters:
|
||||
model: Q2.5-MS-Mistoria-72b.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Q2.5-MS-Mistoria-72b.i1-Q4_K_M.gguf
|
||||
sha256: f51ac3db855259c0132070e7bb9f58b67538103ffb3c716880ceef3bb09d43d9
|
||||
uri: huggingface://mradermacher/Q2.5-MS-Mistoria-72b-i1-GGUF/Q2.5-MS-Mistoria-72b.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "athene-v2-agent"
|
||||
icon: https://huggingface.co/Nexusflow/Athene-V2-Agent/resolve/main/agent.png
|
||||
urls:
|
||||
- https://huggingface.co/Nexusflow/Athene-V2-Agent
|
||||
- https://huggingface.co/bartowski/Athene-V2-Agent-GGUF
|
||||
description: "Athene-V2-Agent is an open-source Agent LLM that surpasses the state-of-the-art in function calling and agentic capabilities.\n\n\U0001F4AA Versatile Agent Capability: Athene-V2-Agent is an agent model, capable of operating in environments with deeply nested dependencies with the environment. It is capable of reasoning and doing planning for trajectories with many tool calls necessary to answer a single query.\n\n\U0001F4CA Performance Highlights: Athene-V2-Agent surpasses GPT-4o in single FC tasks by 18% in function calling success rates, and by 17% in Agentic success rates.\n\n\U0001F527 Generalization to the Unseen: Athene-V2-Agent has never been trained on the functions or agentic settings used in evaluation.\n"
|
||||
overrides:
|
||||
parameters:
|
||||
model: Athene-V2-Agent-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Athene-V2-Agent-Q4_K_M.gguf
|
||||
sha256: 2829d205519da34852c374286d42a4403f3be012ea56424e88ebcb8dc89676ad
|
||||
uri: huggingface://bartowski/Athene-V2-Agent-GGUF/Athene-V2-Agent-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "athene-v2-chat"
|
||||
urls:
|
||||
- https://huggingface.co/Nexusflow/Athene-V2-Chat
|
||||
- https://huggingface.co/bartowski/Athene-V2-Chat-GGUF
|
||||
description: |
|
||||
We introduce Athene-V2-Chat-72B, an open-weights LLM on-par with GPT-4o across benchmarks. It is trained through RLHF with Qwen-2.5-72B-Instruct as base model. Athene-V2-Chat-72B excels in chat, math, and coding. Its sister model, Athene-V2-Agent-72B, surpasses GPT-4o in complex function calling and agentic applications.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Athene-V2-Chat-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Athene-V2-Chat-Q4_K_M.gguf
|
||||
sha256: bda8b784ad55982891e5aa69b08ce4030c91a2e28ad9c4c35284d45d3c7aeb16
|
||||
uri: huggingface://bartowski/Athene-V2-Chat-GGUF/Athene-V2-Chat-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "qwen2.5-7b-nerd-uncensored-v1.7"
|
||||
urls:
|
||||
- https://huggingface.co/jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7
|
||||
- https://huggingface.co/mradermacher/Qwen2.5-7B-nerd-uncensored-v1.7-GGUF
|
||||
description: |
|
||||
Model created by analyzing and selecting the optimal layers from other Qwen2.5-7B models based on their dimensional utilization efficiency, measured by the Normalized Effective Rank (NER). Computed like:
|
||||
Input: Weight matrix for each model layer
|
||||
Compute singular values σᵢ where σᵢ ≥ 0 # σᵢ represents the importance of each dimension
|
||||
Filter values above numerical threshold (>1e-12)
|
||||
Sum all singular values: S = Σσᵢ # S acts as normalization factor
|
||||
Create probability distribution: pᵢ = σᵢ/S # converts singular values to probabilities summing to 1
|
||||
Compute Shannon entropy: H = -Σ(pᵢ * log₂(pᵢ)) # measures information content
|
||||
Calculate maximum possible entropy: H_max = log₂(n)
|
||||
Final NER score = H/H_max # normalizes score to [0,1] range
|
||||
Results in value between 0 and 1 for each model layer
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen2.5-7B-nerd-uncensored-v1.7.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen2.5-7B-nerd-uncensored-v1.7.Q4_K_M.gguf
|
||||
sha256: 42cf7a96784dc8f25c61c2404620c3e6548a024caa8dff6e435d7c86400d7ab8
|
||||
uri: huggingface://mradermacher/Qwen2.5-7B-nerd-uncensored-v1.7-GGUF/Qwen2.5-7B-nerd-uncensored-v1.7.Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
icon: https://i.imgur.com/OxX2Usi.png
|
||||
name: "evathene-v1.0"
|
||||
urls:
|
||||
- https://huggingface.co/sophosympatheia/Evathene-v1.0
|
||||
- https://huggingface.co/bartowski/Evathene-v1.0-GGUF
|
||||
description: |
|
||||
This 72B parameter model is a merge of Nexusflow/Athene-V2-Chat with EVA-UNIT-01/EVA-Qwen2.5-72B-v0.1. See the merge recipe below for details.
|
||||
|
||||
This model is uncensored. You are responsible for whatever you do with it.
|
||||
|
||||
This model was designed for roleplaying and storytelling and I think it does well at both. It may also perform well at other tasks but I have not tested its performance in other areas.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Evathene-v1.0-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Evathene-v1.0-Q4_K_M.gguf
|
||||
sha256: 96401ba9d798faa8a01f579b54523c5f75277e91bf1f0eee93db285f76f61e7e
|
||||
uri: huggingface://bartowski/Evathene-v1.0-GGUF/Evathene-v1.0-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "miniclaus-qw1.5b-unamgs"
|
||||
icon: https://huggingface.co/fblgit/miniclaus-qw1.5B-UNAMGS/resolve/main/miniclaus_qw15-UNAMGS.png
|
||||
urls:
|
||||
- https://huggingface.co/fblgit/miniclaus-qw1.5B-UNAMGS
|
||||
- https://huggingface.co/bartowski/miniclaus-qw1.5B-UNAMGS-GGUF
|
||||
description: |
|
||||
Trained with Magpie-Align/Magpie-Pro-MT-300K-v0.1
|
||||
Using MGS & UNA (MLP) on this tiny but powerful model.
|
||||
overrides:
|
||||
parameters:
|
||||
model: miniclaus-qw1.5B-UNAMGS-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: miniclaus-qw1.5B-UNAMGS-Q4_K_M.gguf
|
||||
sha256: a0dadd7147cc4a8e8df59659556e4d824ef5c26fd2f39381fe467b2ff9cc1289
|
||||
uri: huggingface://bartowski/miniclaus-qw1.5B-UNAMGS-GGUF/miniclaus-qw1.5B-UNAMGS-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "qwen2.5-3b-smart-i1"
|
||||
urls:
|
||||
- https://huggingface.co/bunnycore/Qwen2.5-3B-Smart
|
||||
- https://huggingface.co/mradermacher/Qwen2.5-3B-Smart-i1-GGUF
|
||||
description: |
|
||||
This model was merged using the passthrough merge method using bunnycore/Qwen2.5-3B-RP-Mix + bunnycore/Qwen2.5-3b-Smart-lora_model as a base.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Qwen2.5-3B-Smart.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Qwen2.5-3B-Smart.i1-Q4_K_M.gguf
|
||||
sha256: 4cfffa4478191b3ac5f54b0e2c5c3f60883322cf705d74f9651715b70f3779f4
|
||||
uri: huggingface://mradermacher/Qwen2.5-3B-Smart-i1-GGUF/Qwen2.5-3B-Smart.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen25
|
||||
name: "steyrcannon-0.2-qwen2.5-72b"
|
||||
urls:
|
||||
- https://huggingface.co/KaraKaraWitch/SteyrCannon-0.2-Qwen2.5-72b
|
||||
- https://huggingface.co/mradermacher/SteyrCannon-0.2-Qwen2.5-72b-GGUF
|
||||
description: |
|
||||
SteyrCannon-0.2 is an updated revision from the original SteyrCannon. This uses EVA-Qwen2.5-72B-v0.2. Nothing else has changed.This model was merged using the TIES merge method using EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2 as a base.
|
||||
The following models were included in the merge:
|
||||
anthracite-org/magnum-v4-72b
|
||||
EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2
|
||||
overrides:
|
||||
parameters:
|
||||
model: SteyrCannon-0.2-Qwen2.5-72b.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: SteyrCannon-0.2-Qwen2.5-72b.Q4_K_M.gguf
|
||||
sha256: b34c08b77ffd25ccb0ca50b167f2215e784689205c93a0903fa9435b6cc187f0
|
||||
uri: huggingface://mradermacher/SteyrCannon-0.2-Qwen2.5-72b-GGUF/SteyrCannon-0.2-Qwen2.5-72b.Q4_K_M.gguf
|
||||
- &archfunct
|
||||
license: apache-2.0
|
||||
tags:
|
||||
@@ -1574,6 +2165,20 @@
|
||||
- filename: Llama-3.1-Swallow-70B-v0.1.i1-Q4_K_M.gguf
|
||||
sha256: 9eaa08a4872a26f56fe34b27a99f7bd0d22ee2b2d1c84cfcde2091b5f61af5fa
|
||||
uri: huggingface://mradermacher/Llama-3.1-Swallow-70B-v0.1-i1-GGUF/Llama-3.1-Swallow-70B-v0.1.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
name: "llama-3.1_openscholar-8b"
|
||||
urls:
|
||||
- https://huggingface.co/OpenScholar/Llama-3.1_OpenScholar-8B
|
||||
- https://huggingface.co/bartowski/Llama-3.1_OpenScholar-8B-GGUF
|
||||
description: |
|
||||
Llama-3.1_OpenScholar-8B is a fine-tuned 8B for scientific literature synthesis. The Llama-3.1_OpenScholar-8B is trained on the os-data dataset. Developed by: University of Washington, Allen Institute for AI (AI2)
|
||||
overrides:
|
||||
parameters:
|
||||
model: Llama-3.1_OpenScholar-8B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Llama-3.1_OpenScholar-8B-Q4_K_M.gguf
|
||||
sha256: 54865fc86451959b495c494a51bb1806c8b62bf1415600f0da2966a8a1fe6c7d
|
||||
uri: huggingface://bartowski/Llama-3.1_OpenScholar-8B-GGUF/Llama-3.1_OpenScholar-8B-Q4_K_M.gguf
|
||||
## Uncensored models
|
||||
- !!merge <<: *llama31
|
||||
name: "humanish-roleplay-llama-3.1-8b-i1"
|
||||
@@ -2291,6 +2896,243 @@
|
||||
- filename: Tess-R1-Limerick-Llama-3.1-70B-Q4_K_M.gguf
|
||||
sha256: 92da5dad8a36ed5060becf78a83537d776079b7eaa4de73733d3ca57156286ab
|
||||
uri: huggingface://bartowski/Tess-R1-Limerick-Llama-3.1-70B-GGUF/Tess-R1-Limerick-Llama-3.1-70B-Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
name: "tess-3-llama-3.1-70b"
|
||||
icon: https://huggingface.co/migtissera/Tess-M-v1.0/resolve/main/Tess.png
|
||||
urls:
|
||||
- https://huggingface.co/migtissera/Tess-3-Llama-3.1-70B
|
||||
- https://huggingface.co/mradermacher/Tess-3-Llama-3.1-70B-GGUF
|
||||
description: |
|
||||
Tess, short for Tesoro (Treasure in Italian), is a general purpose Large Language Model series created by Migel Tissera.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Tess-3-Llama-3.1-70B.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Tess-3-Llama-3.1-70B.Q4_K_M.gguf
|
||||
sha256: 81625defcbea414282f490dd960b14afdecd7734e0d77d8db2da2bf5c21261aa
|
||||
uri: huggingface://mradermacher/Tess-3-Llama-3.1-70B-GGUF/Tess-3-Llama-3.1-70B.Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
name: "llama3.1-8b-enigma"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/64f267a8a4f79a118e0fcc89/it7MY5MyLCLpFQev5dUis.jpeg
|
||||
urls:
|
||||
- https://huggingface.co/ValiantLabs/Llama3.1-8B-Enigma
|
||||
- https://huggingface.co/mradermacher/Llama3.1-8B-Enigma-GGUF
|
||||
description: |
|
||||
Enigma is a code-instruct model built on Llama 3.1 8b.
|
||||
High quality code instruct performance within the Llama 3 Instruct chat format
|
||||
Finetuned on synthetic code-instruct data generated with Llama 3.1 405b. Find the current version of the dataset here!
|
||||
Overall chat performance supplemented with generalist synthetic data.
|
||||
This is the 2024-10-02 release of Enigma for Llama 3.1 8b, enhancing code-instruct and general chat capabilities.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Llama3.1-8B-Enigma.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Llama3.1-8B-Enigma.Q4_K_M.gguf
|
||||
sha256: e98c9909ee3b74b11d50d4c4f17178502e42cd936215ede0c64a7b217ae665bb
|
||||
uri: huggingface://mradermacher/Llama3.1-8B-Enigma-GGUF/Llama3.1-8B-Enigma.Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
name: "llama3.1-8b-cobalt"
|
||||
urls:
|
||||
- https://huggingface.co/ValiantLabs/Llama3.1-8B-Cobalt
|
||||
- https://huggingface.co/mradermacher/Llama3.1-8B-Cobalt-GGUF
|
||||
description: |
|
||||
Cobalt is a math-instruct model built on Llama 3.1 8b.
|
||||
High quality math instruct performance within the Llama 3 Instruct chat format
|
||||
Finetuned on synthetic math-instruct data generated with Llama 3.1 405b. Find the current version of the dataset here!
|
||||
Version
|
||||
This is the 2024-08-16 release of Cobalt for Llama 3.1 8b.
|
||||
Help us and recommend Cobalt to your friends! We're excited for more Cobalt releases in the future.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Llama3.1-8B-Cobalt.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Llama3.1-8B-Cobalt.Q4_K_M.gguf
|
||||
sha256: 44340f1ebbc3bf4e4e23d04ac3580c26fdc0b5717f23b45ce30743aa1eeed7ed
|
||||
uri: huggingface://mradermacher/Llama3.1-8B-Cobalt-GGUF/Llama3.1-8B-Cobalt.Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
name: "llama-3.1-8b-arliai-rpmax-v1.3"
|
||||
urls:
|
||||
- https://huggingface.co/ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.3
|
||||
- https://huggingface.co/bartowski/Llama-3.1-8B-ArliAI-RPMax-v1.3-GGUF
|
||||
description: |
|
||||
RPMax is a series of models that are trained on a diverse set of curated creative writing and RP datasets with a focus on variety and deduplication. This model is designed to be highly creative and non-repetitive by making sure no two entries in the dataset have repeated characters or situations, which makes sure the model does not latch on to a certain personality and be capable of understanding and acting appropriately to any characters or situations.
|
||||
Many RPMax users mentioned that these models does not feel like any other RP models, having a different writing style and generally doesn't feel in-bred.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Llama-3.1-8B-ArliAI-RPMax-v1.3-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Llama-3.1-8B-ArliAI-RPMax-v1.3-Q4_K_M.gguf
|
||||
sha256: 66fcbbe96950cc3424cba866f929180d83f1bffdb0d4eedfa9b1f55cf0ea5c26
|
||||
uri: huggingface://bartowski/Llama-3.1-8B-ArliAI-RPMax-v1.3-GGUF/Llama-3.1-8B-ArliAI-RPMax-v1.3-Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
name: "l3.1-8b-slush-i1"
|
||||
icon: https://huggingface.co/crestf411/L3.1-8B-Slush/resolve/main/slush.jpg?
|
||||
urls:
|
||||
- https://huggingface.co/crestf411/L3.1-8B-Slush
|
||||
- https://huggingface.co/mradermacher/L3.1-8B-Slush-i1-GGUF
|
||||
description: |
|
||||
Slush is a two-stage model trained with high LoRA dropout, where stage 1 is a pretraining continuation on the base model, aimed at boosting the model's creativity and writing capabilities. This is then merged into the instruction tune model, and stage 2 is a fine tuning step on top of this to further enhance its roleplaying capabilities and/or to repair any damage caused in the stage 1 merge.
|
||||
This is an initial experiment done on the at-this-point-infamous Llama 3.1 8B model, in an attempt to retain its smartness while addressing its abysmal lack of imagination/creativity. As always, feedback is welcome, and begone if you demand perfection.
|
||||
The second stage, like the Sunfall series, follows the Silly Tavern preset, so ymmv in particular if you use some other tool and/or preset.
|
||||
overrides:
|
||||
parameters:
|
||||
model: L3.1-8B-Slush.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: L3.1-8B-Slush.i1-Q4_K_M.gguf
|
||||
sha256: 98c53cd1ec0e2b00400c5968cd076a589d0c889bca13ec52abfe4456cfa039be
|
||||
uri: huggingface://mradermacher/L3.1-8B-Slush-i1-GGUF/L3.1-8B-Slush.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/C-ndfxAGdf21DjchZcf2p.png
|
||||
name: "l3.1-ms-astoria-70b-v2"
|
||||
urls:
|
||||
- https://huggingface.co/Steelskull/L3.1-MS-Astoria-70b-v2
|
||||
- https://huggingface.co/bartowski/L3.1-MS-Astoria-70b-v2-GGUF
|
||||
description: |
|
||||
This model is a remake of the original astoria with modern models and context sizes its goal is to merge the robust storytelling of mutiple models while attempting to maintain intelligence.
|
||||
|
||||
Use Llama 3 Format or meth format (llama 3 refuses to work with stepped thinking but meth works)
|
||||
- model: migtissera/Tess-3-Llama-3.1-70B
|
||||
- model: NeverSleep/Lumimaid-v0.2-70B
|
||||
- model: Sao10K/L3.1-70B-Euryale-v2.2
|
||||
- model: ArliAI/Llama-3.1-70B-ArliAI-RPMax-v1.2
|
||||
- model: nbeerbower/Llama3.1-Gutenberg-Doppel-70B
|
||||
overrides:
|
||||
parameters:
|
||||
model: L3.1-MS-Astoria-70b-v2-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: L3.1-MS-Astoria-70b-v2-Q4_K_M.gguf
|
||||
sha256: c02658ead1ecdc25c7218b8d9d11786f19c16d64f0d453082998e313edb0d4a6
|
||||
uri: huggingface://bartowski/L3.1-MS-Astoria-70b-v2-GGUF/L3.1-MS-Astoria-70b-v2-Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
name: "magnum-v2-4b-i1"
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/658a46cbfb9c2bdfae75b3a6/9JwXZze4tHRGpc_RzE2AU.png
|
||||
urls:
|
||||
- https://huggingface.co/anthracite-org/magnum-v2-4b
|
||||
- https://huggingface.co/mradermacher/magnum-v2-4b-i1-GGUF
|
||||
description: |
|
||||
This is the eighth in a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet and Opus. This model is fine-tuned on top of IntervitensInc/Llama-3.1-Minitron-4B-Width-Base-chatml.
|
||||
overrides:
|
||||
parameters:
|
||||
model: magnum-v2-4b.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: magnum-v2-4b.i1-Q4_K_M.gguf
|
||||
sha256: 692618059fee8870759d67d275ebc59bc0474b18ae3571b3ebdec8f9da786a64
|
||||
uri: huggingface://mradermacher/magnum-v2-4b-i1-GGUF/magnum-v2-4b.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
name: "l3.1-nemotron-sunfall-v0.7.0-i1"
|
||||
urls:
|
||||
- https://huggingface.co/crestf411/L3.1-nemotron-sunfall-v0.7.0
|
||||
- https://huggingface.co/mradermacher/L3.1-nemotron-sunfall-v0.7.0-i1-GGUF
|
||||
description: |
|
||||
Significant revamping of the dataset metadata generation process, resulting in higher quality dataset overall. The "Diamond Law" experiment has been removed as it didn't seem to affect the model output enough to warrant set up complexity.
|
||||
Recommended starting point:
|
||||
Temperature: 1
|
||||
MinP: 0.05~0.1
|
||||
DRY: 0.8 1.75 2 0
|
||||
At early context, I recommend keeping XTC disabled. Once you hit higher context sizes (10k+), enabling XTC at 0.1 / 0.5 seems to significantly improve the output, but YMMV. If the output drones on and is uninspiring, XTC can be extremely effective.
|
||||
General heuristic:
|
||||
Lots of slop? Temperature is too low. Raise it, or enable XTC. For early context, temp bump is probably preferred.
|
||||
Is the model making mistakes about subtle or obvious details in the scene? Temperature is too high, OR XTC is enabled and/or XTC settings are too high. Lower temp and/or disable XTC.
|
||||
overrides:
|
||||
parameters:
|
||||
model: L3.1-nemotron-sunfall-v0.7.0.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: L3.1-nemotron-sunfall-v0.7.0.i1-Q4_K_M.gguf
|
||||
sha256: f9aa88f3b220e35662a2d62d1f615a3b425e348a8f9e2939f05bf57385119f76
|
||||
uri: huggingface://mradermacher/L3.1-nemotron-sunfall-v0.7.0-i1-GGUF/L3.1-nemotron-sunfall-v0.7.0.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
name: "llama-mesh"
|
||||
urls:
|
||||
- https://huggingface.co/Zhengyi/LLaMA-Mesh
|
||||
- https://huggingface.co/bartowski/LLaMA-Mesh-GGUF
|
||||
description: |
|
||||
LLaMA-Mesh: Unifying 3D Mesh Generation with Language Models
|
||||
Pre-trained model weights of LLaMA-Mesh: Unifying 3D Mesh Generation with Language Models. This work explores expanding the capabilities of large language models (LLMs) pretrained on text to generate 3D meshes within a unified model
|
||||
overrides:
|
||||
parameters:
|
||||
model: LLaMA-Mesh-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: LLaMA-Mesh-Q4_K_M.gguf
|
||||
sha256: 150ac70c92bb7351468768bcc84bd3018f44b624f709821fee8e5e816e4868e7
|
||||
uri: huggingface://bartowski/LLaMA-Mesh-GGUF/LLaMA-Mesh-Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
name: "llama-3.1-8b-instruct-ortho-v3"
|
||||
urls:
|
||||
- https://huggingface.co/lodrick-the-lafted/llama-3.1-8b-instruct-ortho-v3
|
||||
- https://huggingface.co/mradermacher/llama-3.1-8b-instruct-ortho-v3-GGUF
|
||||
description: |
|
||||
A few different attempts at orthogonalization/abliteration of llama-3.1-8b-instruct using variations of the method from "Mechanistically Eliciting Latent Behaviors in Language Models".
|
||||
Each of these use different vectors and have some variations in where the new refusal boundaries lie. None of them seem totally jailbroken.
|
||||
overrides:
|
||||
parameters:
|
||||
model: llama-3.1-8b-instruct-ortho-v3.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: llama-3.1-8b-instruct-ortho-v3.Q4_K_M.gguf
|
||||
sha256: 8d1dd638ed80019f5cd61240d1f06fd1333413f61427bef4d288c5b8cd9d8cea
|
||||
uri: huggingface://mradermacher/llama-3.1-8b-instruct-ortho-v3-GGUF/llama-3.1-8b-instruct-ortho-v3.Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
name: "llama-3.1-tulu-3-8b-dpo"
|
||||
icon: https://huggingface.co/datasets/allenai/blog-images/resolve/main/tulu3/Tulu3-logo.png
|
||||
urls:
|
||||
- https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B-DPO
|
||||
- https://huggingface.co/mradermacher/Llama-3.1-Tulu-3-8B-DPO-GGUF
|
||||
description: |
|
||||
Tülu3 is a leading instruction following model family, offering fully open-source data, code, and recipes designed to serve as a comprehensive guide for modern post-training techniques. Tülu3 is designed for state-of-the-art performance on a diversity of tasks in addition to chat, such as MATH, GSM8K, and IFEval.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Llama-3.1-Tulu-3-8B-DPO.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Llama-3.1-Tulu-3-8B-DPO.Q4_K_M.gguf
|
||||
sha256: 8991bef1775edc5190047ef268d60876c2df3a80cf6da5f1bd1e82d09dd0ab2b
|
||||
uri: huggingface://mradermacher/Llama-3.1-Tulu-3-8B-DPO-GGUF/Llama-3.1-Tulu-3-8B-DPO.Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
name: "l3.1-aspire-heart-matrix-8b"
|
||||
urls:
|
||||
- https://huggingface.co/ZeroXClem/L3-Aspire-Heart-Matrix-8B
|
||||
- https://huggingface.co/mradermacher/L3.1-Aspire-Heart-Matrix-8B-GGUF
|
||||
description: |
|
||||
ZeroXClem/L3-Aspire-Heart-Matrix-8B is an experimental language model crafted by merging three high-quality 8B parameter models using the Model Stock Merge method. This synthesis leverages the unique strengths of Aspire, Heart Stolen, and CursedMatrix, creating a highly versatile and robust language model for a wide array of tasks.
|
||||
overrides:
|
||||
parameters:
|
||||
model: L3.1-Aspire-Heart-Matrix-8B.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: L3.1-Aspire-Heart-Matrix-8B.Q4_K_M.gguf
|
||||
sha256: 4d90abaae59f39e8f04548151265dce3b9c913303e6755860f5d28dd5cfc2d86
|
||||
uri: huggingface://mradermacher/L3.1-Aspire-Heart-Matrix-8B-GGUF/L3.1-Aspire-Heart-Matrix-8B.Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
name: "dark-chivalry_v1.0-i1"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/66c1cc08453a7ef6c5fe657a/A9vNZXVnD3xFiZ7cMLOKy.png
|
||||
urls:
|
||||
- https://huggingface.co/Triangle104/Dark-Chivalry_V1.0
|
||||
- https://huggingface.co/mradermacher/Dark-Chivalry_V1.0-i1-GGUF
|
||||
description: |
|
||||
The dark side of chivalry...
|
||||
This model was merged using the TIES merge method using ValiantLabs/Llama3.1-8B-ShiningValiant2 as a base.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Dark-Chivalry_V1.0.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Dark-Chivalry_V1.0.i1-Q4_K_M.gguf
|
||||
sha256: 6659fad2ea7e40b862a02d683a4bcb9044704fc7f6d3f50cd54c9069860171cd
|
||||
uri: huggingface://mradermacher/Dark-Chivalry_V1.0-i1-GGUF/Dark-Chivalry_V1.0.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *llama31
|
||||
name: "tulu-3.1-8b-supernova-i1"
|
||||
urls:
|
||||
- https://huggingface.co/bunnycore/Tulu-3.1-8B-SuperNova
|
||||
- https://huggingface.co/mradermacher/Tulu-3.1-8B-SuperNova-i1-GGUF
|
||||
description: |
|
||||
The following models were included in the merge:
|
||||
meditsolutions/Llama-3.1-MedIT-SUN-8B
|
||||
allenai/Llama-3.1-Tulu-3-8B
|
||||
arcee-ai/Llama-3.1-SuperNova-Lite
|
||||
overrides:
|
||||
parameters:
|
||||
model: Tulu-3.1-8B-SuperNova.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Tulu-3.1-8B-SuperNova.i1-Q4_K_M.gguf
|
||||
sha256: c6cc2e1a4c3d2338973ca0050af1cf4462b3f62838f62b4c8a204f2a74eeb01f
|
||||
uri: huggingface://mradermacher/Tulu-3.1-8B-SuperNova-i1-GGUF/Tulu-3.1-8B-SuperNova.i1-Q4_K_M.gguf
|
||||
- &deepseek
|
||||
## Deepseek
|
||||
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
|
||||
@@ -2661,6 +3503,21 @@
|
||||
- filename: EdgeRunner-Tactical-7B.Q4_K_M.gguf
|
||||
sha256: 90ca9c3ab19e5d1de4499e3f988cc0ba3d205e50285d7c89de6f0a4c525bf204
|
||||
uri: huggingface://RichardErkhov/edgerunner-ai_-_EdgeRunner-Tactical-7B-gguf/EdgeRunner-Tactical-7B.Q4_K_M.gguf
|
||||
- !!merge <<: *qwen2
|
||||
name: "marco-o1"
|
||||
icon: https://huggingface.co/AIDC-AI/Marco-o1/resolve/main/assets/logo.png
|
||||
urls:
|
||||
- https://huggingface.co/AIDC-AI/Marco-o1
|
||||
- https://huggingface.co/QuantFactory/Marco-o1-GGUF
|
||||
description: |
|
||||
Marco-o1 not only focuses on disciplines with standard answers, such as mathematics, physics, and coding—which are well-suited for reinforcement learning (RL)—but also places greater emphasis on open-ended resolutions. We aim to address the question: "Can the o1 model effectively generalize to broader domains where clear standards are absent and rewards are challenging to quantify?"
|
||||
overrides:
|
||||
parameters:
|
||||
model: Marco-o1.Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Marco-o1.Q4_K_M.gguf
|
||||
sha256: 54dd9554cb54609bf0bf4b367dfba192fc982a2fc6b87a0f56fba5ea82762d0d
|
||||
uri: huggingface://QuantFactory/Marco-o1-GGUF/Marco-o1.Q4_K_M.gguf
|
||||
- &mistral03
|
||||
## START Mistral
|
||||
url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
|
||||
@@ -3021,6 +3878,54 @@
|
||||
- filename: Valor-7B-v0.1.Q4_K_M.gguf
|
||||
sha256: 2b695fe53d64b36c3eea68f1fa0809f30560aa97ce8b71c16f371c2dc262d9b8
|
||||
uri: huggingface://mradermacher/Valor-7B-v0.1-GGUF/Valor-7B-v0.1.Q4_K_M.gguf
|
||||
- !!merge <<: *mistral03
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
name: "mn-tiramisu-12b"
|
||||
icon: https://huggingface.co/matchaaaaa/MN-Tiramisu-12B/resolve/main/tiramisu-cute.png
|
||||
urls:
|
||||
- https://huggingface.co/matchaaaaa/MN-Tiramisu-12B
|
||||
- https://huggingface.co/MaziyarPanahi/MN-Tiramisu-12B-GGUF
|
||||
description: |
|
||||
This is a really yappity-yappy yapping model that's good for long-form RP. Tried to rein it in with Mahou and give it some more character understanding with Pantheon. Feedback is always welcome.
|
||||
overrides:
|
||||
parameters:
|
||||
model: MN-Tiramisu-12B.Q5_K_M.gguf
|
||||
files:
|
||||
- filename: MN-Tiramisu-12B.Q5_K_M.gguf
|
||||
sha256: 100c78b08a0f4fc5a5a65797e1498ff5fd6fc9daf96b0898d2de731c35fa4e3e
|
||||
uri: huggingface://MaziyarPanahi/MN-Tiramisu-12B-GGUF/MN-Tiramisu-12B.Q5_K_M.gguf
|
||||
- !!merge <<: *mistral03
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
name: "mistral-nemo-prism-12b"
|
||||
icon: https://huggingface.co/nbeerbower/Mistral-Nemo-Prism-12B/resolve/main/prism-cover.png
|
||||
urls:
|
||||
- https://huggingface.co/nbeerbower/Mistral-Nemo-Prism-12B
|
||||
- https://huggingface.co/bartowski/Mistral-Nemo-Prism-12B-GGUF
|
||||
description: |
|
||||
Mahou-1.5-mistral-nemo-12B-lorablated finetuned on Arkhaios-DPO and Purpura-DPO.
|
||||
The goal was to reduce archaic language and purple prose in a completely uncensored model.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Mistral-Nemo-Prism-12B-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Mistral-Nemo-Prism-12B-Q4_K_M.gguf
|
||||
sha256: 96b922c6d55d94ffb91e869b8cccaf2b6dc449d75b1456f4d4578c92c8184c25
|
||||
uri: huggingface://bartowski/Mistral-Nemo-Prism-12B-GGUF/Mistral-Nemo-Prism-12B-Q4_K_M.gguf
|
||||
- !!merge <<: *mistral03
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
name: "magnum-12b-v2.5-kto-i1"
|
||||
icon: https://cdn-uploads.huggingface.co/production/uploads/658a46cbfb9c2bdfae75b3a6/sWYs3iHkn36lw6FT_Y7nn.png
|
||||
urls:
|
||||
- https://huggingface.co/mradermacher/magnum-12b-v2.5-kto-i1-GGUF
|
||||
description: |
|
||||
v2.5 KTO is an experimental release; we are testing a hybrid reinforcement learning strategy of KTO + DPOP, using rejected data sampled from the original model as "rejected". For "chosen", we use data from the original finetuning dataset as "chosen". This was done on a limited portion of of primarily instruction following data; we plan to scale up a larger KTO dataset in the future for better generalization. This is the 5th in a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet and Opus. This model is fine-tuned on top of anthracite-org/magnum-12b-v2.
|
||||
overrides:
|
||||
parameters:
|
||||
model: magnum-12b-v2.5-kto.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: magnum-12b-v2.5-kto.i1-Q4_K_M.gguf
|
||||
sha256: 07e91d2c6d4e42312e65a69c54f16be467575f7a596fe052993b388e38b90d76
|
||||
uri: huggingface://mradermacher/magnum-12b-v2.5-kto-i1-GGUF/magnum-12b-v2.5-kto.i1-Q4_K_M.gguf
|
||||
- &mudler
|
||||
### START mudler's LocalAI specific-models
|
||||
url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
|
||||
@@ -5260,6 +6165,21 @@
|
||||
- filename: Llama-3-SEC-Chat-Q4_K_M.gguf
|
||||
uri: huggingface://arcee-ai/Llama-3-SEC-Chat-GGUF/Llama-3-SEC-Chat-Q4_K_M.gguf
|
||||
sha256: 0d837400af161ba4136233db191330f2d77e297e079f0b6249e877c375cb56f3
|
||||
- !!merge <<: *llama3
|
||||
name: "copus-2x8b-i1"
|
||||
icon: https://huggingface.co/lodrick-the-lafted/Copus-2x8B/resolve/main/copus.png
|
||||
urls:
|
||||
- https://huggingface.co/lodrick-the-lafted/Copus-2x8B
|
||||
- https://huggingface.co/mradermacher/Copus-2x8B-i1-GGUF
|
||||
description: |
|
||||
Which were the two most interesting llama3 finetunes as of yet. Resulting model seems OK. It's not on Miqu's level, anyway.
|
||||
overrides:
|
||||
parameters:
|
||||
model: Copus-2x8B.i1-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: Copus-2x8B.i1-Q4_K_M.gguf
|
||||
sha256: 685da1ba49e203e8f491105585143d76044286d4b4687bed37d325f6b55501e5
|
||||
uri: huggingface://mradermacher/Copus-2x8B-i1-GGUF/Copus-2x8B.i1-Q4_K_M.gguf
|
||||
- &yi-chat
|
||||
### Start Yi
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
@@ -8397,3 +9317,22 @@
|
||||
- filename: voice-zh_CN-huayan-medium.tar.gz
|
||||
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh_CN-huayan-medium.tar.gz
|
||||
sha256: 0299a5e7f481ba853404e9f0e1515a94d5409585d76963fa4d30c64bd630aa99
|
||||
- name: "silero-vad"
|
||||
url: github:mudler/LocalAI/gallery/virtual.yaml@master
|
||||
urls:
|
||||
- https://github.com/snakers4/silero-vad
|
||||
- https://huggingface.co/onnx-community/silero-vad
|
||||
description: |
|
||||
Silero VAD - pre-trained enterprise-grade Voice Activity Detector.
|
||||
tags:
|
||||
- vad
|
||||
- voice-activity-detection
|
||||
- cpu
|
||||
overrides:
|
||||
backend: silero-vad
|
||||
parameters:
|
||||
model: silero-vad.onnx
|
||||
files:
|
||||
- filename: silero-vad.onnx
|
||||
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
|
||||
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
|
||||
|
||||
8
gallery/silero-vad.yaml
Normal file
8
gallery/silero-vad.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
---
|
||||
name: "silero-vad"
|
||||
|
||||
config_file: |
|
||||
name: vad
|
||||
backend: silero-vad
|
||||
parameters:
|
||||
model: silero_vad.onnx
|
||||
60
go.mod
60
go.mod
@@ -5,16 +5,16 @@ go 1.23
|
||||
toolchain go1.23.1
|
||||
|
||||
require (
|
||||
dario.cat/mergo v1.0.0
|
||||
dario.cat/mergo v1.0.1
|
||||
github.com/M0Rf30/go-tiny-dream v0.0.0-20240425104733-c04fa463ace9
|
||||
github.com/Masterminds/sprig/v3 v3.2.3
|
||||
github.com/Masterminds/sprig/v3 v3.3.0
|
||||
github.com/alecthomas/kong v0.9.0
|
||||
github.com/census-instrumentation/opencensus-proto v0.4.1
|
||||
github.com/charmbracelet/glamour v0.7.0
|
||||
github.com/chasefleming/elem-go v0.26.0
|
||||
github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b
|
||||
github.com/containerd/containerd v1.7.19
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20240228065144-661e7ae26d44
|
||||
github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2
|
||||
github.com/elliotchance/orderedmap/v2 v2.2.0
|
||||
github.com/fsnotify/fsnotify v1.7.0
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20240626202019-c118733a29ad
|
||||
@@ -24,6 +24,7 @@ require (
|
||||
github.com/gofiber/fiber/v2 v2.52.5
|
||||
github.com/gofiber/swagger v1.0.0
|
||||
github.com/gofiber/template/html/v2 v2.1.2
|
||||
github.com/gofiber/websocket/v2 v2.2.1
|
||||
github.com/gofrs/flock v0.12.1
|
||||
github.com/golang/protobuf v1.5.4
|
||||
github.com/google/go-containerregistry v0.19.2
|
||||
@@ -37,11 +38,12 @@ require (
|
||||
github.com/libp2p/go-libp2p v0.36.2
|
||||
github.com/mholt/archiver/v3 v3.5.1
|
||||
github.com/microcosm-cc/bluemonday v1.0.26
|
||||
github.com/mudler/edgevpn v0.28.3
|
||||
github.com/mudler/edgevpn v0.28.4
|
||||
github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82
|
||||
github.com/mudler/go-stable-diffusion v0.0.0-20240429204715-4a3cd6aeae6f
|
||||
github.com/onsi/ginkgo/v2 v2.20.1
|
||||
github.com/onsi/gomega v1.34.1
|
||||
github.com/onsi/ginkgo/v2 v2.21.0
|
||||
github.com/onsi/gomega v1.35.1
|
||||
github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e
|
||||
github.com/ory/dockertest/v3 v3.10.0
|
||||
github.com/otiai10/openaigo v1.7.0
|
||||
github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5
|
||||
@@ -51,6 +53,7 @@ require (
|
||||
github.com/sashabaranov/go-openai v1.26.2
|
||||
github.com/schollz/progressbar/v3 v3.14.4
|
||||
github.com/shirou/gopsutil/v3 v3.24.5
|
||||
github.com/streamer45/silero-vad-go v0.2.1
|
||||
github.com/stretchr/testify v1.9.0
|
||||
github.com/swaggo/swag v1.16.3
|
||||
github.com/thxcode/gguf-parser-go v0.1.0
|
||||
@@ -62,7 +65,7 @@ require (
|
||||
go.opentelemetry.io/otel/sdk/metric v1.28.0
|
||||
google.golang.org/api v0.180.0
|
||||
google.golang.org/grpc v1.65.0
|
||||
google.golang.org/protobuf v1.34.2
|
||||
google.golang.org/protobuf v1.35.1
|
||||
gopkg.in/yaml.v2 v2.4.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
oras.land/oras-go/v2 v2.5.0
|
||||
@@ -73,17 +76,14 @@ require (
|
||||
cloud.google.com/go/auth v0.4.1 // indirect
|
||||
cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect
|
||||
cloud.google.com/go/compute/metadata v0.3.0 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
|
||||
github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2 // indirect
|
||||
github.com/envoyproxy/protoc-gen-validate v1.0.4 // indirect
|
||||
github.com/fasthttp/websocket v1.5.3 // indirect
|
||||
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
|
||||
github.com/go-viper/mapstructure/v2 v2.0.0 // indirect
|
||||
github.com/google/s2a-go v0.1.7 // indirect
|
||||
github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
|
||||
github.com/googleapis/gax-go/v2 v2.12.4 // indirect
|
||||
github.com/labstack/echo/v4 v4.12.0 // indirect
|
||||
github.com/labstack/gommon v0.4.2 // indirect
|
||||
github.com/moby/docker-image-spec v1.3.1 // indirect
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||
github.com/pion/datachannel v1.5.8 // indirect
|
||||
@@ -102,12 +102,9 @@ require (
|
||||
github.com/pion/transport/v2 v2.2.10 // indirect
|
||||
github.com/pion/turn/v2 v2.1.6 // indirect
|
||||
github.com/pion/webrtc/v3 v3.3.0 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee // indirect
|
||||
github.com/shirou/gopsutil/v4 v4.24.7 // indirect
|
||||
github.com/urfave/cli/v2 v2.27.4 // indirect
|
||||
github.com/valyala/fasttemplate v1.2.2 // indirect
|
||||
github.com/wlynxg/anet v0.0.4 // indirect
|
||||
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.52.0 // indirect
|
||||
go.uber.org/mock v0.4.0 // indirect
|
||||
golang.org/x/oauth2 v0.21.0 // indirect
|
||||
@@ -118,7 +115,7 @@ require (
|
||||
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect
|
||||
github.com/KyleBanks/depth v1.2.1 // indirect
|
||||
github.com/Masterminds/goutils v1.1.1 // indirect
|
||||
github.com/Masterminds/semver/v3 v3.2.0 // indirect
|
||||
github.com/Masterminds/semver/v3 v3.3.0 // indirect
|
||||
github.com/Microsoft/go-winio v0.6.2 // indirect
|
||||
github.com/Microsoft/hcsshim v0.11.7 // indirect
|
||||
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect
|
||||
@@ -138,7 +135,7 @@ require (
|
||||
github.com/containerd/log v0.1.0 // indirect
|
||||
github.com/containerd/stargz-snapshotter/estargz v0.14.3 // indirect
|
||||
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
|
||||
github.com/creachadair/otp v0.4.2 // indirect
|
||||
github.com/creachadair/otp v0.5.0 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c // indirect
|
||||
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0 // indirect
|
||||
@@ -154,7 +151,7 @@ require (
|
||||
github.com/flynn/noise v1.1.0 // indirect
|
||||
github.com/francoispqt/gojay v1.2.13 // indirect
|
||||
github.com/ghodss/yaml v1.0.0 // indirect
|
||||
github.com/go-audio/audio v1.0.0 // indirect
|
||||
github.com/go-audio/audio v1.0.0
|
||||
github.com/go-audio/riff v1.0.0 // indirect
|
||||
github.com/go-logr/logr v1.4.2 // indirect
|
||||
github.com/go-logr/stdr v1.2.2 // indirect
|
||||
@@ -173,7 +170,7 @@ require (
|
||||
github.com/google/btree v1.1.2 // indirect
|
||||
github.com/google/go-cmp v0.6.0 // indirect
|
||||
github.com/google/gopacket v1.1.19 // indirect
|
||||
github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8 // indirect
|
||||
github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db // indirect
|
||||
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
|
||||
github.com/gorilla/css v1.0.1 // indirect
|
||||
github.com/gorilla/websocket v1.5.3 // indirect
|
||||
@@ -182,9 +179,8 @@ require (
|
||||
github.com/hashicorp/golang-lru v1.0.2 // indirect
|
||||
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
|
||||
github.com/henvic/httpretty v0.1.3 // indirect
|
||||
github.com/huandu/xstrings v1.3.3 // indirect
|
||||
github.com/huandu/xstrings v1.5.0 // indirect
|
||||
github.com/huin/goupnp v1.3.0 // indirect
|
||||
github.com/imdario/mergo v0.3.16 // indirect
|
||||
github.com/ipfs/boxo v0.21.0 // indirect
|
||||
github.com/ipfs/go-cid v0.4.1 // indirect
|
||||
github.com/ipfs/go-datastore v0.6.0 // indirect
|
||||
@@ -237,8 +233,8 @@ require (
|
||||
github.com/muesli/termenv v0.15.2 // indirect
|
||||
github.com/multiformats/go-base32 v0.1.0 // indirect
|
||||
github.com/multiformats/go-base36 v0.2.0 // indirect
|
||||
github.com/multiformats/go-multiaddr v0.13.0 // indirect
|
||||
github.com/multiformats/go-multiaddr-dns v0.3.1 // indirect
|
||||
github.com/multiformats/go-multiaddr v0.14.0
|
||||
github.com/multiformats/go-multiaddr-dns v0.4.0 // indirect
|
||||
github.com/multiformats/go-multiaddr-fmt v0.1.0 // indirect
|
||||
github.com/multiformats/go-multibase v0.2.0 // indirect
|
||||
github.com/multiformats/go-multicodec v0.9.0 // indirect
|
||||
@@ -270,12 +266,12 @@ require (
|
||||
github.com/raulk/go-watchdog v1.3.0 // indirect
|
||||
github.com/rivo/uniseg v0.4.7 // indirect
|
||||
github.com/shoenig/go-m1cpu v0.1.6 // indirect
|
||||
github.com/shopspring/decimal v1.3.1 // indirect
|
||||
github.com/shopspring/decimal v1.4.0 // indirect
|
||||
github.com/sirupsen/logrus v1.9.3 // indirect
|
||||
github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b // indirect
|
||||
github.com/songgao/packets v0.0.0-20160404182456-549a10cd4091 // indirect
|
||||
github.com/spaolacci/murmur3 v1.1.0 // indirect
|
||||
github.com/spf13/cast v1.5.0 // indirect
|
||||
github.com/spf13/cast v1.7.0 // indirect
|
||||
github.com/swaggo/files/v2 v2.0.0 // indirect
|
||||
github.com/tinylib/msgp v1.1.8 // indirect
|
||||
github.com/tklauser/go-sysconf v0.3.14 // indirect
|
||||
@@ -301,15 +297,15 @@ require (
|
||||
go.uber.org/fx v1.22.2 // indirect
|
||||
go.uber.org/multierr v1.11.0 // indirect
|
||||
go.uber.org/zap v1.27.0 // indirect
|
||||
golang.org/x/crypto v0.26.0 // indirect
|
||||
golang.org/x/crypto v0.28.0 // indirect
|
||||
golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa // indirect
|
||||
golang.org/x/mod v0.20.0 // indirect
|
||||
golang.org/x/net v0.28.0 // indirect
|
||||
golang.org/x/mod v0.21.0 // indirect
|
||||
golang.org/x/net v0.30.0 // indirect
|
||||
golang.org/x/sync v0.8.0 // indirect
|
||||
golang.org/x/sys v0.24.0 // indirect
|
||||
golang.org/x/term v0.23.0 // indirect
|
||||
golang.org/x/text v0.17.0 // indirect
|
||||
golang.org/x/tools v0.24.0 // indirect
|
||||
golang.org/x/sys v0.27.0 // indirect
|
||||
golang.org/x/term v0.25.0 // indirect
|
||||
golang.org/x/text v0.19.0 // indirect
|
||||
golang.org/x/tools v0.26.0 // indirect
|
||||
golang.zx2c4.com/wintun v0.0.0-20211104114900-415007cec224 // indirect
|
||||
golang.zx2c4.com/wireguard v0.0.0-20220703234212-c31a7b1ab478 // indirect
|
||||
golang.zx2c4.com/wireguard/windows v0.5.3 // indirect
|
||||
|
||||
146
go.sum
146
go.sum
@@ -10,8 +10,8 @@ cloud.google.com/go/auth/oauth2adapt v0.2.2 h1:+TTV8aXpjeChS9M+aTtN/TjdQnzJvmzKF
|
||||
cloud.google.com/go/auth/oauth2adapt v0.2.2/go.mod h1:wcYjgpZI9+Yu7LyYBg4pqSiaRkfEK3GQcpb7C/uyF1Q=
|
||||
cloud.google.com/go/compute/metadata v0.3.0 h1:Tz+eQXMEqDIKRsmY3cHTL6FVaynIjX2QxYC4trgAKZc=
|
||||
cloud.google.com/go/compute/metadata v0.3.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1hT1fIilQDwofLpJ20k=
|
||||
dario.cat/mergo v1.0.0 h1:AGCNq9Evsj31mOgNPcLyXc+4PNABt905YmuqPYYpBWk=
|
||||
dario.cat/mergo v1.0.0/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
|
||||
dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
|
||||
dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
|
||||
dmitri.shuralyov.com/app/changes v0.0.0-20180602232624-0a106ad413e3/go.mod h1:Yl+fi1br7+Rr3LqpNJf1/uxUdtRUV+Tnj0o93V2B9MU=
|
||||
dmitri.shuralyov.com/html/belt v0.0.0-20180602232347-f7d459c86be0/go.mod h1:JLBrvjyP0v+ecvNYvCpyZgu5/xkfAUhi6wJj28eUfSU=
|
||||
dmitri.shuralyov.com/service/change v0.0.0-20181023043359-a85b471d5412/go.mod h1:a1inKt/atXimZ4Mv927x+r7UpyzRUf4emIoiiSC2TN4=
|
||||
@@ -27,10 +27,10 @@ github.com/M0Rf30/go-tiny-dream v0.0.0-20240425104733-c04fa463ace9 h1:ASsbvw7wQP
|
||||
github.com/M0Rf30/go-tiny-dream v0.0.0-20240425104733-c04fa463ace9/go.mod h1:UOf2Mb/deUri5agct5OJ4SLWjhI+kZKbsUVUeRb24I0=
|
||||
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
|
||||
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
|
||||
github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g=
|
||||
github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ=
|
||||
github.com/Masterminds/sprig/v3 v3.2.3 h1:eL2fZNezLomi0uOLqjQoN6BfsDD+fyLtgbJMAj9n6YA=
|
||||
github.com/Masterminds/sprig/v3 v3.2.3/go.mod h1:rXcFaZ2zZbLRJv/xSysmlgIM1u11eBaRMhvYXJNkGuM=
|
||||
github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0=
|
||||
github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
|
||||
github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs=
|
||||
github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0=
|
||||
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
|
||||
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
|
||||
github.com/Microsoft/hcsshim v0.11.7 h1:vl/nj3Bar/CvJSYo7gIQPyRWc9f3c6IeSNavBTSZNZQ=
|
||||
@@ -102,12 +102,10 @@ github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSV
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
||||
github.com/creachadair/mds v0.7.0 h1:7QoYqiPl18C0h7CLq9z9/qUH5Vr62V9677yJZHGLoQM=
|
||||
github.com/creachadair/mds v0.7.0/go.mod h1:4vrFYUzTXMJpMBU+OA292I6IUxKWCCfZkgXg+/kBZMo=
|
||||
github.com/creachadair/otp v0.4.2 h1:ngNMaD6Tzd7UUNRFyed7ykZFn/Wr5sSs5ffqZWm9pu8=
|
||||
github.com/creachadair/otp v0.4.2/go.mod h1:DqV9hJyUbcUme0pooYfiFvvMe72Aua5sfhNzwfZvk40=
|
||||
github.com/creachadair/mds v0.21.3 h1:RRgEAPIb52cU0q7UxGyN+13QlCVTZIL4slRr0cYYQfA=
|
||||
github.com/creachadair/mds v0.21.3/go.mod h1:1ltMWZd9yXhaHEoZwBialMaviWVUpRPvMwVP7saFAzM=
|
||||
github.com/creachadair/otp v0.5.0 h1:q3Th7CXm2zlmCdBjw5tEPFOj4oWJMnVL5HXlq0sNKS0=
|
||||
github.com/creachadair/otp v0.5.0/go.mod h1:0kceI87EnYFNYSTL121goJVAnk3eJhaed9H0nMuJUkA=
|
||||
github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY=
|
||||
github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
|
||||
github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2 h1:flLYmnQFZNo04x2NPehMbf30m7Pli57xwZ0NFqR/hb0=
|
||||
@@ -153,6 +151,8 @@ github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1m
|
||||
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
|
||||
github.com/envoyproxy/protoc-gen-validate v1.0.4 h1:gVPz/FMfvh57HdSJQyvBtF00j8JU4zdyUgIUNhlgg0A=
|
||||
github.com/envoyproxy/protoc-gen-validate v1.0.4/go.mod h1:qys6tmnRsYrQqIhm2bvKZH4Blx/1gTIZ2UKVY1M+Yew=
|
||||
github.com/fasthttp/websocket v1.5.3 h1:TPpQuLwJYfd4LJPXvHDYPMFWbLjsT91n3GpWtCQtdek=
|
||||
github.com/fasthttp/websocket v1.5.3/go.mod h1:46gg/UBmTU1kUaTcwQXpUxtRwG2PvIZYeA8oL6vF3Fs=
|
||||
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
|
||||
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
|
||||
github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc=
|
||||
@@ -221,6 +221,8 @@ github.com/gofiber/template/html/v2 v2.1.2 h1:wkK/mYJ3nIhongTkG3t0QgV4ADdgOYJYVS
|
||||
github.com/gofiber/template/html/v2 v2.1.2/go.mod h1:E98Z/FzvpaSib06aWEgYk6GXNf3ctoyaJH8yW5ay5ak=
|
||||
github.com/gofiber/utils v1.1.0 h1:vdEBpn7AzIUJRhe+CiTOJdUcTg4Q9RK+pEa0KPbLdrM=
|
||||
github.com/gofiber/utils v1.1.0/go.mod h1:poZpsnhBykfnY1Mc0KeEa6mSHrS3dV0+oBWyeQmb2e0=
|
||||
github.com/gofiber/websocket/v2 v2.2.1 h1:C9cjxvloojayOp9AovmpQrk8VqvVnT8Oao3+IUygH7w=
|
||||
github.com/gofiber/websocket/v2 v2.2.1/go.mod h1:Ao/+nyNnX5u/hIFPuHl28a+NIkrqK7PRimyKaj4JxVU=
|
||||
github.com/gofrs/flock v0.12.1 h1:MTLVXXHf8ekldpJk3AKicLij9MdwOWkZ+a/jHHZby9E=
|
||||
github.com/gofrs/flock v0.12.1/go.mod h1:9zxTsyu5xtJ9DK+1tFZyibEV7y3uwDxPPfbxeeHCoD0=
|
||||
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
|
||||
@@ -270,14 +272,13 @@ github.com/google/gopacket v1.1.19 h1:ves8RnFZPGiFnTS0uPQStjwru6uO6h+nlr9j6fL7kF
|
||||
github.com/google/gopacket v1.1.19/go.mod h1:iJ8V8n6KS+z2U1A8pUwu8bW5SyEMkXJB8Yo/Vo+TKTo=
|
||||
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
|
||||
github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
|
||||
github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8 h1:FKHo8hFI3A+7w0aUQuYXQ+6EN5stWmeY/AZqtM8xk9k=
|
||||
github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo=
|
||||
github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo=
|
||||
github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144=
|
||||
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
|
||||
github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o=
|
||||
github.com/google/s2a-go v0.1.7/go.mod h1:50CgR4k1jNlWBu4UfS4AcfhVe1r6pdZPygJ3R8F0Qdw=
|
||||
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4=
|
||||
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ=
|
||||
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
@@ -313,13 +314,10 @@ github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUq
|
||||
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
|
||||
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
|
||||
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
|
||||
github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4=
|
||||
github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
||||
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
|
||||
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
||||
github.com/huin/goupnp v1.3.0 h1:UvLUlWDNpoUdYzb2TCn+MuTWtcjXKSza2n6CBdQ0xXc=
|
||||
github.com/huin/goupnp v1.3.0/go.mod h1:gnGPsThkYa7bFi/KWmEysQRf48l2dvR5bxr2OFckNX8=
|
||||
github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
|
||||
github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
|
||||
github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
|
||||
github.com/ipfs/boxo v0.21.0 h1:XpGXb+TQQ0IUdYaeAxGzWjSs6ow/Lce148A/2IbRDVE=
|
||||
github.com/ipfs/boxo v0.21.0/go.mod h1:NmweAYeY1USOaJJxouy7DLr/Y5M8UBSsCI2KRivO+TY=
|
||||
github.com/ipfs/go-cid v0.4.1 h1:A/T3qGvxi4kpKWWcPC/PgbvDA2bjVLO7n4UeVwnbs/s=
|
||||
@@ -384,10 +382,6 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
|
||||
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
|
||||
github.com/labstack/echo/v4 v4.12.0 h1:IKpw49IMryVB2p1a4dzwlhP1O2Tf2E0Ir/450lH+kI0=
|
||||
github.com/labstack/echo/v4 v4.12.0/go.mod h1:UP9Cr2DJXbOK3Kr9ONYzNowSh7HP0aG0ShAyycHSJvM=
|
||||
github.com/labstack/gommon v0.4.2 h1:F8qTUNXgG1+6WQmqoUWnz8WiEU60mXVVw0P4ht1WRA0=
|
||||
github.com/labstack/gommon v0.4.2/go.mod h1:QlUFxVM+SNXhDL/Z7YhocGIBYOiwB0mXm1+1bAPHPyU=
|
||||
github.com/lib/pq v0.0.0-20180327071824-d34b9ff171c2 h1:hRGSmZu7j271trc9sneMrpOW7GN5ngLm8YUZIPzf394=
|
||||
github.com/lib/pq v0.0.0-20180327071824-d34b9ff171c2/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
|
||||
github.com/libp2p/go-buffer-pool v0.1.0 h1:oK4mSFcQz7cTQIfqbe4MIj9gLW+mnanjyFtc6cdF0Y8=
|
||||
@@ -451,7 +445,6 @@ github.com/mholt/archiver/v3 v3.5.1/go.mod h1:e3dqJ7H78uzsRSEACH1joayhuSyhnonssn
|
||||
github.com/microcosm-cc/bluemonday v1.0.1/go.mod h1:hsXNsILzKxV+sX77C5b8FSuKF00vh2OMYv+xgHpAMF4=
|
||||
github.com/microcosm-cc/bluemonday v1.0.26 h1:xbqSvqzQMeEHCqMi64VAs4d8uy6Mequs3rQ0k/Khz58=
|
||||
github.com/microcosm-cc/bluemonday v1.0.26/go.mod h1:JyzOCs9gkyQyjs+6h10UEVSe02CGwkhd72Xdqh78TWs=
|
||||
github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI=
|
||||
github.com/miekg/dns v1.1.43/go.mod h1:+evo5L0630/F6ca/Z9+GAqzhjGyn8/c+TBaOyfEl0V4=
|
||||
github.com/miekg/dns v1.1.62 h1:cN8OuEF1/x5Rq6Np+h1epln8OiyPWV+lROx9LxcGgIQ=
|
||||
github.com/miekg/dns v1.1.62/go.mod h1:mvDlcItzm+br7MToIKqkglaGhlFMHJ9DTNNWONWXbNQ=
|
||||
@@ -467,12 +460,10 @@ github.com/minio/sha256-simd v1.0.1 h1:6kaan5IFmwTNynnKKpDHe6FWHohJOHhCPchzK49dz
|
||||
github.com/minio/sha256-simd v1.0.1/go.mod h1:Pz6AKMiUdngCLpeTL/RJY1M9rUuPMYujV5xJjtbRSN8=
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
|
||||
github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw=
|
||||
github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
|
||||
github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
|
||||
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
|
||||
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
|
||||
github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
|
||||
github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
|
||||
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
|
||||
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
|
||||
@@ -488,16 +479,8 @@ github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3Rllmb
|
||||
github.com/mr-tron/base58 v1.1.2/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
|
||||
github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o=
|
||||
github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
|
||||
github.com/mudler/edgevpn v0.27.4 h1:T/irkShcnU6h9OZqtvWXpNo+3gQVPUMBEoUutxJ3YUg=
|
||||
github.com/mudler/edgevpn v0.27.4/go.mod h1:NFs/RpDHCaltPFnZmOLCiUmVpTQloER1LbAtptOzqrw=
|
||||
github.com/mudler/edgevpn v0.28.0 h1:oF/Msx3zPNajy3uYLPRT5M7H3Z+sCMU0wAA8TkB11PI=
|
||||
github.com/mudler/edgevpn v0.28.0/go.mod h1:/xk8vnXUcGajPPMW5rZhPt1aD0b95LeOj2xGbRbDS8A=
|
||||
github.com/mudler/edgevpn v0.28.2 h1:wxLrH9b3NNQDgMb0Uy4gmqbGh6Ad5jdbf21GrU32xVU=
|
||||
github.com/mudler/edgevpn v0.28.2/go.mod h1:HWcdIwj5zBgOD04Hn3I+J5E5Yb3kK1CwwWaEe6/QERo=
|
||||
github.com/mudler/edgevpn v0.28.3 h1:yIuoMExwKHy/mNMBXIsm6FUFbnB9ELIxw9KXrK9KHDk=
|
||||
github.com/mudler/edgevpn v0.28.3/go.mod h1:HWcdIwj5zBgOD04Hn3I+J5E5Yb3kK1CwwWaEe6/QERo=
|
||||
github.com/mudler/go-piper v0.0.0-20240315144837-9d0100873a7d h1:8udOFrDf/I83JL0/u22j6U6Q9z9LoSdby2a/DWdd0/s=
|
||||
github.com/mudler/go-piper v0.0.0-20240315144837-9d0100873a7d/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
|
||||
github.com/mudler/edgevpn v0.28.4 h1:9shCLB3TRyCoZtWc1NUXhfhd/R9bURkbNuxi5tJJMvo=
|
||||
github.com/mudler/edgevpn v0.28.4/go.mod h1:KJMuWVXboAg7gdOGk7tmiwM1trBpmlGidH/ODQqBvjg=
|
||||
github.com/mudler/go-piper v0.0.0-20241022074816-3854e0221ffb h1:5qcuxQEpAqeV4ftV5nUt3/hB/RoTXq3MaaauOAedyXo=
|
||||
github.com/mudler/go-piper v0.0.0-20241022074816-3854e0221ffb/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
|
||||
github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82 h1:FVT07EI8njvsD4tC2Hw8Xhactp5AWhsQWD4oTeQuSAU=
|
||||
@@ -515,11 +498,10 @@ github.com/multiformats/go-base32 v0.1.0/go.mod h1:Kj3tFY6zNr+ABYMqeUNeGvkIC/UYg
|
||||
github.com/multiformats/go-base36 v0.2.0 h1:lFsAbNOGeKtuKozrtBsAkSVhv1p9D0/qedU9rQyccr0=
|
||||
github.com/multiformats/go-base36 v0.2.0/go.mod h1:qvnKE++v+2MWCfePClUEjE78Z7P2a1UV0xHgWc0hkp4=
|
||||
github.com/multiformats/go-multiaddr v0.1.1/go.mod h1:aMKBKNEYmzmDmxfX88/vz+J5IU55txyt0p4aiWVohjo=
|
||||
github.com/multiformats/go-multiaddr v0.2.0/go.mod h1:0nO36NvPpyV4QzvTLi/lafl2y95ncPj0vFwVF6k6wJ4=
|
||||
github.com/multiformats/go-multiaddr v0.13.0 h1:BCBzs61E3AGHcYYTv8dqRH43ZfyrqM8RXVPT8t13tLQ=
|
||||
github.com/multiformats/go-multiaddr v0.13.0/go.mod h1:sBXrNzucqkFJhvKOiwwLyqamGa/P5EIXNPLovyhQCII=
|
||||
github.com/multiformats/go-multiaddr-dns v0.3.1 h1:QgQgR+LQVt3NPTjbrLLpsaT2ufAA2y0Mkk+QRVJbW3A=
|
||||
github.com/multiformats/go-multiaddr-dns v0.3.1/go.mod h1:G/245BRQ6FJGmryJCrOuTdB37AMA5AMOVuO6NY3JwTk=
|
||||
github.com/multiformats/go-multiaddr v0.14.0 h1:bfrHrJhrRuh/NXH5mCnemjpbGjzRw/b+tJFOD41g2tU=
|
||||
github.com/multiformats/go-multiaddr v0.14.0/go.mod h1:6EkVAxtznq2yC3QT5CM1UTAwG0GTP3EWAIcjHuzQ+r4=
|
||||
github.com/multiformats/go-multiaddr-dns v0.4.0 h1:P76EJ3qzBXpUXZ3twdCDx/kvagMsNo0LMFXpyms/zgU=
|
||||
github.com/multiformats/go-multiaddr-dns v0.4.0/go.mod h1:7hfthtB4E4pQwirrz+J0CcDUfbWzTqEzVyYKKIKpgkc=
|
||||
github.com/multiformats/go-multiaddr-fmt v0.1.0 h1:WLEFClPycPkp4fnIzoFoV9FVd49/eQsuaL3/CWe167E=
|
||||
github.com/multiformats/go-multiaddr-fmt v0.1.0/go.mod h1:hGtDIW4PU4BqJ50gW2quDuPVjyWNZxToGUh/HwTZYJo=
|
||||
github.com/multiformats/go-multibase v0.2.0 h1:isdYCVLvksgWlMW9OZRYJEa9pZETFivncJHmHnnd87g=
|
||||
@@ -531,7 +513,6 @@ github.com/multiformats/go-multihash v0.2.3 h1:7Lyc8XfX/IY2jWb/gI7JP+o7JEq9hOa7B
|
||||
github.com/multiformats/go-multihash v0.2.3/go.mod h1:dXgKXCXjBzdscBLk9JkjINiEsCKRVch90MdaGiKsvSM=
|
||||
github.com/multiformats/go-multistream v0.5.0 h1:5htLSLl7lvJk3xx3qT/8Zm9J4K8vEOf/QGkvOGQAyiE=
|
||||
github.com/multiformats/go-multistream v0.5.0/go.mod h1:n6tMZiwiP2wUsR8DgfDWw1dydlEqV3l6N3/GBsX6ILA=
|
||||
github.com/multiformats/go-varint v0.0.1/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE=
|
||||
github.com/multiformats/go-varint v0.0.7 h1:sWSGR+f/eu5ABZA2ZpYKBILXTTs9JWpdEM/nEGOHFS8=
|
||||
github.com/multiformats/go-varint v0.0.7/go.mod h1:r8PUYw/fD/SjBCiKOoDlGF6QawOELpZAu9eioSos/OU=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
|
||||
@@ -546,12 +527,10 @@ github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N
|
||||
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
|
||||
github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
|
||||
github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU=
|
||||
github.com/onsi/ginkgo/v2 v2.20.0 h1:PE84V2mHqoT1sglvHc8ZdQtPcwmvvt29WLEEO3xmdZw=
|
||||
github.com/onsi/ginkgo/v2 v2.20.0/go.mod h1:lG9ey2Z29hR41WMVthyJBGUBcBhGOtoPF2VFMvBXFCI=
|
||||
github.com/onsi/ginkgo/v2 v2.20.1 h1:YlVIbqct+ZmnEph770q9Q7NVAz4wwIiVNahee6JyUzo=
|
||||
github.com/onsi/ginkgo/v2 v2.20.1/go.mod h1:lG9ey2Z29hR41WMVthyJBGUBcBhGOtoPF2VFMvBXFCI=
|
||||
github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k=
|
||||
github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY=
|
||||
github.com/onsi/ginkgo/v2 v2.21.0 h1:7rg/4f3rB88pb5obDgNZrNHrQ4e6WpjonchcpuBRnZM=
|
||||
github.com/onsi/ginkgo/v2 v2.21.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo=
|
||||
github.com/onsi/gomega v1.35.1 h1:Cwbd75ZBPxFSuZ6T+rN/WCb/gOc6YgFBXLlZLhC7Ds4=
|
||||
github.com/onsi/gomega v1.35.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog=
|
||||
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
|
||||
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
|
||||
github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=
|
||||
@@ -564,6 +543,8 @@ github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/
|
||||
github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs=
|
||||
github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
|
||||
github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8=
|
||||
github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e h1:s2RNOM/IGdY0Y6qfTeUKhDawdHDpK9RGBdx80qN4Ttw=
|
||||
github.com/orcaman/writerseeker v0.0.0-20200621085525-1d3f536ff85e/go.mod h1:nBdnFKj15wFbf94Rwfq4m30eAcyY9V/IyKAGQFtqkW0=
|
||||
github.com/ory/dockertest/v3 v3.10.0 h1:4K3z2VMe8Woe++invjaTB7VRyQXQy5UY+loujO4aNE4=
|
||||
github.com/ory/dockertest/v3 v3.10.0/go.mod h1:nr57ZbRWMqfsdGdFNLHz5jjNdDb7VVFnzAeW1n5N1Lg=
|
||||
github.com/otiai10/mint v1.6.1 h1:kgbTJmOpp/0ce7hk3H8jiSuR0MXmpwWRfqUdKww17qg=
|
||||
@@ -668,10 +649,11 @@ github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR
|
||||
github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww=
|
||||
github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY=
|
||||
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/sashabaranov/go-openai v1.26.2 h1:cVlQa3gn3eYqNXRW03pPlpy6zLG52EU4g0FrWXc0EFI=
|
||||
github.com/sashabaranov/go-openai v1.26.2/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee h1:8Iv5m6xEo1NR1AvpV+7XmhI4r39LGNzwUL4YpMuL5vk=
|
||||
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee/go.mod h1:qwtSXrKuJh/zsFQ12yEE89xfCrGKK63Rr7ctU/uCo4g=
|
||||
github.com/schollz/progressbar/v3 v3.14.4 h1:W9ZrDSJk7eqmQhd3uxFNNcTr0QL+xuGNI9dEMrw0r74=
|
||||
github.com/schollz/progressbar/v3 v3.14.4/go.mod h1:aT3UQ7yGm+2ZjeXPqsjTenwL3ddUiuZ0kfQ/2tHlyNI=
|
||||
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
|
||||
@@ -683,9 +665,8 @@ github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFt
|
||||
github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ=
|
||||
github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU=
|
||||
github.com/shoenig/test v0.6.4/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k=
|
||||
github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
|
||||
github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8=
|
||||
github.com/shopspring/decimal v1.3.1/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
|
||||
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
|
||||
github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
|
||||
github.com/shurcooL/component v0.0.0-20170202220835-f88ec8f54cc4/go.mod h1:XhFIlyj5a1fBNx5aJTbKoIq0mNaPvOagO+HjB3EtxrY=
|
||||
github.com/shurcooL/events v0.0.0-20181021180414-410e4ca65f48/go.mod h1:5u70Mqkb5O5cxEA8nxTsgrgLehJeAw6Oc4Ab1c/P1HM=
|
||||
github.com/shurcooL/github_flavored_markdown v0.0.0-20181002035957-2122de532470/go.mod h1:2dOwnU2uBioM+SGy2aZoq1f/Sd1l9OkAeAUvjSyvgU0=
|
||||
@@ -726,9 +707,10 @@ github.com/sourcegraph/annotate v0.0.0-20160123013949-f4cad6c6324d/go.mod h1:Udh
|
||||
github.com/sourcegraph/syntaxhighlight v0.0.0-20170531221838-bd320f5d308e/go.mod h1:HuIsMU8RRBOtsCgI77wP899iHVBQpCmg4ErYMZB+2IA=
|
||||
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
|
||||
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
|
||||
github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
|
||||
github.com/spf13/cast v1.5.0 h1:rj3WzYc11XZaIZMPKmwP96zkFEnnAmV8s6XbB2aY32w=
|
||||
github.com/spf13/cast v1.5.0/go.mod h1:SpXXQ5YoyJw6s3/6cMTQuxvgRl3PCJiyaX9p6b155UU=
|
||||
github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w=
|
||||
github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
|
||||
github.com/streamer45/silero-vad-go v0.2.1 h1:Li1/tTC4H/3cyw6q4weX+U8GWwEL3lTekK/nYa1Cvuk=
|
||||
github.com/streamer45/silero-vad-go v0.2.1/go.mod h1:B+2FXs/5fZ6pzl6unUZYhZqkYdOB+3saBVzjOzdZnUs=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||
@@ -736,7 +718,6 @@ github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
@@ -765,29 +746,19 @@ github.com/ulikunitz/xz v0.5.9 h1:RsKRIA2MO8x56wkkcd3LbtcE/uMszhb6DpRf+3uwa3I=
|
||||
github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
|
||||
github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
|
||||
github.com/urfave/cli v1.22.10/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
|
||||
github.com/urfave/cli v1.22.12 h1:igJgVw1JdKH+trcLWLeLwZjU9fEfPesQ+9/e4MQ44S8=
|
||||
github.com/urfave/cli v1.22.12/go.mod h1:sSBEIC79qR6OvcmsD4U3KABeOTxDqQtdDnaFuUN30b8=
|
||||
github.com/urfave/cli/v2 v2.27.4 h1:o1owoI+02Eb+K107p27wEX9Bb8eqIoZCfLXloLUSWJ8=
|
||||
github.com/urfave/cli/v2 v2.27.4/go.mod h1:m4QzxcD2qpra4z7WhzEGn74WZLViBnMpb1ToCAKdGRQ=
|
||||
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
|
||||
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
|
||||
github.com/valyala/fasthttp v1.55.0 h1:Zkefzgt6a7+bVKHnu/YaYSOPfNYNisSVBo/unVCf8k8=
|
||||
github.com/valyala/fasthttp v1.55.0/go.mod h1:NkY9JtkrpPKmgwV3HTaS2HWaJss9RSIsRVfcxxoHiOM=
|
||||
github.com/valyala/fasttemplate v1.2.2 h1:lxLXG0uE3Qnshl9QyaK6XJxMXlQZELvChBOCmQD0Loo=
|
||||
github.com/valyala/fasttemplate v1.2.2/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ=
|
||||
github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
|
||||
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
|
||||
github.com/vbatts/tar-split v0.11.3 h1:hLFqsOLQ1SsppQNTMpkpPXClLDfC2A3Zgy9OUU+RVck=
|
||||
github.com/vbatts/tar-split v0.11.3/go.mod h1:9QlHN18E+fEH7RdG+QAJJcuya3rqT7eXSTY7wGrAokY=
|
||||
github.com/viant/assertly v0.4.8/go.mod h1:aGifi++jvCrUaklKEKT0BU95igDNaqkvz+49uaYMPRU=
|
||||
github.com/viant/toolbox v0.24.0/go.mod h1:OxMCG57V0PXuIP2HNQrtJf2CjqdmbrOx5EkMILuUhzM=
|
||||
github.com/vishvananda/netlink v1.2.1-beta.2 h1:Llsql0lnQEbHj0I1OuKyp8otXp0r3q0mPkuhwHfStVs=
|
||||
github.com/vishvananda/netlink v1.2.1-beta.2/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho=
|
||||
github.com/vishvananda/netlink v1.3.0 h1:X7l42GfcV4S6E4vHTsw48qbrV+9PVojNfIhZcwQdrZk=
|
||||
github.com/vishvananda/netlink v1.3.0/go.mod h1:i6NetklAujEcC6fK0JPjT8qSwWyO0HLn4UKG+hGqeJs=
|
||||
github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
|
||||
github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74 h1:gga7acRE695APm9hlsSMoOoE65U4/TcqNj90mc69Rlg=
|
||||
github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
|
||||
github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8=
|
||||
github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
|
||||
github.com/warpfork/go-wish v0.0.0-20220906213052-39a1cc7a02d0 h1:GDDkbFiaK8jsSDJfjId/PEGEShv6ugrt4kYsC5UIDaQ=
|
||||
@@ -806,8 +777,6 @@ github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17
|
||||
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
|
||||
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=
|
||||
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
|
||||
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4=
|
||||
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
|
||||
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
|
||||
@@ -868,12 +837,11 @@ golang.org/x/crypto v0.0.0-20200602180216-279210d13fed/go.mod h1:LzIPMQfyMNhhGPh
|
||||
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||
golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4=
|
||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||
golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4=
|
||||
golang.org/x/crypto v0.8.0/go.mod h1:mRqEX+O9/h5TFCrQhkgjo2yKi0yYA+9ecGkdQoHrywE=
|
||||
golang.org/x/crypto v0.12.0/go.mod h1:NF0Gs7EO5K4qLn+Ylc+fih8BSTeIjAP05siRnAh98yw=
|
||||
golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg=
|
||||
golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw=
|
||||
golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54=
|
||||
golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw=
|
||||
golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U=
|
||||
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
||||
golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa h1:ELnwvuAXPNtPk1TJRuGkI9fDTwym6AYBu0qzT8AcHdI=
|
||||
golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa/go.mod h1:akd2r19cwCdwSwWeIdzYQGa/EZZyqcOdwWiwj5L5eKQ=
|
||||
@@ -891,8 +859,8 @@ golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||
golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||
golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0=
|
||||
golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0=
|
||||
golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
|
||||
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
@@ -912,15 +880,14 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v
|
||||
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
|
||||
golang.org/x/net v0.0.0-20210423184538-5f58ad60dda6/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk=
|
||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||
golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY=
|
||||
golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE=
|
||||
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
|
||||
golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
|
||||
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
|
||||
golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI=
|
||||
golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY=
|
||||
golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE=
|
||||
golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg=
|
||||
golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4=
|
||||
golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU=
|
||||
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
||||
golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
||||
golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
||||
@@ -950,9 +917,7 @@ golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7w
|
||||
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200217220822-9197077df867/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200602225109-6fdc65e7d980/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200728102440-3e129f6d46b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
@@ -982,11 +947,10 @@ golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg=
|
||||
golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s=
|
||||
golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
|
||||
golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA=
|
||||
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
|
||||
golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY=
|
||||
@@ -994,21 +958,20 @@ golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
|
||||
golang.org/x/term v0.11.0/go.mod h1:zC9APTIj3jG3FdV/Ons+XE1riIZXG4aZ4GTHiPZJPIU=
|
||||
golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY=
|
||||
golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
|
||||
golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU=
|
||||
golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk=
|
||||
golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24=
|
||||
golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||
golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
||||
golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
|
||||
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||
golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc=
|
||||
golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
|
||||
golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM=
|
||||
golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
|
||||
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk=
|
||||
@@ -1033,8 +996,8 @@ golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
|
||||
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
||||
golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ=
|
||||
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
|
||||
golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24=
|
||||
golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ=
|
||||
golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ=
|
||||
golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
@@ -1088,8 +1051,8 @@ google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2
|
||||
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
||||
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
|
||||
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
|
||||
google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
|
||||
google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=
|
||||
google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA=
|
||||
google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
@@ -1105,7 +1068,6 @@ gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa
|
||||
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
||||
@@ -11,7 +11,7 @@ import (
|
||||
)
|
||||
|
||||
var _ = Describe("pkg/concurrency unit tests", func() {
|
||||
It("can be used to recieve a result across goroutines", func() {
|
||||
It("can be used to receive a result across goroutines", func() {
|
||||
jr, wjr := NewJobResult[string, string]("foo")
|
||||
Expect(jr).ToNot(BeNil())
|
||||
Expect(wjr).ToNot(BeNil())
|
||||
@@ -30,7 +30,7 @@ var _ = Describe("pkg/concurrency unit tests", func() {
|
||||
|
||||
})
|
||||
|
||||
It("can be used to recieve an error across goroutines", func() {
|
||||
It("can be used to receive an error across goroutines", func() {
|
||||
jr, wjr := NewJobResult[string, string]("foo")
|
||||
Expect(jr).ToNot(BeNil())
|
||||
Expect(wjr).ToNot(BeNil())
|
||||
|
||||
@@ -53,4 +53,6 @@ type Backend interface {
|
||||
Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.CallOption) (*pb.RerankResult, error)
|
||||
|
||||
GetTokenMetrics(ctx context.Context, in *pb.MetricsRequest, opts ...grpc.CallOption) (*pb.MetricsResponse, error)
|
||||
|
||||
VAD(ctx context.Context, in *pb.VADRequest, opts ...grpc.CallOption) (*pb.VADResponse, error)
|
||||
}
|
||||
|
||||
@@ -92,6 +92,10 @@ func (llm *Base) StoresFind(*pb.StoresFindOptions) (pb.StoresFindResult, error)
|
||||
return pb.StoresFindResult{}, fmt.Errorf("unimplemented")
|
||||
}
|
||||
|
||||
func (llm *Base) VAD(*pb.VADRequest) (pb.VADResponse, error) {
|
||||
return pb.VADResponse{}, fmt.Errorf("unimplemented")
|
||||
}
|
||||
|
||||
func memoryUsage() *pb.MemoryUsageData {
|
||||
mud := pb.MemoryUsageData{
|
||||
Breakdown: make(map[string]uint64),
|
||||
|
||||
@@ -392,3 +392,21 @@ func (c *Client) GetTokenMetrics(ctx context.Context, in *pb.MetricsRequest, opt
|
||||
client := pb.NewBackendClient(conn)
|
||||
return client.GetMetrics(ctx, in, opts...)
|
||||
}
|
||||
|
||||
func (c *Client) VAD(ctx context.Context, in *pb.VADRequest, opts ...grpc.CallOption) (*pb.VADResponse, error) {
|
||||
if !c.parallel {
|
||||
c.opMutex.Lock()
|
||||
defer c.opMutex.Unlock()
|
||||
}
|
||||
c.setBusy(true)
|
||||
defer c.setBusy(false)
|
||||
c.wdMark()
|
||||
defer c.wdUnMark()
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer conn.Close()
|
||||
client := pb.NewBackendClient(conn)
|
||||
return client.VAD(ctx, in, opts...)
|
||||
}
|
||||
|
||||
@@ -87,6 +87,10 @@ func (e *embedBackend) Rerank(ctx context.Context, in *pb.RerankRequest, opts ..
|
||||
return e.s.Rerank(ctx, in)
|
||||
}
|
||||
|
||||
func (e *embedBackend) VAD(ctx context.Context, in *pb.VADRequest, opts ...grpc.CallOption) (*pb.VADResponse, error) {
|
||||
return e.s.VAD(ctx, in)
|
||||
}
|
||||
|
||||
func (e *embedBackend) GetTokenMetrics(ctx context.Context, in *pb.MetricsRequest, opts ...grpc.CallOption) (*pb.MetricsResponse, error) {
|
||||
return e.s.GetMetrics(ctx, in)
|
||||
}
|
||||
|
||||
@@ -24,6 +24,8 @@ type LLM interface {
|
||||
StoresDelete(*pb.StoresDeleteOptions) error
|
||||
StoresGet(*pb.StoresGetOptions) (pb.StoresGetResult, error)
|
||||
StoresFind(*pb.StoresFindOptions) (pb.StoresFindResult, error)
|
||||
|
||||
VAD(*pb.VADRequest) (pb.VADResponse, error)
|
||||
}
|
||||
|
||||
func newReply(s string) *pb.Reply {
|
||||
|
||||
@@ -227,6 +227,18 @@ func (s *server) StoresFind(ctx context.Context, in *pb.StoresFindOptions) (*pb.
|
||||
return &res, nil
|
||||
}
|
||||
|
||||
func (s *server) VAD(ctx context.Context, in *pb.VADRequest) (*pb.VADResponse, error) {
|
||||
if s.llm.Locking() {
|
||||
s.llm.Lock()
|
||||
defer s.llm.Unlock()
|
||||
}
|
||||
res, err := s.llm.VAD(in)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &res, nil
|
||||
}
|
||||
|
||||
func StartServer(address string, model LLM) error {
|
||||
lis, err := net.Listen("tcp", address)
|
||||
if err != nil {
|
||||
|
||||
@@ -46,7 +46,6 @@ const (
|
||||
LLamaCPPGRPC = "llama-cpp-grpc"
|
||||
|
||||
BertEmbeddingsBackend = "bert-embeddings"
|
||||
RwkvBackend = "rwkv"
|
||||
WhisperBackend = "whisper"
|
||||
StableDiffusionBackend = "stablediffusion"
|
||||
TinyDreamBackend = "tinydream"
|
||||
|
||||
@@ -103,13 +103,13 @@ FILE:
|
||||
return models, nil
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) ListModels() []Model {
|
||||
func (ml *ModelLoader) ListModels() []*Model {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
models := []Model{}
|
||||
models := []*Model{}
|
||||
for _, model := range ml.models {
|
||||
models = append(models, *model)
|
||||
models = append(models, model)
|
||||
}
|
||||
|
||||
return models
|
||||
|
||||
156
swagger/docs.go
156
swagger/docs.go
@@ -279,6 +279,25 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"/tokenMetrics": {
|
||||
"get": {
|
||||
"consumes": [
|
||||
"application/json"
|
||||
],
|
||||
"produces": [
|
||||
"audio/x-wav"
|
||||
],
|
||||
"summary": "Get TokenMetrics for Active Slot.",
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "generated audio/wav file",
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/tts": {
|
||||
"post": {
|
||||
"consumes": [
|
||||
@@ -723,6 +742,65 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/tokenMetrics": {
|
||||
"get": {
|
||||
"consumes": [
|
||||
"application/json"
|
||||
],
|
||||
"produces": [
|
||||
"audio/x-wav"
|
||||
],
|
||||
"summary": "Get TokenMetrics for Active Slot.",
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "generated audio/wav file",
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/tokenize": {
|
||||
"post": {
|
||||
"summary": "Tokenize the input.",
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Response",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/schema.TokenizeResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/vad": {
|
||||
"post": {
|
||||
"consumes": [
|
||||
"application/json"
|
||||
],
|
||||
"summary": "Detect voice fragments in an audio stream",
|
||||
"parameters": [
|
||||
{
|
||||
"description": "query params",
|
||||
"name": "request",
|
||||
"in": "body",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"$ref": "#/definitions/schema.VADRequest"
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Response",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/proto.VADResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"definitions": {
|
||||
@@ -972,14 +1050,6 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"model.Model": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"openai.Assistant": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -1164,6 +1234,28 @@ const docTemplate = `{
|
||||
"StatusResponse_ERROR"
|
||||
]
|
||||
},
|
||||
"proto.VADResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"segments": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/proto.VADSegment"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"proto.VADSegment": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"end": {
|
||||
"type": "number"
|
||||
},
|
||||
"start": {
|
||||
"type": "number"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.BackendMonitorRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -1682,6 +1774,14 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.SysInfoModel": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.SystemInformationResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -1694,7 +1794,7 @@ const docTemplate = `{
|
||||
"loaded_models": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/model.Model"
|
||||
"$ref": "#/definitions/schema.SysInfoModel"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1718,14 +1818,25 @@ const docTemplate = `{
|
||||
"description": "model name or full path",
|
||||
"type": "string"
|
||||
},
|
||||
"response_format": {
|
||||
"description": "(optional) output format",
|
||||
"type": "string"
|
||||
},
|
||||
"voice": {
|
||||
"description": "voice audio file or speaker id",
|
||||
"type": "string"
|
||||
},
|
||||
"response_format": {
|
||||
"description": "(optional) output format of generated audio file, defaults to wav, accept wav, mp3, flac, aac, opus",
|
||||
"type": "string"
|
||||
},
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.TokenizeResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"tokens": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.ToolCall": {
|
||||
@@ -1744,6 +1855,23 @@ const docTemplate = `{
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.VADRequest": {
|
||||
"description": "VAD request body",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"audio": {
|
||||
"description": "model name or full path",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "number"
|
||||
}
|
||||
},
|
||||
"model": {
|
||||
"description": "model name or full path",
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"securityDefinitions": {
|
||||
|
||||
@@ -272,6 +272,25 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"/tokenMetrics": {
|
||||
"get": {
|
||||
"consumes": [
|
||||
"application/json"
|
||||
],
|
||||
"produces": [
|
||||
"audio/x-wav"
|
||||
],
|
||||
"summary": "Get TokenMetrics for Active Slot.",
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "generated audio/wav file",
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/tts": {
|
||||
"post": {
|
||||
"consumes": [
|
||||
@@ -716,6 +735,65 @@
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/tokenMetrics": {
|
||||
"get": {
|
||||
"consumes": [
|
||||
"application/json"
|
||||
],
|
||||
"produces": [
|
||||
"audio/x-wav"
|
||||
],
|
||||
"summary": "Get TokenMetrics for Active Slot.",
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "generated audio/wav file",
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/tokenize": {
|
||||
"post": {
|
||||
"summary": "Tokenize the input.",
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Response",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/schema.TokenizeResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/vad": {
|
||||
"post": {
|
||||
"consumes": [
|
||||
"application/json"
|
||||
],
|
||||
"summary": "Detect voice fragments in an audio stream",
|
||||
"parameters": [
|
||||
{
|
||||
"description": "query params",
|
||||
"name": "request",
|
||||
"in": "body",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"$ref": "#/definitions/schema.VADRequest"
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Response",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/proto.VADResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"definitions": {
|
||||
@@ -965,14 +1043,6 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"model.Model": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"openai.Assistant": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -1157,6 +1227,28 @@
|
||||
"StatusResponse_ERROR"
|
||||
]
|
||||
},
|
||||
"proto.VADResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"segments": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/proto.VADSegment"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"proto.VADSegment": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"end": {
|
||||
"type": "number"
|
||||
},
|
||||
"start": {
|
||||
"type": "number"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.BackendMonitorRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -1675,6 +1767,14 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.SysInfoModel": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.SystemInformationResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -1687,7 +1787,7 @@
|
||||
"loaded_models": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/model.Model"
|
||||
"$ref": "#/definitions/schema.SysInfoModel"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1711,13 +1811,24 @@
|
||||
"description": "model name or full path",
|
||||
"type": "string"
|
||||
},
|
||||
"response_format": {
|
||||
"description": "(optional) output format",
|
||||
"type": "string"
|
||||
},
|
||||
"voice": {
|
||||
"description": "voice audio file or speaker id",
|
||||
"type": "string"
|
||||
},
|
||||
"response_format": {
|
||||
"description": "(optional) output format of generated audio file, defaults to wav, accept wav, mp3, flac, aac, opus",
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.TokenizeResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"tokens": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
@@ -1737,6 +1848,23 @@
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.VADRequest": {
|
||||
"description": "VAD request body",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"audio": {
|
||||
"description": "model name or full path",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "number"
|
||||
}
|
||||
},
|
||||
"model": {
|
||||
"description": "model name or full path",
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"securityDefinitions": {
|
||||
@@ -1746,4 +1874,4 @@
|
||||
"in": "header"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -168,11 +168,6 @@ definitions:
|
||||
type: string
|
||||
type: array
|
||||
type: object
|
||||
model.Model:
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
type: object
|
||||
openai.Assistant:
|
||||
properties:
|
||||
created:
|
||||
@@ -300,6 +295,20 @@ definitions:
|
||||
- StatusResponse_BUSY
|
||||
- StatusResponse_READY
|
||||
- StatusResponse_ERROR
|
||||
proto.VADResponse:
|
||||
properties:
|
||||
segments:
|
||||
items:
|
||||
$ref: '#/definitions/proto.VADSegment'
|
||||
type: array
|
||||
type: object
|
||||
proto.VADSegment:
|
||||
properties:
|
||||
end:
|
||||
type: number
|
||||
start:
|
||||
type: number
|
||||
type: object
|
||||
schema.BackendMonitorRequest:
|
||||
properties:
|
||||
model:
|
||||
@@ -651,6 +660,11 @@ definitions:
|
||||
$ref: '#/definitions/p2p.NodeData'
|
||||
type: array
|
||||
type: object
|
||||
schema.SysInfoModel:
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
type: object
|
||||
schema.SystemInformationResponse:
|
||||
properties:
|
||||
backends:
|
||||
@@ -659,7 +673,7 @@ definitions:
|
||||
type: array
|
||||
loaded_models:
|
||||
items:
|
||||
$ref: '#/definitions/model.Model'
|
||||
$ref: '#/definitions/schema.SysInfoModel'
|
||||
type: array
|
||||
type: object
|
||||
schema.TTSRequest:
|
||||
@@ -676,12 +690,19 @@ definitions:
|
||||
model:
|
||||
description: model name or full path
|
||||
type: string
|
||||
response_format:
|
||||
description: (optional) output format
|
||||
type: string
|
||||
voice:
|
||||
description: voice audio file or speaker id
|
||||
type: string
|
||||
response_format:
|
||||
description: (optional) output format of generated audio file, defaults to wav, accept wav, mp3, flac, aac, opus
|
||||
type: string
|
||||
type: object
|
||||
schema.TokenizeResponse:
|
||||
properties:
|
||||
tokens:
|
||||
items:
|
||||
type: integer
|
||||
type: array
|
||||
type: object
|
||||
schema.ToolCall:
|
||||
properties:
|
||||
@@ -694,6 +715,18 @@ definitions:
|
||||
type:
|
||||
type: string
|
||||
type: object
|
||||
schema.VADRequest:
|
||||
description: VAD request body
|
||||
properties:
|
||||
audio:
|
||||
description: model name or full path
|
||||
items:
|
||||
type: number
|
||||
type: array
|
||||
model:
|
||||
description: model name or full path
|
||||
type: string
|
||||
type: object
|
||||
info:
|
||||
contact:
|
||||
name: LocalAI
|
||||
@@ -867,6 +900,18 @@ paths:
|
||||
schema:
|
||||
$ref: '#/definitions/schema.SystemInformationResponse'
|
||||
summary: Show the LocalAI instance information
|
||||
/tokenMetrics:
|
||||
get:
|
||||
consumes:
|
||||
- application/json
|
||||
produces:
|
||||
- audio/x-wav
|
||||
responses:
|
||||
"200":
|
||||
description: generated audio/wav file
|
||||
schema:
|
||||
type: string
|
||||
summary: Get TokenMetrics for Active Slot.
|
||||
/tts:
|
||||
post:
|
||||
consumes:
|
||||
@@ -1149,6 +1194,43 @@ paths:
|
||||
schema:
|
||||
type: string
|
||||
summary: Generates audio from the input text.
|
||||
/v1/tokenMetrics:
|
||||
get:
|
||||
consumes:
|
||||
- application/json
|
||||
produces:
|
||||
- audio/x-wav
|
||||
responses:
|
||||
"200":
|
||||
description: generated audio/wav file
|
||||
schema:
|
||||
type: string
|
||||
summary: Get TokenMetrics for Active Slot.
|
||||
/v1/tokenize:
|
||||
post:
|
||||
responses:
|
||||
"200":
|
||||
description: Response
|
||||
schema:
|
||||
$ref: '#/definitions/schema.TokenizeResponse'
|
||||
summary: Tokenize the input.
|
||||
/vad:
|
||||
post:
|
||||
consumes:
|
||||
- application/json
|
||||
parameters:
|
||||
- description: query params
|
||||
in: body
|
||||
name: request
|
||||
required: true
|
||||
schema:
|
||||
$ref: '#/definitions/schema.VADRequest'
|
||||
responses:
|
||||
"200":
|
||||
description: Response
|
||||
schema:
|
||||
$ref: '#/definitions/proto.VADResponse'
|
||||
summary: Detect voice fragments in an audio stream
|
||||
securityDefinitions:
|
||||
BearerAuth:
|
||||
in: header
|
||||
|
||||
@@ -1,18 +1,23 @@
|
||||
name: rwkv_test
|
||||
parameters:
|
||||
model: rwkv
|
||||
model: huggingface://bartowski/rwkv-6-world-7b-GGUF/rwkv-6-world-7b-Q4_K_M.gguf
|
||||
top_k: 80
|
||||
temperature: 0.9
|
||||
max_tokens: 100
|
||||
max_tokens: 4098
|
||||
top_p: 0.8
|
||||
context_size: 1024
|
||||
backend: "rwkv"
|
||||
cutwords:
|
||||
- "Bob:.*"
|
||||
context_size: 4098
|
||||
|
||||
roles:
|
||||
user: "Bob:"
|
||||
system: "Alice:"
|
||||
assistant: "Alice:"
|
||||
user: "User: "
|
||||
system: "System: "
|
||||
assistant: "Assistant: "
|
||||
|
||||
stopwords:
|
||||
- 'Assistant:'
|
||||
|
||||
template:
|
||||
completion: rwkv_completion
|
||||
chat: rwkv_chat
|
||||
chat: |
|
||||
{{.Input}}
|
||||
Assistant:
|
||||
completion: |
|
||||
{{.Input}}
|
||||
@@ -1,13 +0,0 @@
|
||||
The following is a verbose detailed conversation between Bob and a woman, Alice. Alice is intelligent, friendly and likeable. Alice is likely to agree with Bob.
|
||||
|
||||
Bob: Hello Alice, how are you doing?
|
||||
|
||||
Alice: Hi Bob! Thanks, I'm fine. What about you?
|
||||
|
||||
Bob: I am very good! It's nice to see you. Would you mind me chatting with you for a while?
|
||||
|
||||
Alice: Not at all! I'm listening.
|
||||
|
||||
{{.Input}}
|
||||
|
||||
Alice:
|
||||
@@ -1 +0,0 @@
|
||||
Complete the following sentence: {{.Input}}
|
||||
Reference in New Issue
Block a user