chore(gosec): fix CI

downgrade to latest known version of the gosec action Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
chore(exllama): drop exllama backend
2026-02-03 11:13:31 -05:00 · 2024-09-13 19:17:27 +02:00 · 2024-09-13 19:09:43 +02:00 · 2024-09-13 13:27:36 +02:00 · 2024-09-12 20:55:27 +02:00 · 2024-09-12 06:52:27 +00:00
111 changed files with 2245 additions and 1165 deletions
--- a/.devcontainer-scripts/utils.sh
+++ b/.devcontainer-scripts/utils.sh
@@ -32,18 +32,22 @@ config_remote() {
 }

 # Setup special .ssh files
-#
+# Prints out lines of text to make things pretty
 # Param 1: bash array, filenames relative to the customization directory that should be copied to ~/.ssh
 setup_ssh() {
+    echo "starting ~/.ssh directory setup..."
+    mkdir -p "${HOME}.ssh"
+    chmod 0700 "${HOME}/.ssh"
+    echo "-----"
    local files=("$@")
-    for file in "${files[@]}"; then
+    for file in "${files[@]}" ; do
        local cfile="/devcontainer-customization/${file}"
-        local hfile="~/.ssh/${file}"
+        local hfile="${HOME}/.ssh/${file}"
        if [ ! -f "${hfile}" ]; then
-            echo "copying ${file}"
+            echo "copying \"${file}\""
            cp "${cfile}" "${hfile}"
            chmod 600 "${hfile}"
        fi
    done
-    ls ~/.ssh
+    echo "~/.ssh directory setup complete!"
 }
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -56,7 +56,7 @@ jobs:
          rm -rfv ${{ matrix.variable }}_message.txt
          rm -rfv ${{ matrix.variable }}_commit.txt
      - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v6
+        uses: peter-evans/create-pull-request@v7
        with:
          token: ${{ secrets.UPDATE_BOT_TOKEN }}
          push-to-fork: ci-forks/LocalAI
--- a/.github/workflows/bump_docs.yaml
+++ b/.github/workflows/bump_docs.yaml
@@ -17,7 +17,7 @@ jobs:
        run: |
          bash .github/bump_docs.sh ${{ matrix.repository }}
      - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v6
+        uses: peter-evans/create-pull-request@v7
        with:
          token: ${{ secrets.UPDATE_BOT_TOKEN }}
          push-to-fork: ci-forks/LocalAI
--- a/.github/workflows/checksum_checker.yaml
+++ b/.github/workflows/checksum_checker.yaml
@@ -36,7 +36,7 @@ jobs:
          sudo chmod 777 /hf_cache
          bash .github/checksum_checker.sh gallery/index.yaml
      - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v6
+        uses: peter-evans/create-pull-request@v7
        with:
          token: ${{ secrets.UPDATE_BOT_TOKEN }}
          push-to-fork: ci-forks/LocalAI
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -294,7 +294,7 @@ jobs:
          export C_INCLUDE_PATH=/usr/local/include
          export CPLUS_INCLUDE_PATH=/usr/local/include
          export PATH=$PATH:$GOPATH/bin
-
+          export SKIP_GRPC_BACKEND=backend-assets/grpc/whisper
          make dist
      - uses: actions/upload-artifact@v4
        with:
@@ -327,7 +327,7 @@ jobs:
          cache: false
      - name: Dependencies
        run: |
-          brew install protobuf grpc
+          brew install protobuf grpc libomp llvm
          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
      - name: Build
@@ -336,7 +336,7 @@ jobs:
          export C_INCLUDE_PATH=/usr/local/include
          export CPLUS_INCLUDE_PATH=/usr/local/include
          export PATH=$PATH:$GOPATH/bin
-
+          export CC=/opt/homebrew/opt/llvm/bin/clang
          make dist
      - uses: actions/upload-artifact@v4
        with:
--- a/.github/workflows/secscan.yaml
+++ b/.github/workflows/secscan.yaml
@@ -18,7 +18,7 @@ jobs:
        if: ${{ github.actor != 'dependabot[bot]' }}
      - name: Run Gosec Security Scanner
        if: ${{ github.actor != 'dependabot[bot]' }}
-        uses: securego/gosec@master
+        uses: securego/gosec@v2.21.0
        with:
          # we let the report trigger content trigger a failure using the GitHub Security features.
          args: '-no-fail -fmt sarif -out results.sarif ./...'
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -214,12 +214,13 @@ jobs:
        run: go version
      - name: Dependencies
        run: |
-          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
+          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
          pip install --user --no-cache-dir grpcio-tools==1.64.1
      - name: Test
        run: |
          export C_INCLUDE_PATH=/usr/local/include
          export CPLUS_INCLUDE_PATH=/usr/local/include
+          export CC=/opt/homebrew/opt/llvm/bin/clang
          # Used to run the newer GNUMake version from brew that supports --output-sync
          export PATH="/opt/homebrew/opt/make/libexec/gnubin:$PATH"
          BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make --jobs 4 --output-sync=target test
--- a/.github/workflows/update_swagger.yaml
+++ b/.github/workflows/update_swagger.yaml
@@ -25,7 +25,7 @@ jobs:
        run: |
          make protogen-go swagger
      - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v6
+        uses: peter-evans/create-pull-request@v7
        with:
          token: ${{ secrets.UPDATE_BOT_TOKEN }}
          push-to-fork: ci-forks/LocalAI
--- a/39
+++ b/39
@@ -13,7 +13,7 @@ ARG TARGETARCH
 ARG TARGETVARIANT

 ENV DEBIAN_FRONTEND=noninteractive
-ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
+ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"


 RUN apt-get update && \
@@ -263,14 +263,20 @@ EOT
 # In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
 FROM builder-base AS builder-sd

-COPY . .
-COPY .git .
+# stablediffusion does not tolerate a newer version of abseil, copy only over enough elements to build it
+COPY Makefile .
+COPY go.mod .
+COPY go.sum .
+COPY backend/backend.proto ./backend/backend.proto
+COPY backend/go/image/stablediffusion ./backend/go/image/stablediffusion
+COPY pkg/grpc ./pkg/grpc
+COPY pkg/stablediffusion ./pkg/stablediffusion
+RUN git init
+RUN make sources/go-stable-diffusion
+RUN touch prepare-sources

-RUN make prepare
-
-
-# stablediffusion does not tolerate a newer version of abseil, build it first
-RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
+# Actually build the backend
+RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make backend-assets/grpc/stablediffusion

 ###################################
 ###################################
@@ -285,8 +291,20 @@ COPY --from=grpc /opt/grpc /usr/local
 # Rebuild with defaults backends
 WORKDIR /build

+COPY . .
+COPY .git .
+
+RUN make prepare
+
 ## Build the binary
-RUN make build
+## If it's CUDA, we want to skip some of the llama-compat backends to save space
+## We only leave the most CPU-optimized variant and the fallback for the cublas build
+## (both will use CUDA for the actual computation)
+RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
+        SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
+    else \
+        make build; \
+    fi

 RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
        mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
@@ -400,9 +418,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG
    ; fi && \
    if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
        make -C backend/python/transformers-musicgen \
-    ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "exllama1" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/exllama \
    ; fi

 RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
--- a/19
+++ b/19
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=fc54ef0d1c138133a01933296d50a36a1ab64735
+CPPLLAMA_VERSION?=e6b7801bd189d102d901d3e72035611a25456ef1

 # go-rwkv version
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
@@ -16,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=9e3c5345cd46ea718209db53464e426c3fe7a25e
+WHISPER_CPP_VERSION?=a551933542d956ae84634937acd2942eb40efaaf

 # bert.cpp version
 BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -534,10 +534,10 @@ protogen-go-clean:
 	$(RM) bin/*

 .PHONY: protogen-python
-protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
+protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen

 .PHONY: protogen-python-clean
-protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
+protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean  exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean

 .PHONY: autogptq-protogen
 autogptq-protogen:
@@ -571,14 +571,6 @@ diffusers-protogen:
 diffusers-protogen-clean:
 	$(MAKE) -C backend/python/diffusers protogen-clean

-.PHONY: exllama-protogen
-exllama-protogen:
-	$(MAKE) -C backend/python/exllama protogen
-
-.PHONY: exllama-protogen-clean
-exllama-protogen-clean:
-	$(MAKE) -C backend/python/exllama protogen-clean
-
 .PHONY: exllama2-protogen
 exllama2-protogen:
 	$(MAKE) -C backend/python/exllama2 protogen
@@ -675,7 +667,6 @@ prepare-extra-conda-environments: protogen-python
 	$(MAKE) -C backend/python/parler-tts
 	$(MAKE) -C backend/python/vall-e-x
 	$(MAKE) -C backend/python/openvoice
-	$(MAKE) -C backend/python/exllama
 	$(MAKE) -C backend/python/exllama2

 prepare-test-extra: protogen-python
@@ -846,7 +837,7 @@ endif

 backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
 ifneq ($(UPX),)
 	$(UPX) backend-assets/grpc/whisper
 endif
--- a/README.md
+++ b/README.md
@@ -40,7 +40,7 @@

 > :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
 >
-> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
+> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/go-skynet/LocalAI/tree/master/examples/) 

 [![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)

@@ -72,6 +72,7 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu

 [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)

+- Aug 2024:  🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
 - July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
 - June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
 - June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628
--- a/aio/intel/image-gen.yaml
+++ b/aio/intel/image-gen.yaml
@@ -1,6 +1,6 @@
 name: stablediffusion
 parameters:
-  model: runwayml/stable-diffusion-v1-5
+  model: Lykon/dreamshaper-8
 backend: diffusers
 step: 25
 f16: true
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -16,6 +16,7 @@ service Backend {
  rpc GenerateImage(GenerateImageRequest) returns (Result) {}
  rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
  rpc TTS(TTSRequest) returns (Result) {}
+  rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
  rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
  rpc Status(HealthMessage) returns (StatusResponse) {}

@@ -270,6 +271,17 @@ message TTSRequest {
  optional string language = 5;
 }

+message SoundGenerationRequest {
+  string text = 1;
+  string model = 2;
+  string dst = 3;
+  optional float duration = 4;
+  optional float temperature = 5;
+  optional bool sample = 6;
+  optional string src = 7;
+  optional int32 src_divisor = 8;
+}
+
 message TokenizationResponse {
  int32 length = 1;
  repeated int32 tokens = 2;
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -17,11 +17,10 @@
 #include "common.h"
 #include "json.hpp"
 #include "llama.h"
-#include "grammar-parser.h"
 #include "backend.pb.h"
 #include "backend.grpc.pb.h"
 #include "utils.hpp"
-
+#include "sampling.h"
 // include std::regex
 #include <cstddef>
 #include <thread>
@@ -203,8 +202,8 @@ struct llama_client_slot
    std::string stopping_word;

    // sampling
-    struct llama_sampling_params sparams;
-    llama_sampling_context *ctx_sampling = nullptr;
+    struct gpt_sampler_params sparams;
+    gpt_sampler *ctx_sampling = nullptr;

    int32_t ga_i = 0;   // group-attention state
    int32_t ga_n = 1;   // group-attention factor
@@ -619,7 +618,7 @@ struct llama_server_context

    bool launch_slot_with_data(llama_client_slot* &slot, json data) {
        slot_params default_params;
-        llama_sampling_params default_sparams;
+        gpt_sampler_params default_sparams;
 
        slot->params.stream             = json_value(data, "stream",            false);
        slot->params.cache_prompt       = json_value(data, "cache_prompt",      false);
@@ -628,7 +627,7 @@ struct llama_server_context
        slot->sparams.top_p             = json_value(data, "top_p",             default_sparams.top_p);
        slot->sparams.min_p             = json_value(data, "min_p",             default_sparams.min_p);
        slot->sparams.tfs_z             = json_value(data, "tfs_z",             default_sparams.tfs_z);
-        slot->sparams.typical_p         = json_value(data, "typical_p",         default_sparams.typical_p);
+        slot->sparams.typ_p             = json_value(data, "typical_p",         default_sparams.typ_p);
        slot->sparams.temp              = json_value(data, "temperature",       default_sparams.temp);
        slot->sparams.dynatemp_range    = json_value(data, "dynatemp_range",    default_sparams.dynatemp_range);
        slot->sparams.dynatemp_exponent = json_value(data, "dynatemp_exponent", default_sparams.dynatemp_exponent);
@@ -641,7 +640,7 @@ struct llama_server_context
        slot->sparams.mirostat_eta      = json_value(data, "mirostat_eta",      default_sparams.mirostat_eta);
        slot->sparams.penalize_nl       = json_value(data, "penalize_nl",       default_sparams.penalize_nl);
        slot->params.n_keep             = json_value(data, "n_keep",            slot->params.n_keep);
-        slot->params.seed               = json_value(data, "seed",              default_params.seed);
+        slot->sparams.seed               = json_value(data, "seed",              default_sparams.seed);
        slot->sparams.grammar           = json_value(data, "grammar",           default_sparams.grammar);
        slot->sparams.n_probs           = json_value(data, "n_probs",           default_sparams.n_probs);
        slot->sparams.min_keep          = json_value(data, "min_keep",          default_sparams.min_keep);
@@ -665,6 +664,7 @@ struct llama_server_context
            slot->params.input_prefix = "";
        }

+
        if (data.count("input_suffix") != 0)
        {
            slot->params.input_suffix = data["input_suffix"];
@@ -683,6 +683,10 @@ struct llama_server_context
            slot->prompt = "";
        }

+        if (json_value(data, "ignore_eos", false)) {
+                slot->sparams.logit_bias.push_back({llama_token_eos(model), -INFINITY});
+        }
+        /*
        slot->sparams.penalty_prompt_tokens.clear();
        slot->sparams.use_penalty_prompt_tokens = false;
        const auto &penalty_prompt = data.find("penalty_prompt");
@@ -718,14 +722,10 @@ struct llama_server_context
                slot->sparams.use_penalty_prompt_tokens = true;
            }
        }
+      */

        slot->sparams.logit_bias.clear();

-        if (json_value(data, "ignore_eos", false))
-        {
-            slot->sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
-        }
-
        const auto &logit_bias = data.find("logit_bias");
        if (logit_bias != data.end() && logit_bias->is_array())
        {
@@ -753,7 +753,7 @@ struct llama_server_context
                        llama_token tok = el[0].get<llama_token>();
                        if (tok >= 0 && tok < n_vocab)
                        {
-                            slot->sparams.logit_bias[tok] = bias;
+                            slot->sparams.logit_bias.push_back({tok, bias});
                        }
                    }
                    else if (el[0].is_string())
@@ -761,13 +761,13 @@ struct llama_server_context
                        auto toks = llama_tokenize(model, el[0].get<std::string>(), false);
                        for (auto tok : toks)
                        {
-                            slot->sparams.logit_bias[tok] = bias;
+                            slot->sparams.logit_bias.push_back({tok, bias});
                        }
                    }
                }
            }
        }
-
+        
        slot->params.antiprompt.clear();

        const auto &stop = data.find("stop");
@@ -781,24 +781,22 @@ struct llama_server_context
                }
            }
        }
-
-        const auto &samplers_sequence = data.find("samplers");
-        if (samplers_sequence != data.end() && samplers_sequence->is_array())
-        {
+        
+        const auto & samplers = data.find("samplers");
+        if (samplers != data.end() && samplers->is_array()) {
            std::vector<std::string> sampler_names;
-            for (const auto &sampler_name : *samplers_sequence)
-            {
-                if (sampler_name.is_string())
-                {
-                    sampler_names.emplace_back(sampler_name);
+                for (const auto & name : *samplers) {
+                    if (name.is_string()) {
+                        sampler_names.emplace_back(name);
+                    }
                }
-            }
-            slot->sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
+                slot->sparams.samplers = gpt_sampler_types_from_names(sampler_names, false);
        }
        else
        {
-            slot->sparams.samplers_sequence = default_sparams.samplers_sequence;
+                slot->sparams.samplers = default_sparams.samplers;
        }
+        

        if (multimodal)
        {
@@ -875,10 +873,10 @@ struct llama_server_context

        if (slot->ctx_sampling != nullptr)
        {
-            llama_sampling_free(slot->ctx_sampling);
+            gpt_sampler_free(slot->ctx_sampling);
        }
-        slot->ctx_sampling = llama_sampling_init(slot->sparams);
-        llama_set_rng_seed(ctx, slot->params.seed);
+        slot->ctx_sampling = gpt_sampler_init(model, slot->sparams);
+        //llama_set_rng_seed(ctx, slot->params.seed);
        slot->command = LOAD_PROMPT;

        all_slots_are_idle = false;
@@ -888,7 +886,7 @@ struct llama_server_context
            {"task_id", slot->task_id},
        });

-        LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
+      //  LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());

        return true;
    }
@@ -1006,11 +1004,13 @@ struct llama_server_context
        slot.generated_text += token_str;
        slot.has_next_token = true;

+/*
        if (slot.ctx_sampling->params.use_penalty_prompt_tokens && result.tok != -1)
        {
            // we can change penalty_prompt_tokens because it is always created from scratch each request
            slot.ctx_sampling->params.penalty_prompt_tokens.push_back(result.tok);
        }
+        */

        // check if there is incomplete UTF-8 character at the end
        bool incomplete = false;
@@ -1119,7 +1119,7 @@ struct llama_server_context
                continue;
            }

-            if (!llava_image_embed_make_with_clip_img(clp_ctx, params.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
+            if (!llava_image_embed_make_with_clip_img(clp_ctx, params.cpuparams.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
                LOG_TEE("Error processing the given image");
                return false;
            }
@@ -1144,13 +1144,11 @@ struct llama_server_context

    json get_formated_generation(llama_client_slot &slot)
    {
-        const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
-        const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
-                                eos_bias->second < 0.0f && std::isinf(eos_bias->second);
-        std::vector<std::string> samplers_sequence;
-        for (const auto &sampler_type : slot.sparams.samplers_sequence)
+        std::vector<std::string> samplers;
+        samplers.reserve(slot.sparams.samplers.size());
+        for (const auto & sampler : slot.sparams.samplers)
        {
-            samplers_sequence.emplace_back(llama_sampling_type_to_str(sampler_type));
+            samplers.emplace_back(gpt_sampler_type_to_str(sampler));
        }

        return json {
@@ -1165,13 +1163,11 @@ struct llama_server_context
            {"top_p",             slot.sparams.top_p},
            {"min_p",             slot.sparams.min_p},
            {"tfs_z",             slot.sparams.tfs_z},
-            {"typical_p",         slot.sparams.typical_p},
+            {"typical_p",         slot.sparams.typ_p},
            {"repeat_last_n",     slot.sparams.penalty_last_n},
            {"repeat_penalty",    slot.sparams.penalty_repeat},
            {"presence_penalty",  slot.sparams.penalty_present},
            {"frequency_penalty", slot.sparams.penalty_freq},
-            {"penalty_prompt_tokens", slot.sparams.penalty_prompt_tokens},
-            {"use_penalty_prompt_tokens", slot.sparams.use_penalty_prompt_tokens},
            {"mirostat",          slot.sparams.mirostat},
            {"mirostat_tau",      slot.sparams.mirostat_tau},
            {"mirostat_eta",      slot.sparams.mirostat_eta},
@@ -1179,13 +1175,13 @@ struct llama_server_context
            {"stop",              slot.params.antiprompt},
            {"n_predict",         slot.params.n_predict},
            {"n_keep",            params.n_keep},
-            {"ignore_eos",        ignore_eos},
+            {"ignore_eos",        slot.sparams.ignore_eos},
            {"stream",            slot.params.stream},
-            {"logit_bias",        slot.sparams.logit_bias},
+      //      {"logit_bias",        slot.sparams.logit_bias},
            {"n_probs",           slot.sparams.n_probs},
            {"min_keep",          slot.sparams.min_keep},
            {"grammar",           slot.sparams.grammar},
-            {"samplers",          samplers_sequence}
+            {"samplers",          samplers}
        };
    }

@@ -1714,7 +1710,7 @@ struct llama_server_context

                    if (!slot.params.cache_prompt)
                    {
-                        llama_sampling_reset(slot.ctx_sampling);
+                        gpt_sampler_reset(slot.ctx_sampling);

                        slot.n_past = 0;
                        slot.n_past_se = 0;
@@ -1726,7 +1722,7 @@ struct llama_server_context
                        // push the prompt into the sampling context (do not apply grammar)
                        for (auto &token : prompt_tokens)
                        {
-                            llama_sampling_accept(slot.ctx_sampling, ctx, token, false);
+                            gpt_sampler_accept(slot.ctx_sampling, token, false);
                        }

                        slot.n_past = common_part(slot.cache_tokens, prompt_tokens);
@@ -1934,9 +1930,9 @@ struct llama_server_context
                }

                completion_token_output result;
-                const llama_token id = llama_sampling_sample(slot.ctx_sampling, ctx, NULL, slot.i_batch - i);
+                const llama_token id = gpt_sampler_sample(slot.ctx_sampling, ctx, slot.i_batch - i);

-                llama_sampling_accept(slot.ctx_sampling, ctx, id, true);
+                gpt_sampler_accept(slot.ctx_sampling, id, true);

                slot.n_decoded += 1;
                if (slot.n_decoded == 1)
@@ -1946,19 +1942,14 @@ struct llama_server_context
                    metrics.on_prompt_eval(slot);
                }

-                llama_token_data_array cur_p = { slot.ctx_sampling->cur.data(), slot.ctx_sampling->cur.size(), false };
                result.tok = id;
+                const auto * cur_p = gpt_sampler_get_candidates(slot.ctx_sampling);

-                const int32_t n_probs = slot.sparams.n_probs;
-                if (slot.sparams.temp <= 0 && n_probs > 0)
-                {
-                    // for llama_sample_token_greedy we need to sort candidates
-                    llama_sample_softmax(ctx, &cur_p);
-                }
-
-                for (size_t i = 0; i < std::min(cur_p.size, (size_t)n_probs); ++i)
-                {
-                    result.probs.push_back({cur_p.data[i].id, cur_p.data[i].p});
+                for (size_t i = 0; i < (size_t) slot.sparams.n_probs; ++i) {
+                    result.probs.push_back({
+                        cur_p->data[i].id,
+                        i >= cur_p->size ? 0.0f : cur_p->data[i].p,
+                    });
                }

                if (!process_token(result, slot))
@@ -2210,7 +2201,7 @@ static void params_parse(const backend::ModelOptions* request,
    params.model_alias =  request->modelfile();
    params.n_ctx = request->contextsize();
    //params.memory_f16 = request->f16memory();
-    params.n_threads = request->threads();
+    params.cpuparams.n_threads = request->threads();
    params.n_gpu_layers = request->ngpulayers();
    params.n_batch = request->nbatch();
    // Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1
--- a/backend/cpp/llama/patches/01-llava.patch
+++ b/backend/cpp/llama/patches/01-llava.patch
@@ -0,0 +1,13 @@
+diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
+index 342042ff..224db9b5 100644
+--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
+@@ -2419,7 +2419,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
+             struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
+             int* patches_data = (int*)malloc(ggml_nbytes(patches));
+             for (int i = 0; i < num_patches; i++) {
+-                patches_data[i] = i + 1;
+                patches_data[i] = i;
+             }
+             ggml_backend_tensor_set(patches, patches_data, 0, ggml_nbytes(patches));
+             free(patches_data);
--- a/backend/cpp/llama/prepare.sh
+++ b/backend/cpp/llama/prepare.sh
@@ -1,5 +1,12 @@
 #!/bin/bash

+## Patches
+## Apply patches from the `patches` directory
+for patch in $(ls patches); do
+    echo "Applying patch $patch"
+    patch -d llama.cpp/ -p1 < patches/$patch
+done 
+
 cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
 cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
 cp -rfv json.hpp llama.cpp/examples/grpc-server/
--- a/backend/cpp/llama/utils.hpp
+++ b/backend/cpp/llama/utils.hpp
@@ -480,31 +480,4 @@ static inline std::vector<uint8_t> base64_decode(const std::string & encoded_str
    }

    return ret;
-}
-
-//
-// random string / id
-//
-
-static std::string random_string()
-{
-    static const std::string str("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
-
-    std::random_device rd;
-    std::mt19937 generator(rd());
-
-    std::string result(32, ' ');
-
-    for (int i = 0; i < 32; ++i) {
-        result[i] = str[generator() % str.size()];
-    }
-
-    return result;
-}
-
-static std::string gen_chatcmplid()
-{
-    std::stringstream chatcmplid;
-    chatcmplid << "chatcmpl-" << random_string();
-    return chatcmplid.str();
 }
--- a/backend/go/transcribe/transcript.go
+++ b/backend/go/transcribe/transcript.go
@@ -1,104 +0,0 @@
-package main
-
-import (
-	"fmt"
-	"os"
-	"os/exec"
-	"path/filepath"
-
-	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
-	"github.com/go-audio/wav"
-	"github.com/mudler/LocalAI/core/schema"
-)
-
-func ffmpegCommand(args []string) (string, error) {
-	cmd := exec.Command("ffmpeg", args...) // Constrain this to ffmpeg to permit security scanner to see that the command is safe.
-	cmd.Env = os.Environ()
-	out, err := cmd.CombinedOutput()
-	return string(out), err
-}
-
-// AudioToWav converts audio to wav for transcribe.
-// TODO: use https://github.com/mccoyst/ogg?
-func audioToWav(src, dst string) error {
-	commandArgs := []string{"-i", src, "-format", "s16le", "-ar", "16000", "-ac", "1", "-acodec", "pcm_s16le", dst}
-	out, err := ffmpegCommand(commandArgs)
-	if err != nil {
-		return fmt.Errorf("error: %w out: %s", err, out)
-	}
-	return nil
-}
-
-func Transcript(model whisper.Model, audiopath, language string, translate bool, threads uint) (schema.TranscriptionResult, error) {
-	res := schema.TranscriptionResult{}
-
-	dir, err := os.MkdirTemp("", "whisper")
-	if err != nil {
-		return res, err
-	}
-	defer os.RemoveAll(dir)
-
-	convertedPath := filepath.Join(dir, "converted.wav")
-
-	if err := audioToWav(audiopath, convertedPath); err != nil {
-		return res, err
-	}
-
-	// Open samples
-	fh, err := os.Open(convertedPath)
-	if err != nil {
-		return res, err
-	}
-	defer fh.Close()
-
-	// Read samples
-	d := wav.NewDecoder(fh)
-	buf, err := d.FullPCMBuffer()
-	if err != nil {
-		return res, err
-	}
-
-	data := buf.AsFloat32Buffer().Data
-
-	// Process samples
-	context, err := model.NewContext()
-	if err != nil {
-		return res, err
-
-	}
-
-	context.SetThreads(threads)
-
-	if language != "" {
-		context.SetLanguage(language)
-	} else {
-		context.SetLanguage("auto")
-	}
-
-	if translate {
-		context.SetTranslate(true)
-	}
-
-	if err := context.Process(data, nil, nil); err != nil {
-		return res, err
-	}
-
-	for {
-		s, err := context.NextSegment()
-		if err != nil {
-			break
-		}
-
-		var tokens []int
-		for _, t := range s.Tokens {
-			tokens = append(tokens, t.Id)
-		}
-
-		segment := schema.Segment{Id: s.Num, Text: s.Text, Start: s.Start, End: s.End, Tokens: tokens}
-		res.Segments = append(res.Segments, segment)
-
-		res.Text += s.Text
-	}
-
-	return res, nil
-}
--- a/backend/go/transcribe/whisper.go
+++ b/backend/go/transcribe/whisper.go
@@ -1,26 +0,0 @@
-package main
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
-	"github.com/mudler/LocalAI/core/schema"
-	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
-)
-
-type Whisper struct {
-	base.SingleThread
-	whisper whisper.Model
-}
-
-func (sd *Whisper) Load(opts *pb.ModelOptions) error {
-	// Note: the Model here is a path to a directory containing the model files
-	w, err := whisper.New(opts.ModelFile)
-	sd.whisper = w
-	return err
-}
-
-func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.TranscriptionResult, error) {
-	return Transcript(sd.whisper, opts.Dst, opts.Language, opts.Translate, uint(opts.Threads))
-}
--- a/backend/go/transcribe/whisper/main.go
+++ b/backend/go/transcribe/whisper/main.go
--- a/backend/go/transcribe/whisper/whisper.go
+++ b/backend/go/transcribe/whisper/whisper.go
@@ -0,0 +1,105 @@
+package main
+
+// This is a wrapper to statisfy the GRPC service interface
+// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
+import (
+	"os"
+	"path/filepath"
+
+	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
+	"github.com/go-audio/wav"
+	"github.com/mudler/LocalAI/pkg/grpc/base"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
+	"github.com/mudler/LocalAI/pkg/utils"
+)
+
+type Whisper struct {
+	base.SingleThread
+	whisper whisper.Model
+}
+
+func (sd *Whisper) Load(opts *pb.ModelOptions) error {
+	// Note: the Model here is a path to a directory containing the model files
+	w, err := whisper.New(opts.ModelFile)
+	sd.whisper = w
+	return err
+}
+
+func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (pb.TranscriptResult, error) {
+
+	dir, err := os.MkdirTemp("", "whisper")
+	if err != nil {
+		return pb.TranscriptResult{}, err
+	}
+	defer os.RemoveAll(dir)
+
+	convertedPath := filepath.Join(dir, "converted.wav")
+
+	if err := utils.AudioToWav(opts.Dst, convertedPath); err != nil {
+		return pb.TranscriptResult{}, err
+	}
+
+	// Open samples
+	fh, err := os.Open(convertedPath)
+	if err != nil {
+		return pb.TranscriptResult{}, err
+	}
+	defer fh.Close()
+
+	// Read samples
+	d := wav.NewDecoder(fh)
+	buf, err := d.FullPCMBuffer()
+	if err != nil {
+		return pb.TranscriptResult{}, err
+	}
+
+	data := buf.AsFloat32Buffer().Data
+
+	// Process samples
+	context, err := sd.whisper.NewContext()
+	if err != nil {
+		return pb.TranscriptResult{}, err
+
+	}
+
+	context.SetThreads(uint(opts.Threads))
+
+	if opts.Language != "" {
+		context.SetLanguage(opts.Language)
+	} else {
+		context.SetLanguage("auto")
+	}
+
+	if opts.Translate {
+		context.SetTranslate(true)
+	}
+
+	if err := context.Process(data, nil, nil); err != nil {
+		return pb.TranscriptResult{}, err
+	}
+
+	segments := []*pb.TranscriptSegment{}
+	text := ""
+	for {
+		s, err := context.NextSegment()
+		if err != nil {
+			break
+		}
+
+		var tokens []int32
+		for _, t := range s.Tokens {
+			tokens = append(tokens, int32(t.Id))
+		}
+
+		segment := &pb.TranscriptSegment{Id: int32(s.Num), Text: s.Text, Start: int64(s.Start), End: int64(s.End), Tokens: tokens}
+		segments = append(segments, segment)
+
+		text += s.Text
+	}
+
+	return pb.TranscriptResult{
+		Segments: segments,
+		Text:     text,
+	}, nil
+
+}
--- a/backend/python/autogptq/requirements.txt
+++ b/backend/python/autogptq/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 auto-gptq==0.7.1
-grpcio==1.65.4
+grpcio==1.66.1
 protobuf
 certifi
 transformers
--- a/backend/python/bark/requirements.txt
+++ b/backend/python/bark/requirements.txt
@@ -1,4 +1,4 @@
 bark==0.1.5
-grpcio==1.65.5
+grpcio==1.66.1
 protobuf
 certifi
--- a/backend/python/common/template/requirements.txt
+++ b/backend/python/common/template/requirements.txt
@@ -1,2 +1,2 @@
-grpcio==1.65.5
+grpcio==1.66.1
 protobuf
--- a/backend/python/coqui/requirements.txt
+++ b/backend/python/coqui/requirements.txt
@@ -1,4 +1,4 @@
 TTS==0.22.0
-grpcio==1.65.5
+grpcio==1.66.1
 protobuf
 certifi
--- a/backend/python/diffusers/backend.py
+++ b/backend/python/diffusers/backend.py
@@ -168,7 +168,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
            if request.CFGScale != 0:
                self.cfg_scale = request.CFGScale

-            clipmodel = "runwayml/stable-diffusion-v1-5"
+            clipmodel = "Lykon/dreamshaper-8"
            if request.CLIPModel != "":
                clipmodel = request.CLIPModel
            clipsubfolder = "text_encoder"
--- a/backend/python/diffusers/requirements.txt
+++ b/backend/python/diffusers/requirements.txt
@@ -1,5 +1,5 @@
 setuptools
-grpcio==1.65.4
+grpcio==1.66.1
 pillow
 protobuf
 certifi
--- a/backend/python/diffusers/test.py
+++ b/backend/python/diffusers/test.py
@@ -53,7 +53,7 @@ class TestBackendServicer(unittest.TestCase):
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
-                response = stub.LoadModel(backend_pb2.ModelOptions(Model="runwayml/stable-diffusion-v1-5"))
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="Lykon/dreamshaper-8"))
                self.assertTrue(response.success)
                self.assertEqual(response.message, "Model loaded successfully")
        except Exception as err:
@@ -71,7 +71,7 @@ class TestBackendServicer(unittest.TestCase):
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
-                response = stub.LoadModel(backend_pb2.ModelOptions(Model="runwayml/stable-diffusion-v1-5"))
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="Lykon/dreamshaper-8"))
                print(response.message)
                self.assertTrue(response.success)
                image_req = backend_pb2.GenerateImageRequest(positive_prompt="cat", width=16,height=16, dst="test.jpg")
@@ -81,4 +81,4 @@ class TestBackendServicer(unittest.TestCase):
            print(err)
            self.fail("Image gen service failed")
        finally:
-            self.tearDown()
+            self.tearDown()
--- a/backend/python/exllama/.gitignore
+++ b/backend/python/exllama/.gitignore
@@ -1 +0,0 @@
-source
--- a/backend/python/exllama/Makefile
+++ b/backend/python/exllama/Makefile
@@ -1,25 +0,0 @@
-export CONDA_ENV_PATH = "exllama.yml"
-
-.PHONY: exllama
-exllama: protogen
-	bash install.sh ${CONDA_ENV_PATH}
-
-.PHONY: run
-run: protogen
-	@echo "Running exllama..."
-	bash run.sh
-	@echo "exllama run."
-
-.PHONY: protogen
-protogen: backend_pb2_grpc.py backend_pb2.py
-
-.PHONY: protogen-clean
-protogen-clean:
-	$(RM) backend_pb2_grpc.py backend_pb2.py
-
-backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
-
-.PHONY: clean
-clean: protogen-clean
-	$(RM) -r venv source __pycache__
--- a/backend/python/exllama/README.md
+++ b/backend/python/exllama/README.md
@@ -1,5 +0,0 @@
-# Creating a separate environment for the exllama project
-
-```
-make exllama
-```
--- a/backend/python/exllama/backend.py
+++ b/backend/python/exllama/backend.py
@@ -1,159 +0,0 @@
-#!/usr/bin/env python3
-import grpc
-from concurrent import futures
-import time
-import backend_pb2
-import backend_pb2_grpc
-import argparse
-import signal
-import sys
-import os, glob
-
-from pathlib import Path
-import torch
-import torch.nn.functional as F
-from torch import version as torch_version
-
-from source.tokenizer import ExLlamaTokenizer
-from source.generator import ExLlamaGenerator
-from source.model import ExLlama, ExLlamaCache, ExLlamaConfig
-
-_ONE_DAY_IN_SECONDS = 60 * 60 * 24
-
-# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
-MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
-
-# Implement the BackendServicer class with the service methods
-class BackendServicer(backend_pb2_grpc.BackendServicer):
-    def generate(self,prompt, max_new_tokens):
-        self.generator.end_beam_search()
-
-        # Tokenizing the input
-        ids = self.generator.tokenizer.encode(prompt)
-
-        self.generator.gen_begin_reuse(ids)
-        initial_len = self.generator.sequence[0].shape[0]
-        has_leading_space = False
-        decoded_text = ''
-        for i in range(max_new_tokens):
-            token = self.generator.gen_single_token()
-            if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'):
-                has_leading_space = True
-
-            decoded_text = self.generator.tokenizer.decode(self.generator.sequence[0][initial_len:])
-            if has_leading_space:
-                decoded_text = ' ' + decoded_text
-
-            if token.item() == self.generator.tokenizer.eos_token_id:
-                break
-        return decoded_text
-    def Health(self, request, context):
-        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
-    def LoadModel(self, request, context):
-        try:
-            # https://github.com/turboderp/exllama/blob/master/example_cfg.py
-            model_directory = request.ModelFile
-
-            # Locate files we need within that directory
-            tokenizer_path = os.path.join(model_directory, "tokenizer.model")
-            model_config_path = os.path.join(model_directory, "config.json")
-            st_pattern = os.path.join(model_directory, "*.safetensors")
-            model_path = glob.glob(st_pattern)[0]
-
-            # Create config, model, tokenizer and generator
-
-            config = ExLlamaConfig(model_config_path)               # create config from config.json
-            config.model_path = model_path                          # supply path to model weights file
-            if (request.ContextSize):
-                config.max_seq_len = request.ContextSize            # override max sequence length
-                config.max_attention_size = request.ContextSize**2  # Should be set to context_size^2. 
-                # https://github.com/turboderp/exllama/issues/220#issuecomment-1720324163
-
-            # Set Rope scaling.
-            if (request.RopeFreqScale):
-                # Alpha value for Rope scaling. 
-                # Higher value increases context but adds perplexity.
-                # alpha_value and compress_pos_emb are mutually exclusive.
-                # https://github.com/turboderp/exllama/issues/115
-                config.alpha_value = request.RopeFreqScale
-                config.calculate_rotary_embedding_base()
-
-            model = ExLlama(config)                                 # create ExLlama instance and load the weights
-            tokenizer = ExLlamaTokenizer(tokenizer_path)            # create tokenizer from tokenizer model file
-
-            cache = ExLlamaCache(model, batch_size = 2)             # create cache for inference
-            generator = ExLlamaGenerator(model, tokenizer, cache)   # create generator
-
-            self.generator= generator
-            self.model = model
-            self.tokenizer = tokenizer
-            self.cache = cache
-        except Exception as err:
-            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
-        return backend_pb2.Result(message="Model loaded successfully", success=True)
-
-    def Predict(self, request, context):
-        penalty = 1.15
-        if request.Penalty != 0.0:
-            penalty = request.Penalty
-        self.generator.settings.token_repetition_penalty_max = penalty
-        self.generator.settings.temperature = request.Temperature
-        self.generator.settings.top_k = request.TopK
-        self.generator.settings.top_p = request.TopP
-
-        tokens = 512
-        if request.Tokens != 0:
-            tokens = request.Tokens
-
-        if self.cache.batch_size == 1:
-            del self.cache
-            self.cache = ExLlamaCache(self.model, batch_size=2)
-            self.generator = ExLlamaGenerator(self.model, self.tokenizer, self.cache)
-
-        t = self.generate(request.Prompt, tokens)
-
-        # Remove prompt from response if present
-        if request.Prompt in t:
-            t = t.replace(request.Prompt, "")
-
-        return backend_pb2.Result(message=bytes(t, encoding='utf-8'))
-
-    def PredictStream(self, request, context):
-        # Implement PredictStream RPC
-        #for reply in some_data_generator():
-        #    yield reply
-        # Not implemented yet
-        return self.Predict(request, context)
-
-
-def serve(address):
-    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
-    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
-    server.add_insecure_port(address)
-    server.start()
-    print("Server started. Listening on: " + address, file=sys.stderr)
-
-    # Define the signal handler function
-    def signal_handler(sig, frame):
-        print("Received termination signal. Shutting down...")
-        server.stop(0)
-        sys.exit(0)
-
-    # Set the signal handlers for SIGINT and SIGTERM
-    signal.signal(signal.SIGINT, signal_handler)
-    signal.signal(signal.SIGTERM, signal_handler)
-
-    try:
-        while True:
-            time.sleep(_ONE_DAY_IN_SECONDS)
-    except KeyboardInterrupt:
-        server.stop(0)
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Run the gRPC server.")
-    parser.add_argument(
-        "--addr", default="localhost:50051", help="The address to bind the server to."
-    )
-    args = parser.parse_args()
-
-    serve(args.addr)
--- a/backend/python/exllama/install.sh
+++ b/backend/python/exllama/install.sh
@@ -1,13 +0,0 @@
-#!/bin/bash
-set -e
-
-LIMIT_TARGETS="cublas"
-
-source $(dirname $0)/../common/libbackend.sh
-
-installRequirements
-
-git clone https://github.com/turboderp/exllama $MY_DIR/source
-uv pip install ${BUILD_ISOLATION_FLAG} --requirement ${MY_DIR}/source/requirements.txt
-
-cp -v ./*py $MY_DIR/source/
--- a/backend/python/exllama/requirements-cpu.txt
+++ b/backend/python/exllama/requirements-cpu.txt
@@ -1,3 +0,0 @@
-transformers
-accelerate
-torch
--- a/backend/python/exllama/requirements-cublas11.txt
+++ b/backend/python/exllama/requirements-cublas11.txt
@@ -1,4 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
-torch
-transformers
-accelerate
--- a/backend/python/exllama/requirements-cublas12.txt
+++ b/backend/python/exllama/requirements-cublas12.txt
@@ -1,3 +0,0 @@
-torch
-transformers
-accelerate
--- a/backend/python/exllama/requirements.txt
+++ b/backend/python/exllama/requirements.txt
@@ -1,4 +0,0 @@
-grpcio==1.65.5
-protobuf
-certifi
-setuptools
--- a/backend/python/exllama/run.sh
+++ b/backend/python/exllama/run.sh
@@ -1,7 +0,0 @@
-#!/bin/bash
-LIMIT_TARGETS="cublas"
-BACKEND_FILE="${MY_DIR}/source/backend.py"
-
-source $(dirname $0)/../common/libbackend.sh
-
-startBackend $@
--- a/backend/python/exllama/test.sh
+++ b/backend/python/exllama/test.sh
@@ -1,6 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-runUnittests
--- a/backend/python/exllama2/requirements.txt
+++ b/backend/python/exllama2/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.65.4
+grpcio==1.66.1
 protobuf
 certifi
 wheel
--- a/backend/python/mamba/requirements.txt
+++ b/backend/python/mamba/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.65.5
+grpcio==1.66.1
 protobuf
 certifi
--- a/backend/python/openvoice/requirements-intel.txt
+++ b/backend/python/openvoice/requirements-intel.txt
@@ -2,7 +2,7 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-grpcio==1.65.5
+grpcio==1.66.1
 protobuf
 librosa==0.9.1
 faster-whisper==1.0.3
--- a/backend/python/openvoice/requirements.txt
+++ b/backend/python/openvoice/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.65.5
+grpcio==1.66.1
 protobuf
 librosa
 faster-whisper
--- a/backend/python/parler-tts/requirements.txt
+++ b/backend/python/parler-tts/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.65.5
+grpcio==1.66.1
 protobuf
 certifi
 llvmlite==0.43.0
--- a/backend/python/rerankers/requirements.txt
+++ b/backend/python/rerankers/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.65.4
+grpcio==1.66.1
 protobuf
 certifi
--- a/backend/python/sentencetransformers/requirements.txt
+++ b/backend/python/sentencetransformers/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.65.5
+grpcio==1.66.1
 protobuf
 certifi
--- a/backend/python/transformers-musicgen/backend.py
+++ b/backend/python/transformers-musicgen/backend.py
@@ -15,7 +15,7 @@ import backend_pb2_grpc

 import grpc

-from scipy.io.wavfile import write as write_wav
+from scipy.io import wavfile
 from transformers import AutoProcessor, MusicgenForConditionalGeneration

 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
@@ -63,6 +63,61 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):

        return backend_pb2.Result(message="Model loaded successfully", success=True)

+    def SoundGeneration(self, request, context):
+        model_name = request.model
+        if model_name == "":
+            return backend_pb2.Result(success=False, message="request.model is required")
+        try:
+            self.processor = AutoProcessor.from_pretrained(model_name)
+            self.model = MusicgenForConditionalGeneration.from_pretrained(model_name)
+            inputs = None
+            if request.text == "":
+                inputs = self.model.get_unconditional_inputs(num_samples=1)
+            elif request.HasField('src'):
+                # TODO SECURITY CODE GOES HERE LOL
+                # WHO KNOWS IF THIS WORKS???
+                sample_rate, wsamples = wavfile.read('path_to_your_file.wav')
+                
+                if request.HasField('src_divisor'):
+                    wsamples = wsamples[: len(wsamples) // request.src_divisor]
+                
+                inputs = self.processor(
+                    audio=wsamples,
+                    sampling_rate=sample_rate,
+                    text=[request.text],
+                    padding=True,
+                    return_tensors="pt",
+                )
+            else:
+                inputs = self.processor(
+                    text=[request.text],
+                    padding=True,
+                    return_tensors="pt",
+                )
+            
+            tokens = 256
+            if request.HasField('duration'):
+                tokens = int(request.duration * 51.2) # 256 tokens = 5 seconds, therefore 51.2 tokens is one second
+            guidance = 3.0
+            if request.HasField('temperature'):
+                guidance = request.temperature
+            dosample = True
+            if request.HasField('sample'):
+                dosample = request.sample
+            audio_values = self.model.generate(**inputs, do_sample=dosample, guidance_scale=guidance, max_new_tokens=tokens)
+            print("[transformers-musicgen] SoundGeneration generated!", file=sys.stderr)
+            sampling_rate = self.model.config.audio_encoder.sampling_rate
+            wavfile.write(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy())
+            print("[transformers-musicgen] SoundGeneration saved to", request.dst, file=sys.stderr)
+            print("[transformers-musicgen] SoundGeneration for", file=sys.stderr)
+            print("[transformers-musicgen] SoundGeneration requested tokens", tokens, file=sys.stderr)
+            print(request, file=sys.stderr)
+        except Exception as err:
+            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+        return backend_pb2.Result(success=True)
+
+
+# The TTS endpoint is older, and provides fewer features, but exists for compatibility reasons
    def TTS(self, request, context):
        model_name = request.model
        if model_name == "":
@@ -75,8 +130,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                padding=True,
                return_tensors="pt",
            )
-            tokens = 256
-            # TODO get tokens from request?
+            tokens = 512 # No good place to set the "length" in TTS, so use 10s as a sane default
            audio_values = self.model.generate(**inputs, max_new_tokens=tokens)
            print("[transformers-musicgen] TTS generated!", file=sys.stderr)
            sampling_rate = self.model.config.audio_encoder.sampling_rate
--- a/backend/python/transformers-musicgen/requirements.txt
+++ b/backend/python/transformers-musicgen/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.65.5
+grpcio==1.66.1
 protobuf
 scipy==1.14.0
 certifi
--- a/backend/python/transformers-musicgen/test.py
+++ b/backend/python/transformers-musicgen/test.py
@@ -63,7 +63,7 @@ class TestBackendServicer(unittest.TestCase):

    def test_tts(self):
        """
-        This method tests if the embeddings are generated successfully
+        This method tests if TTS is generated successfully
        """
        try:
            self.setUp()
@@ -77,5 +77,24 @@ class TestBackendServicer(unittest.TestCase):
        except Exception as err:
            print(err)
            self.fail("TTS service failed")
+        finally:
+            self.tearDown()
+
+    def test_sound_generation(self):
+        """
+        This method tests if SoundGeneration is generated successfully
+        """
+        try:
+            self.setUp()
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small"))
+                self.assertTrue(response.success)
+                sg_request = backend_pb2.SoundGenerationRequest(text="80s TV news production music hit for tonight's biggest story")
+                sg_response = stub.SoundGeneration(sg_request)
+                self.assertIsNotNone(sg_response)
+        except Exception as err:
+            print(err)
+            self.fail("SoundGeneration service failed")
        finally:
            self.tearDown()
--- a/backend/python/transformers/requirements.txt
+++ b/backend/python/transformers/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.65.5
+grpcio==1.66.1
 protobuf
 certifi
 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
--- a/backend/python/vall-e-x/requirements.txt
+++ b/backend/python/vall-e-x/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.65.5
+grpcio==1.66.1
 protobuf
 certifi
--- a/backend/python/vllm/backend.py
+++ b/backend/python/vllm/backend.py
@@ -135,6 +135,26 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
        res = await gen.__anext__()
        return res

+    def Embedding(self, request, context):
+        """
+        A gRPC method that calculates embeddings for a given sentence.
+
+        Args:
+            request: An EmbeddingRequest object that contains the request parameters.
+            context: A grpc.ServicerContext object that provides information about the RPC.
+
+        Returns:
+            An EmbeddingResult object that contains the calculated embeddings.
+        """
+        print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr)
+        outputs = self.model.encode(request.Embeddings)
+        # Check if we have one result at least
+        if len(outputs) == 0:
+            context.set_code(grpc.StatusCode.INVALID_ARGUMENT)
+            context.set_details("No embeddings were calculated.")
+            return backend_pb2.EmbeddingResult()
+        return backend_pb2.EmbeddingResult(embeddings=outputs[0].outputs.embedding)
+
    async def PredictStream(self, request, context):
        """
        Generates text based on the given prompt and sampling parameters, and streams the results.
--- a/backend/python/vllm/requirements.txt
+++ b/backend/python/vllm/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.65.5
+grpcio==1.66.1
 protobuf
 certifi
 setuptools
--- a/backend/python/vllm/test.py
+++ b/backend/python/vllm/test.py
@@ -72,5 +72,28 @@ class TestBackendServicer(unittest.TestCase):
        except Exception as err:
            print(err)
            self.fail("text service failed")
+        finally:
+            self.tearDown()
+
+    def test_embedding(self):
+        """
+        This method tests if the embeddings are generated successfully
+        """
+        try:
+            self.setUp()
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="intfloat/e5-mistral-7b-instruct"))
+                self.assertTrue(response.success)
+                embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.")
+                embedding_response = stub.Embedding(embedding_request)
+                self.assertIsNotNone(embedding_response.embeddings)
+                # assert that is a list of floats
+                self.assertIsInstance(embedding_response.embeddings, list)
+                # assert that the list is not empty
+                self.assertTrue(len(embedding_response.embeddings) > 0)
+        except Exception as err:
+            print(err)
+            self.fail("Embedding service failed")
        finally:
            self.tearDown()
--- a/core/backend/backend_suite_test.go
+++ b/core/backend/backend_suite_test.go
@@ -0,0 +1,13 @@
+package backend_test
+
+import (
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+func TestBackend(t *testing.T) {
+	RegisterFailHandler(Fail)
+	RunSpecs(t, "Backend test suite")
+}
--- a/core/backend/llm.go
+++ b/core/backend/llm.go
@@ -9,6 +9,8 @@ import (
 	"sync"
 	"unicode/utf8"

+	"github.com/rs/zerolog/log"
+
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/schema"

@@ -87,7 +89,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
 			case string:
 				protoMessages[i].Content = ct
 			default:
-				return nil, fmt.Errorf("Unsupported type for schema.Message.Content for inference: %T", ct)
+				return nil, fmt.Errorf("unsupported type for schema.Message.Content for inference: %T", ct)
 			}
 		}
 	}
@@ -181,13 +183,37 @@ func Finetune(config config.BackendConfig, input, prediction string) string {
 		mu.Lock()
 		reg, ok := cutstrings[c]
 		if !ok {
-			cutstrings[c] = regexp.MustCompile(c)
+			r, err := regexp.Compile(c)
+			if err != nil {
+				log.Fatal().Err(err).Msg("failed to compile regex")
+			}
+			cutstrings[c] = r
 			reg = cutstrings[c]
 		}
 		mu.Unlock()
 		prediction = reg.ReplaceAllString(prediction, "")
 	}

+	// extract results from the response which can be for instance inside XML tags
+	var predResult string
+	for _, r := range config.ExtractRegex {
+		mu.Lock()
+		reg, ok := cutstrings[r]
+		if !ok {
+			regex, err := regexp.Compile(r)
+			if err != nil {
+				log.Fatal().Err(err).Msg("failed to compile regex")
+			}
+			cutstrings[r] = regex
+			reg = regex
+		}
+		mu.Unlock()
+		predResult += reg.FindString(prediction)
+	}
+	if predResult != "" {
+		prediction = predResult
+	}
+
 	for _, c := range config.TrimSpace {
 		prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
 	}
--- a/core/backend/llm_test.go
+++ b/core/backend/llm_test.go
@@ -0,0 +1,109 @@
+package backend_test
+
+import (
+	. "github.com/mudler/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/schema"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("LLM tests", func() {
+	Context("Finetune LLM output", func() {
+		var (
+			testConfig config.BackendConfig
+			input      string
+			prediction string
+			result     string
+		)
+
+		BeforeEach(func() {
+			testConfig = config.BackendConfig{
+				PredictionOptions: schema.PredictionOptions{
+					Echo: false,
+				},
+				LLMConfig: config.LLMConfig{
+					Cutstrings:   []string{`<.*?>`},                  // Example regex for removing XML tags
+					ExtractRegex: []string{`<result>(.*?)</result>`}, // Example regex to extract from tags
+					TrimSpace:    []string{" ", "\n"},
+					TrimSuffix:   []string{".", "!"},
+				},
+			}
+		})
+
+		Context("when echo is enabled", func() {
+			BeforeEach(func() {
+				testConfig.Echo = true
+				input = "Hello"
+				prediction = "World"
+			})
+
+			It("should prepend input to prediction", func() {
+				result = Finetune(testConfig, input, prediction)
+				Expect(result).To(Equal("HelloWorld"))
+			})
+		})
+
+		Context("when echo is disabled", func() {
+			BeforeEach(func() {
+				testConfig.Echo = false
+				input = "Hello"
+				prediction = "World"
+			})
+
+			It("should not modify the prediction with input", func() {
+				result = Finetune(testConfig, input, prediction)
+				Expect(result).To(Equal("World"))
+			})
+		})
+
+		Context("when cutstrings regex is applied", func() {
+			BeforeEach(func() {
+				input = ""
+				prediction = "<div>Hello</div> World"
+			})
+
+			It("should remove substrings matching cutstrings regex", func() {
+				result = Finetune(testConfig, input, prediction)
+				Expect(result).To(Equal("Hello World"))
+			})
+		})
+
+		Context("when extract regex is applied", func() {
+			BeforeEach(func() {
+				input = ""
+				prediction = "<response><result>42</result></response>"
+			})
+
+			It("should extract substrings matching the extract regex", func() {
+				result = Finetune(testConfig, input, prediction)
+				Expect(result).To(Equal("42"))
+			})
+		})
+
+		Context("when trimming spaces", func() {
+			BeforeEach(func() {
+				input = ""
+				prediction = "   Hello World   "
+			})
+
+			It("should trim spaces from the prediction", func() {
+				result = Finetune(testConfig, input, prediction)
+				Expect(result).To(Equal("Hello World"))
+			})
+		})
+
+		Context("when trimming suffixes", func() {
+			BeforeEach(func() {
+				input = ""
+				prediction = "Hello World."
+			})
+
+			It("should trim suffixes from the prediction", func() {
+				result = Finetune(testConfig, input, prediction)
+				Expect(result).To(Equal("Hello World"))
+			})
+		})
+	})
+})
--- a/core/backend/soundgeneration.go
+++ b/core/backend/soundgeneration.go
@@ -0,0 +1,74 @@
+package backend
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/pkg/grpc/proto"
+	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/utils"
+)
+
+func SoundGeneration(
+	backend string,
+	modelFile string,
+	text string,
+	duration *float32,
+	temperature *float32,
+	doSample *bool,
+	sourceFile *string,
+	sourceDivisor *int32,
+	loader *model.ModelLoader,
+	appConfig *config.ApplicationConfig,
+	backendConfig config.BackendConfig,
+) (string, *proto.Result, error) {
+	if backend == "" {
+		return "", nil, fmt.Errorf("backend is a required parameter")
+	}
+
+	grpcOpts := gRPCModelOpts(backendConfig)
+	opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
+		model.WithBackendString(backend),
+		model.WithModel(modelFile),
+		model.WithContext(appConfig.Context),
+		model.WithAssetDir(appConfig.AssetsDestination),
+		model.WithLoadGRPCLoadModelOpts(grpcOpts),
+	})
+
+	soundGenModel, err := loader.BackendLoader(opts...)
+	if err != nil {
+		return "", nil, err
+	}
+
+	if soundGenModel == nil {
+		return "", nil, fmt.Errorf("could not load sound generation model")
+	}
+
+	if err := os.MkdirAll(appConfig.AudioDir, 0750); err != nil {
+		return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
+	}
+
+	fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "sound_generation", ".wav")
+	filePath := filepath.Join(appConfig.AudioDir, fileName)
+
+	res, err := soundGenModel.SoundGeneration(context.Background(), &proto.SoundGenerationRequest{
+		Text:        text,
+		Model:       modelFile,
+		Dst:         filePath,
+		Sample:      doSample,
+		Duration:    duration,
+		Temperature: temperature,
+		Src:         sourceFile,
+		SrcDivisor:  sourceDivisor,
+	})
+
+	// return RPC error if any
+	if !res.Success {
+		return "", nil, fmt.Errorf(res.Message)
+	}
+
+	return filePath, res, err
+}
--- a/core/backend/transcript.go
+++ b/core/backend/transcript.go
@@ -3,12 +3,13 @@ package backend
 import (
 	"context"
 	"fmt"
+	"time"

 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/schema"

 	"github.com/mudler/LocalAI/pkg/grpc/proto"
-	model "github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/model"
 )

 func ModelTranscription(audio, language string, translate bool, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
@@ -21,19 +22,40 @@ func ModelTranscription(audio, language string, translate bool, ml *model.ModelL
 		model.WithAssetDir(appConfig.AssetsDestination),
 	})

-	whisperModel, err := ml.BackendLoader(opts...)
+	transcriptionModel, err := ml.BackendLoader(opts...)
 	if err != nil {
 		return nil, err
 	}

-	if whisperModel == nil {
-		return nil, fmt.Errorf("could not load whisper model")
+	if transcriptionModel == nil {
+		return nil, fmt.Errorf("could not load transcription model")
 	}

-	return whisperModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
+	r, err := transcriptionModel.AudioTranscription(context.Background(), &proto.TranscriptRequest{
 		Dst:       audio,
 		Language:  language,
 		Translate: translate,
 		Threads:   uint32(*backendConfig.Threads),
 	})
+	if err != nil {
+		return nil, err
+	}
+	tr := &schema.TranscriptionResult{
+		Text: r.Text,
+	}
+	for _, s := range r.Segments {
+		var tks []int
+		for _, t := range s.Tokens {
+			tks = append(tks, int(t))
+		}
+		tr.Segments = append(tr.Segments,
+			schema.Segment{
+				Text:   s.Text,
+				Id:     int(s.Id),
+				Start:  time.Duration(s.Start),
+				End:    time.Duration(s.End),
+				Tokens: tks,
+			})
+	}
+	return tr, err
 }
--- a/core/backend/tts.go
+++ b/core/backend/tts.go
@@ -9,31 +9,15 @@ import (
 	"github.com/mudler/LocalAI/core/config"

 	"github.com/mudler/LocalAI/pkg/grpc/proto"
-	model "github.com/mudler/LocalAI/pkg/model"
+	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/mudler/LocalAI/pkg/utils"
 )

-func generateUniqueFileName(dir, baseName, ext string) string {
-	counter := 1
-	fileName := baseName + ext
-
-	for {
-		filePath := filepath.Join(dir, fileName)
-		_, err := os.Stat(filePath)
-		if os.IsNotExist(err) {
-			return fileName
-		}
-
-		counter++
-		fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext)
-	}
-}
-
 func ModelTTS(
 	backend,
 	text,
 	modelFile,
-	voice ,
+	voice,
 	language string,
 	loader *model.ModelLoader,
 	appConfig *config.ApplicationConfig,
@@ -66,7 +50,7 @@ func ModelTTS(
 		return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
 	}

-	fileName := generateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
+	fileName := utils.GenerateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
 	filePath := filepath.Join(appConfig.AudioDir, fileName)

 	// If the model file is not empty, we pass it joined with the model path
@@ -88,12 +72,15 @@ func ModelTTS(
 	}

 	res, err := ttsModel.TTS(context.Background(), &proto.TTSRequest{
-		Text:  text,
-		Model: modelPath,
-		Voice: voice,
-		Dst:   filePath,
+		Text:     text,
+		Model:    modelPath,
+		Voice:    voice,
+		Dst:      filePath,
 		Language: &language,
 	})
+	if err != nil {
+		return "", nil, err
+	}

 	// return RPC error if any
 	if !res.Success {
--- a/core/cli/cli.go
+++ b/core/cli/cli.go
@@ -8,12 +8,13 @@ import (
 var CLI struct {
 	cliContext.Context `embed:""`

-	Run        RunCMD        `cmd:"" help:"Run LocalAI, this the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
-	Federated  FederatedCLI  `cmd:"" help:"Run LocalAI in federated mode"`
-	Models     ModelsCMD     `cmd:"" help:"Manage LocalAI models and definitions"`
-	TTS        TTSCMD        `cmd:"" help:"Convert text to speech"`
-	Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"`
-	Worker     worker.Worker `cmd:"" help:"Run workers to distribute workload (llama.cpp-only)"`
-	Util       UtilCMD       `cmd:"" help:"Utility commands"`
-	Explorer   ExplorerCMD   `cmd:"" help:"Run p2p explorer"`
+	Run             RunCMD             `cmd:"" help:"Run LocalAI, this the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
+	Federated       FederatedCLI       `cmd:"" help:"Run LocalAI in federated mode"`
+	Models          ModelsCMD          `cmd:"" help:"Manage LocalAI models and definitions"`
+	TTS             TTSCMD             `cmd:"" help:"Convert text to speech"`
+	SoundGeneration SoundGenerationCMD `cmd:"" help:"Generates audio files from text or audio"`
+	Transcript      TranscriptCMD      `cmd:"" help:"Convert audio to text"`
+	Worker          worker.Worker      `cmd:"" help:"Run workers to distribute workload (llama.cpp-only)"`
+	Util            UtilCMD            `cmd:"" help:"Utility commands"`
+	Explorer        ExplorerCMD        `cmd:"" help:"Run p2p explorer"`
 }
--- a/core/cli/soundgeneration.go
+++ b/core/cli/soundgeneration.go
@@ -0,0 +1,110 @@
+package cli
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+
+	"github.com/mudler/LocalAI/core/backend"
+	cliContext "github.com/mudler/LocalAI/core/cli/context"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/rs/zerolog/log"
+)
+
+type SoundGenerationCMD struct {
+	Text []string `arg:""`
+
+	Backend                string   `short:"b" required:"" help:"Backend to run the SoundGeneration model"`
+	Model                  string   `short:"m" required:"" help:"Model name to run the SoundGeneration"`
+	Duration               string   `short:"d" help:"If specified, the length of audio to generate in seconds"`
+	Temperature            string   `short:"t" help:"If specified, the temperature of the generation"`
+	InputFile              string   `short:"i" help:"If specified, the input file to condition generation upon"`
+	InputFileSampleDivisor string   `short:"f" help:"If InputFile and this divisor is specified, the first portion of the sample file will be used"`
+	DoSample               bool     `short:"s" default:"true" help:"Enables sampling from the model. Better quality at the cost of speed. Defaults to enabled."`
+	OutputFile             string   `short:"o" type:"path" help:"The path to write the output wav file"`
+	ModelsPath             string   `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
+	BackendAssetsPath      string   `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
+	ExternalGRPCBackends   []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
+}
+
+func parseToFloat32Ptr(input string) *float32 {
+	f, err := strconv.ParseFloat(input, 32)
+	if err != nil {
+		return nil
+	}
+	f2 := float32(f)
+	return &f2
+}
+
+func parseToInt32Ptr(input string) *int32 {
+	i, err := strconv.ParseInt(input, 10, 32)
+	if err != nil {
+		return nil
+	}
+	i2 := int32(i)
+	return &i2
+}
+
+func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
+	outputFile := t.OutputFile
+	outputDir := t.BackendAssetsPath
+	if outputFile != "" {
+		outputDir = filepath.Dir(outputFile)
+	}
+
+	text := strings.Join(t.Text, " ")
+
+	externalBackends := make(map[string]string)
+	// split ":" to get backend name and the uri
+	for _, v := range t.ExternalGRPCBackends {
+		backend := v[:strings.IndexByte(v, ':')]
+		uri := v[strings.IndexByte(v, ':')+1:]
+		externalBackends[backend] = uri
+		fmt.Printf("TMP externalBackends[%q]=%q\n\n", backend, uri)
+	}
+
+	opts := &config.ApplicationConfig{
+		ModelPath:            t.ModelsPath,
+		Context:              context.Background(),
+		AudioDir:             outputDir,
+		AssetsDestination:    t.BackendAssetsPath,
+		ExternalGRPCBackends: externalBackends,
+	}
+	ml := model.NewModelLoader(opts.ModelPath)
+
+	defer func() {
+		err := ml.StopAllGRPC()
+		if err != nil {
+			log.Error().Err(err).Msg("unable to stop all grpc processes")
+		}
+	}()
+
+	options := config.BackendConfig{}
+	options.SetDefaults()
+
+	var inputFile *string
+	if t.InputFile != "" {
+		inputFile = &t.InputFile
+	}
+
+	filePath, _, err := backend.SoundGeneration(t.Backend, t.Model, text,
+		parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
+		inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)
+
+	if err != nil {
+		return err
+	}
+	if outputFile != "" {
+		if err := os.Rename(filePath, outputFile); err != nil {
+			return err
+		}
+		fmt.Printf("Generate file %s\n", outputFile)
+	} else {
+		fmt.Printf("Generate file %s\n", filePath)
+	}
+	return nil
+}
--- a/core/cli/worker/worker.go
+++ b/core/cli/worker/worker.go
@@ -2,6 +2,7 @@ package worker

 type WorkerFlags struct {
 	BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
+	ExtraLLamaCPPArgs string `name:"llama-cpp-args" env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"`
 }

 type Worker struct {
--- a/core/cli/worker/worker_llamacpp.go
+++ b/core/cli/worker/worker_llamacpp.go
@@ -3,6 +3,7 @@ package worker
 import (
 	"fmt"
 	"os"
+	"strings"
 	"syscall"

 	cliContext "github.com/mudler/LocalAI/core/cli/context"
@@ -12,7 +13,6 @@ import (
 )

 type LLamaCPP struct {
-	Args        []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
 	WorkerFlags `embed:""`
 }

@@ -34,9 +34,8 @@ func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
 		"llama-cpp-rpc-server",
 	)

-	args := os.Args[4:]
+	args := strings.Split(r.ExtraLLamaCPPArgs, " ")
 	args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
-
 	args = append([]string{grpcProcess}, args...)
 	return syscall.Exec(
 		grpcProcess,
--- a/core/cli/worker/worker_p2p.go
+++ b/core/cli/worker/worker_p2p.go
@@ -8,6 +8,7 @@ import (
 	"fmt"
 	"os"
 	"os/exec"
+	"strings"
 	"time"

 	cliContext "github.com/mudler/LocalAI/core/cli/context"
@@ -20,12 +21,11 @@ import (

 type P2P struct {
 	WorkerFlags        `embed:""`
-	Token              string   `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to use"`
-	NoRunner           bool     `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"`
-	RunnerAddress      string   `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"`
-	RunnerPort         string   `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"`
-	ExtraLLamaCPPArgs  []string `env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"`
-	Peer2PeerNetworkID string   `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"`
+	Token              string `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to use"`
+	NoRunner           bool   `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"`
+	RunnerAddress      string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"`
+	RunnerPort         string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"`
+	Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"`
 }

 func (r *P2P) Run(ctx *cliContext.Context) error {
@@ -76,8 +76,8 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
 					"util",
 					"llama-cpp-rpc-server",
 				)
-
-				args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, r.ExtraLLamaCPPArgs...)
+				extraArgs := strings.Split(r.ExtraLLamaCPPArgs, " ")
+				args := append([]string{"--host", address, "--port", fmt.Sprint(port)}, extraArgs...)
 				args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)

 				cmd := exec.Command(
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -126,6 +126,7 @@ type LLMConfig struct {
 	Grammar         string   `yaml:"grammar"`
 	StopWords       []string `yaml:"stopwords"`
 	Cutstrings      []string `yaml:"cutstrings"`
+	ExtractRegex    []string `yaml:"extract_regex"`
 	TrimSpace       []string `yaml:"trimspace"`
 	TrimSuffix      []string `yaml:"trimsuffix"`

--- a/core/http/app_test.go
+++ b/core/http/app_test.go
@@ -772,6 +772,17 @@ var _ = Describe("API test", func() {
 			Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error:"))
 		})

+		It("shows the external backend", func() {
+			// do an http request to the /system endpoint
+			resp, err := http.Get("http://127.0.0.1:9090/system")
+			Expect(err).ToNot(HaveOccurred())
+			Expect(resp.StatusCode).To(Equal(200))
+			dat, err := io.ReadAll(resp.Body)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(string(dat)).To(ContainSubstring("huggingface"))
+			Expect(string(dat)).To(ContainSubstring("llama-cpp"))
+		})
+
 		It("transcribes audio", func() {
 			if runtime.GOOS != "linux" {
 				Skip("test supported only on linux")
--- a/core/http/endpoints/elevenlabs/soundgeneration.go
+++ b/core/http/endpoints/elevenlabs/soundgeneration.go
@@ -0,0 +1,65 @@
+package elevenlabs
+
+import (
+	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/config"
+	fiberContext "github.com/mudler/LocalAI/core/http/ctx"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/rs/zerolog/log"
+)
+
+// SoundGenerationEndpoint is the ElevenLabs SoundGeneration endpoint https://elevenlabs.io/docs/api-reference/sound-generation
+// @Summary Generates audio from the input text.
+// @Param request body schema.ElevenLabsSoundGenerationRequest true "query params"
+// @Success 200 {string} binary	 "Response"
+// @Router /v1/sound-generation [post]
+func SoundGenerationEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		input := new(schema.ElevenLabsSoundGenerationRequest)
+		// Get input data from the request body
+		if err := c.BodyParser(input); err != nil {
+			return err
+		}
+
+		modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.ModelID, false)
+		if err != nil {
+			modelFile = input.ModelID
+			log.Warn().Str("ModelID", input.ModelID).Msg("Model not found in context")
+		}
+
+		cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
+			config.LoadOptionDebug(appConfig.Debug),
+			config.LoadOptionThreads(appConfig.Threads),
+			config.LoadOptionContextSize(appConfig.ContextSize),
+			config.LoadOptionF16(appConfig.F16),
+		)
+		if err != nil {
+			modelFile = input.ModelID
+			log.Warn().Str("Request ModelID", input.ModelID).Err(err).Msg("error during LoadBackendConfigFileByName, using request ModelID")
+		} else {
+			if input.ModelID != "" {
+				modelFile = input.ModelID
+			} else {
+				modelFile = cfg.Model
+			}
+		}
+		log.Debug().Str("modelFile", "modelFile").Str("backend", cfg.Backend).Msg("Sound Generation Request about to be sent to backend")
+
+		if input.Duration != nil {
+			log.Debug().Float32("duration", *input.Duration).Msg("duration set")
+		}
+		if input.Temperature != nil {
+			log.Debug().Float32("temperature", *input.Temperature).Msg("temperature set")
+		}
+
+		// TODO: Support uploading files?
+		filePath, _, err := backend.SoundGeneration(cfg.Backend, modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
+		if err != nil {
+			return err
+		}
+		return c.Download(filePath)
+
+	}
+}
--- a/core/http/endpoints/localai/system.go
+++ b/core/http/endpoints/localai/system.go
@@ -0,0 +1,29 @@
+package localai
+
+import (
+	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/model"
+)
+
+// SystemInformations returns the system informations
+// @Summary Show the LocalAI instance information
+// @Success 200 {object} schema.SystemInformationResponse "Response"
+// @Router /system [get]
+func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(*fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		availableBackends, err := ml.ListAvailableBackends(appConfig.AssetsDestination)
+		if err != nil {
+			return err
+		}
+		for b := range appConfig.ExternalGRPCBackends {
+			availableBackends = append(availableBackends, b)
+		}
+		return c.JSON(
+			schema.SystemInformationResponse{
+				Backends: availableBackends,
+			},
+		)
+	}
+}
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -68,9 +68,9 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup

 		textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig)
 		result = functions.CleanupLLMResult(result, config.FunctionsConfig)
-		results := functions.ParseFunctionCall(result, config.FunctionsConfig)
+		functionResults := functions.ParseFunctionCall(result, config.FunctionsConfig)
 		log.Debug().Msgf("Text content to return: %s", textContentToReturn)
-		noActionToRun := len(results) > 0 && results[0].Name == noAction || len(results) == 0
+		noActionToRun := len(functionResults) > 0 && functionResults[0].Name == noAction || len(functionResults) == 0

 		switch {
 		case noActionToRun:
@@ -83,7 +83,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 			}
 			responses <- initialMessage

-			result, err := handleQuestion(config, req, ml, startupOptions, results, result, prompt)
+			result, err := handleQuestion(config, req, ml, startupOptions, functionResults, result, prompt)
 			if err != nil {
 				log.Error().Err(err).Msg("error handling question")
 				return
@@ -105,7 +105,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 			responses <- resp

 		default:
-			for i, ss := range results {
+			for i, ss := range functionResults {
 				name, args := ss.Name, ss.Arguments

 				initialMessage := schema.OpenAIResponse{
--- a/core/http/routes/elevenlabs.go
+++ b/core/http/routes/elevenlabs.go
@@ -16,4 +16,6 @@ func RegisterElevenLabsRoutes(app *fiber.App,
 	// Elevenlabs
 	app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig))

+	app.Post("/v1/sound-generation", auth, elevenlabs.SoundGenerationEndpoint(cl, ml, appConfig))
+
 }
--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -70,4 +70,6 @@ func RegisterLocalAIRoutes(app *fiber.App,
 		}{Version: internal.PrintableVersion()})
 	})

+	app.Get("/system", auth, localai.SystemInformations(ml, appConfig))
+
 }
--- a/core/schema/elevenlabs.go
+++ b/core/schema/elevenlabs.go
@@ -4,3 +4,11 @@ type ElevenLabsTTSRequest struct {
 	Text    string `json:"text" yaml:"text"`
 	ModelID string `json:"model_id" yaml:"model_id"`
 }
+
+type ElevenLabsSoundGenerationRequest struct {
+	Text        string   `json:"text" yaml:"text"`
+	ModelID     string   `json:"model_id" yaml:"model_id"`
+	Duration    *float32 `json:"duration_seconds,omitempty" yaml:"duration_seconds,omitempty"`
+	Temperature *float32 `json:"prompt_influence,omitempty" yaml:"prompt_influence,omitempty"`
+	DoSample    *bool    `json:"do_sample,omitempty" yaml:"do_sample,omitempty"`
+}
--- a/core/schema/localai.go
+++ b/core/schema/localai.go
@@ -70,3 +70,7 @@ type P2PNodesResponse struct {
 	Nodes          []p2p.NodeData `json:"nodes" yaml:"nodes"`
 	FederatedNodes []p2p.NodeData `json:"federated_nodes" yaml:"federated_nodes"`
 }
+
+type SystemInformationResponse struct {
+	Backends []string `json:"backends"`
+}
--- a/core/services/backend_monitor.go
+++ b/core/services/backend_monitor.go
@@ -107,7 +107,7 @@ func (bms BackendMonitorService) CheckAndSample(modelName string) (*proto.Status
 		return nil, err
 	}
 	modelAddr := bms.modelLoader.CheckIsLoaded(backendId)
-	if modelAddr == "" {
+	if modelAddr == nil {
 		return nil, fmt.Errorf("backend %s is not currently loaded", backendId)
 	}

--- a/docs/content/docs/features/GPU-acceleration.md
+++ b/docs/content/docs/features/GPU-acceleration.md
@@ -133,6 +133,10 @@ Due to the nature of ROCm it is best to run all implementations in containers as
 Ongoing verification testing of ROCm compatability with integrated backends.
 Please note the following list of verified backends and devices.

+LocalAI hipblas images are built against the following targets: gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
+
+If your device is not one of these you must specify the corresponding `GPU_TARGETS` and specify `REBUILD=true`. Otherwise you don't need to specify these in the commands below.
+
 ### Verified 

 The devices in the following list have been tested with `hipblas` images running `ROCm 6.0.0`
--- a/docs/content/docs/features/distributed_inferencing.md
+++ b/docs/content/docs/features/distributed_inferencing.md
@@ -68,7 +68,7 @@ And navigate the WebUI to the "Swarm" section to see the instructions to connect
 To start workers for distributing the computational load, run:

 ```bash
-local-ai worker llama-cpp-rpc <listening_address> <listening_port>
+local-ai worker llama-cpp-rpc --llama-cpp-args="-H <listening_address> -p <listening_port> -m <memory>" 
 ```

 And you can specify the address of the workers when starting LocalAI with the `LLAMACPP_GRPC_SERVERS` environment variable:
@@ -98,7 +98,7 @@ To reuse the same token later, restart the server with `--p2ptoken` or `P2P_TOKE
 2. Start the workers. Copy the `local-ai` binary to other hosts and run as many workers as needed using the token:

 ```bash
-TOKEN=XXX ./local-ai worker p2p-llama-cpp-rpc
+TOKEN=XXX ./local-ai worker p2p-llama-cpp-rpc --llama-cpp-args="-m <memory>" 
 # 1:06AM INF loading environment variables from file envFile=.env
 # 1:06AM INF Setting logging to info
 # {"level":"INFO","time":"2024-05-19T01:06:01.794+0200","caller":"config/config.go:288","message":"connmanager disabled\n"}
--- a/docs/content/docs/getting-started/quickstart.md
+++ b/docs/content/docs/getting-started/quickstart.md
@@ -12,7 +12,7 @@ icon = "rocket_launch"

 **Security considerations**

-If you are exposing LocalAI remotely, make sure you protect the API endpoints adeguately with a mechanism which allows to protect from the incoming traffic or alternatively, run LocalAI with `API_KEY` to gate the access with an API key. The API key guarantees a total access to the features (there is no role separation), and it is to be considered as likely as an admin role.
+If you are exposing LocalAI remotely, make sure you protect the API endpoints adequately with a mechanism which allows to protect from the incoming traffic or alternatively, run LocalAI with `API_KEY` to gate the access with an API key. The API key guarantees a total access to the features (there is no role separation), and it is to be considered as likely as an admin role.

 To access the WebUI with an API_KEY, browser extensions such as [Requestly](https://requestly.com/) can be used (see also https://github.com/mudler/LocalAI/issues/2227#issuecomment-2093333752). See also [API flags]({{% relref "docs/advanced/advanced-usage#api-flags" %}}) for the flags / options available when starting LocalAI.

--- a/docs/content/docs/overview.md
+++ b/docs/content/docs/overview.md
@@ -51,9 +51,7 @@ icon = "info"

 > 💡 Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [💭Discord](https://discord.gg/uJAeKSAGDy)
 >
-> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
-
-
+> [💻 Quickstart](https://localai.io/basics/getting_started/) [🖼️ Models](https://models.localai.io/) [🚀 Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) [🥽 Demo](https://demo.localai.io) [🌍 Explorer](https://explorer.localai.io) [🛫 Examples](https://github.com/go-skynet/LocalAI/tree/master/examples/) 


 **LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families and architectures. Does not require GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
--- a/docs/data/version.json
+++ b/docs/data/version.json
@@ -1,3 +1,3 @@
 {
-  "version": "v2.19.4"
+  "version": "v2.20.1"
 }
--- a/docs/themes/hugo-theme-relearn
+++ b/docs/themes/hugo-theme-relearn
--- a/Generation/musicgen.bru
+++ b/Generation/musicgen.bru
@@ -0,0 +1,23 @@
+meta {
+  name: musicgen
+  type: http
+  seq: 1
+}
+
+post {
+  url: {{PROTOCOL}}{{HOST}}:{{PORT}}/v1/sound-generation
+  body: json
+  auth: none
+}
+
+headers {
+  Content-Type: application/json
+}
+
+body:json {
+  {
+      "model_id": "facebook/musicgen-small",
+      "text": "Exciting 80s Newscast Interstitial",
+      "duration_seconds": 8
+  }
+}
--- a/examples/chainlit/requirements.txt
+++ b/examples/chainlit/requirements.txt
@@ -1,4 +1,4 @@
-llama_index==0.10.65
+llama_index==0.11.7
 requests==2.32.3
 weaviate_client==4.6.7
 transformers
--- a/examples/functions/requirements.txt
+++ b/examples/functions/requirements.txt
@@ -1,2 +1,2 @@
-langchain==0.2.14
-openai==1.41.1
+langchain==0.2.16
+openai==1.44.0
--- a/examples/langchain-chroma/requirements.txt
+++ b/examples/langchain-chroma/requirements.txt
@@ -1,4 +1,4 @@
-langchain==0.2.14
-openai==1.40.5
+langchain==0.2.16
+openai==1.44.1
 chromadb==0.5.5
-llama-index==0.10.67.post1
+llama-index==0.11.7
--- a/examples/langchain/langchainpy-localai-example/requirements.txt
+++ b/examples/langchain/langchainpy-localai-example/requirements.txt
@@ -2,23 +2,23 @@ aiohttp==3.10.3
 aiosignal==1.3.1
 async-timeout==4.0.3
 attrs==24.2.0
-certifi==2024.7.4
+certifi==2024.8.30
 charset-normalizer==3.3.2
 colorama==0.4.6
 dataclasses-json==0.6.7
 debugpy==1.8.2
 frozenlist==1.4.1
 greenlet==3.0.3
-idna==3.7
-langchain==0.2.14
-langchain-community==0.2.12
-marshmallow==3.21.3
+idna==3.8
+langchain==0.2.16
+langchain-community==0.2.16
+marshmallow==3.22.0
 marshmallow-enum==1.5.1
 multidict==6.0.5
 mypy-extensions==1.0.0
 numexpr==2.10.1
-numpy==2.1.0
-openai==1.41.1
+numpy==2.1.1
+openai==1.44.0
 openapi-schema-pydantic==1.2.4
 packaging>=23.2
 pydantic==2.8.2
@@ -30,4 +30,4 @@ tqdm==4.66.5
 typing-inspect==0.9.0
 typing_extensions==4.12.2
 urllib3==2.2.2
-yarl==1.9.4
+yarl==1.11.0
--- a/examples/streamlit-bot/requirements.txt
+++ b/examples/streamlit-bot/requirements.txt
@@ -1,2 +1,2 @@
-streamlit==1.37.1
+streamlit==1.38.0
 requests
--- a/gallery/hermes-vllm.yaml
+++ b/gallery/hermes-vllm.yaml
@@ -0,0 +1,93 @@
+---
+name: "hermes-vllm"
+
+config_file: |
+    backend: vllm
+    parameters:
+      max_tokens: 8192
+    context_size: 8192
+    stopwords:
+    - "<|im_end|>"
+    - "<dummy32000>"
+    - "<|eot_id|>"
+    - "<|end_of_text|>"
+    function:
+      disable_no_action: true
+      grammar:
+        # Uncomment the line below to enable grammar matching for JSON results if the model is breaking
+        # the output. This will make the model more accurate and won't break the JSON output.
+        # This however, will make parallel_calls not functional (it is a known bug)
+        # mixed_mode: true
+        disable: true
+        parallel_calls: true
+        expect_strings_after_json: true
+      json_regex_match:
+      - "(?s)<tool_call>(.*?)</tool_call>"
+      - "(?s)<tool_call>(.*)"
+      capture_llm_results:
+        - (?s)<scratchpad>(.*?)</scratchpad>
+      replace_llm_results:
+        - key: (?s)<scratchpad>(.*?)</scratchpad>
+          value: ""
+
+    template:
+      use_tokenizer_template: true
+      chat: |
+        {{.Input -}}
+        <|im_start|>assistant
+      chat_message: |
+        <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
+        {{- if .FunctionCall }}
+        <tool_call>
+        {{- else if eq .RoleName "tool" }}
+        <tool_response>
+        {{- end }}
+        {{- if .Content}}
+        {{.Content }}
+        {{- end }}
+        {{- if .FunctionCall}}
+        {{toJson .FunctionCall}}
+        {{- end }}
+        {{- if .FunctionCall }}
+        </tool_call>
+        {{- else if eq .RoleName "tool" }}
+        </tool_response>
+        {{- end }}<|im_end|>
+      completion: |
+        {{.Input}}
+      function: |
+        <|im_start|>system
+        You are a function calling AI model.
+        Here are the available tools:
+        <tools>
+        {{range .Functions}}
+        {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+        {{end}}
+        </tools>
+        You should call the tools provided to you sequentially
+        Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
+        <scratchpad>
+        {step-by-step reasoning and plan in bullet points}
+        </scratchpad>
+        For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
+        <tool_call>
+        {"arguments": <args-dict>, "name": <function-name>}
+        </tool_call><|im_end|>
+        {{.Input -}}
+        <|im_start|>assistant
+# Uncomment to specify a quantization method (optional)
+# quantization: "awq"
+# Uncomment to limit the GPU memory utilization (vLLM default is 0.9 for 90%)
+# gpu_memory_utilization: 0.5
+# Uncomment to trust remote code from huggingface
+# trust_remote_code: true
+# Uncomment to enable eager execution
+# enforce_eager: true
+# Uncomment to specify the size of the CPU swap space per GPU (in GiB)
+# swap_space: 2
+# Uncomment to specify the maximum length of a sequence (including prompt and output)
+# max_model_len: 32768
+# Uncomment and specify the number of Tensor divisions.
+# Allows you to partition and run large models. Performance gains are limited.
+# https://github.com/vllm-project/vllm/issues/1435
+# tensor_parallel_size: 2
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -27,8 +27,8 @@
    - filename: SmolLM-1.7B-Instruct.Q4_K_M.gguf
      sha256: 2b07eb2293ed3fc544a9858beda5bfb03dcabda6aa6582d3c85768c95f498d28
      uri: huggingface://MaziyarPanahi/SmolLM-1.7B-Instruct-GGUF/SmolLM-1.7B-Instruct.Q4_K_M.gguf
-## LLama3.1
 - &llama31
+  ## LLama3.1
  url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
  icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
  name: "meta-llama-3.1-8b-instruct"
@@ -85,6 +85,24 @@
    - filename: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
      sha256: c2f17f44af962660d1ad4cb1af91a731f219f3b326c2b14441f9df1f347f2815
      uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "meta-llama-3.1-8b-instruct:Q8_grammar-functioncall"
+  url: "github:mudler/LocalAI/gallery/llama3.1-instruct-grammar.yaml@master"
+  urls:
+    - https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
+    - https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF
+  description: |
+    This is the standard Llama 3.1 8B Instruct model with grammar and function call enabled.
+
+    When grammars are enabled in LocalAI, the LLM is forced to output valid tools constrained by BNF grammars. This can be useful for ensuring that the model outputs are valid and can be used in a production environment.
+    For more information on how to use grammars in LocalAI, see https://localai.io/features/openai-functions/#advanced and https://localai.io/features/constrained_grammars/.
+  overrides:
+    parameters:
+      model: Meta-Llama-3.1-8B-Instruct.Q8_0.gguf
+  files:
+    - filename: Meta-Llama-3.1-8B-Instruct.Q8_0.gguf
+      sha256: f8d608c983b83a1bf28229bc9beb4294c91f5d4cbfe2c1829566b4d7c4693eeb
+      uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q8_0.gguf
 - !!merge <<: *llama31
  name: "meta-llama-3.1-8b-claude-imat"
  urls:
@@ -389,6 +407,38 @@
    - filename: llama-3.1-storm-8b-q4_k_m.gguf
      sha256: d714e960211ee0fe6113d3131a6573e438f37debd07e1067d2571298624414a0
      uri: huggingface://mudler/Llama-3.1-Storm-8B-Q4_K_M-GGUF/llama-3.1-storm-8b-q4_k_m.gguf
+- !!merge <<: *llama31
+  name: "hubble-4b-v1"
+  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/R8_o3CCpTgKv5Wnnry7E_.png
+  urls:
+    - https://huggingface.co/TheDrummer/Hubble-4B-v1-GGUF
+  description: |
+    Equipped with his five senses, man explores the universe around him and calls the adventure 'Science'.
+    This is a finetune of Nvidia's Llama 3.1 4B Minitron - a shrunk down model of Llama 3.1 8B 128K.
+  overrides:
+    parameters:
+      model: Hubble-4B-v1-Q4_K_M.gguf
+  files:
+    - filename: Hubble-4B-v1-Q4_K_M.gguf
+      uri: huggingface://TheDrummer/Hubble-4B-v1-GGUF/Hubble-4B-v1-Q4_K_M.gguf
+      sha256: 0721294d0e861c6e6162a112fc7242e0c4b260c156137f4bcbb08667f1748080
+- !!merge <<: *llama31
+  name: "reflection-llama-3.1-70b"
+  urls:
+    - https://huggingface.co/leafspark/Reflection-Llama-3.1-70B-bf16
+    - https://huggingface.co/senseable/Reflection-Llama-3.1-70B-gguf
+  description: |
+      Reflection Llama-3.1 70B is (currently) the world's top open-source LLM, trained with a new technique called Reflection-Tuning that teaches a LLM to detect mistakes in its reasoning and correct course.
+
+      The model was trained on synthetic data generated by Glaive. If you're training a model, Glaive is incredible — use them.
+  overrides:
+    parameters:
+      model: Reflection-Llama-3.1-70B-q4_k_m.gguf
+  files:
+    - filename: Reflection-Llama-3.1-70B-q4_k_m.gguf
+      sha256: 16064e07037883a750cfeae9a7be41143aa857dbac81c2e93c68e2f941dee7b2
+      uri: huggingface://senseable/Reflection-Llama-3.1-70B-gguf/Reflection-Llama-3.1-70B-q4_k_m.gguf
 ## Uncensored models
 - !!merge <<: *llama31
  name: "humanish-roleplay-llama-3.1-8b-i1"
@@ -1114,7 +1164,7 @@
    - https://huggingface.co/TheDrummer/Rocinante-12B-v1.1-GGUF
    - https://huggingface.co/TheDrummer/Rocinante-12B-v1.1
  description: |
-     A versatile workhorse for any adventure!
+    A versatile workhorse for any adventure!
  overrides:
    parameters:
      model: Rocinante-12B-v1.1-Q4_K_M.gguf
@@ -1658,6 +1708,20 @@
    - filename: shieldgemma-9b.i1-Q4_K_M.gguf
      sha256: ffa7eaadcc0c7d0544fda5b0d86bba3ffa3431b673e5b2135f421cfe65bd8732
      uri: huggingface://mradermacher/shieldgemma-9b-i1-GGUF/shieldgemma-9b.i1-Q4_K_M.gguf
+- !!merge <<: *gemma
+  name: "athena-codegemma-2-2b-it"
+  urls:
+    - https://huggingface.co/EpistemeAI/Athena-codegemma-2-2b-it
+    - https://huggingface.co/mradermacher/Athena-codegemma-2-2b-it-GGUF
+  description: |
+      Supervised fine tuned (sft unsloth) for coding with EpistemeAI coding dataset.
+  overrides:
+    parameters:
+      model: Athena-codegemma-2-2b-it.Q4_K_M.gguf
+  files:
+    - filename: Athena-codegemma-2-2b-it.Q4_K_M.gguf
+      sha256: 59ce17023438b0da603dd211c7d39f78e7acac4108258ac0818a97a4ca7d64e3
+      uri: huggingface://mradermacher/Athena-codegemma-2-2b-it-GGUF/Athena-codegemma-2-2b-it.Q4_K_M.gguf
 - &llama3
  url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
  icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
@@ -1690,7 +1754,7 @@
  files:
    - filename: Meta-Llama-3-8B-Instruct.Q4_0.gguf
      uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q4_0.gguf
-      sha256: 1977ae6185ef5bc476e27db85bb3d79ca4bd87e7b03399083c297d9c612d334c
+      sha256: 2b4675c2208f09ad8762d8cf1b6a4a26bf65e6f0641aba324ec65143c0b4ad9f
 - !!merge <<: *llama3
  name: "llama3-8b-instruct:Q6_K"
  overrides:
@@ -1699,7 +1763,7 @@
  files:
    - filename: Meta-Llama-3-8B-Instruct.Q6_K.gguf
      uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q6_K.gguf
-      sha256: d1cdc49a716674c8e2506039bef85b905376cdaafc33e449b5aa2cf88f9532ad
+      sha256: bd7efd73f9fb67e4b9ecc43f861f37c7e594e78a8a5ff9c29da021692bd243ef
 - !!merge <<: *llama3
  name: "llama-3-8b-instruct-abliterated"
  urls:
@@ -3142,6 +3206,121 @@
    - filename: Master-Yi-9B_Q4_K_M.gguf
      sha256: 57e2afcf9f24d7138a3b8e2b547336d7edc13621a5e8090bc196d7de360b2b45
      uri: huggingface://qnguyen3/Master-Yi-9B-GGUF/Master-Yi-9B_Q4_K_M.gguf
+- !!merge <<: *yi-chat
+  name: "magnum-v3-34b"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/658a46cbfb9c2bdfae75b3a6/9yEmnTDG9bcC_bxwuDU6G.png
+  urls:
+    - https://huggingface.co/anthracite-org/magnum-v3-34b
+    - https://huggingface.co/bartowski/magnum-v3-34b-GGUF
+  description: |
+    This is the 9th in a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet and Opus.
+
+    This model is fine-tuned on top of Yi-1.5-34 B-32 K.
+  overrides:
+    parameters:
+      model: magnum-v3-34b-Q4_K_M.gguf
+  files:
+    - filename: magnum-v3-34b-Q4_K_M.gguf
+      sha256: f902956c0731581f1ff189e547e6e5aad86b77af5f4dc7e4fc26bcda5c1f7cc3
+      uri: huggingface://bartowski/magnum-v3-34b-GGUF/magnum-v3-34b-Q4_K_M.gguf
+- !!merge <<: *yi-chat
+  name: "yi-coder-9b-chat"
+  urls:
+    - https://huggingface.co/01-ai/Yi-Coder-9B-Chat
+    - https://huggingface.co/bartowski/Yi-Coder-9B-Chat-GGUF
+    - https://01-ai.github.io/
+    - https://github.com/01-ai/Yi-Coder
+  description: |
+    Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.
+    Key features:
+
+        Excelling in long-context understanding with a maximum context length of 128K tokens.
+        Supporting 52 major programming languages:
+
+      'java', 'markdown', 'python', 'php', 'javascript', 'c++', 'c#', 'c', 'typescript', 'html', 'go', 'java_server_pages', 'dart', 'objective-c', 'kotlin', 'tex', 'swift', 'ruby', 'sql', 'rust', 'css', 'yaml', 'matlab', 'lua', 'json', 'shell', 'visual_basic', 'scala', 'rmarkdown', 'pascal', 'fortran', 'haskell', 'assembly', 'perl', 'julia', 'cmake', 'groovy', 'ocaml', 'powershell', 'elixir', 'clojure', 'makefile', 'coffeescript', 'erlang', 'lisp', 'toml', 'batchfile', 'cobol', 'dockerfile', 'r', 'prolog', 'verilog'
+
+    For model details and benchmarks, see Yi-Coder blog and Yi-Coder README.
+  overrides:
+    parameters:
+      model: Yi-Coder-9B-Chat-Q4_K_M.gguf
+  files:
+    - filename: Yi-Coder-9B-Chat-Q4_K_M.gguf
+      sha256: 251cc196e3813d149694f362bb0f8f154f3320abe44724eebe58c23dc54f201d
+      uri: huggingface://bartowski/Yi-Coder-9B-Chat-GGUF/Yi-Coder-9B-Chat-Q4_K_M.gguf
+- !!merge <<: *yi-chat
+  name: "yi-coder-1.5b-chat"
+  urls:
+    - https://huggingface.co/01-ai/Yi-Coder-1.5B-Chat
+    - https://huggingface.co/MaziyarPanahi/Yi-Coder-1.5B-Chat-GGUF
+    - https://01-ai.github.io/
+    - https://github.com/01-ai/Yi-Coder
+  description: |
+    Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.
+    Key features:
+
+        Excelling in long-context understanding with a maximum context length of 128K tokens.
+        Supporting 52 major programming languages:
+
+      'java', 'markdown', 'python', 'php', 'javascript', 'c++', 'c#', 'c', 'typescript', 'html', 'go', 'java_server_pages', 'dart', 'objective-c', 'kotlin', 'tex', 'swift', 'ruby', 'sql', 'rust', 'css', 'yaml', 'matlab', 'lua', 'json', 'shell', 'visual_basic', 'scala', 'rmarkdown', 'pascal', 'fortran', 'haskell', 'assembly', 'perl', 'julia', 'cmake', 'groovy', 'ocaml', 'powershell', 'elixir', 'clojure', 'makefile', 'coffeescript', 'erlang', 'lisp', 'toml', 'batchfile', 'cobol', 'dockerfile', 'r', 'prolog', 'verilog'
+
+    For model details and benchmarks, see Yi-Coder blog and Yi-Coder README.
+  overrides:
+    parameters:
+      model: Yi-Coder-1.5B-Chat.Q4_K_M.gguf
+  files:
+    - filename: Yi-Coder-1.5B-Chat.Q4_K_M.gguf
+      sha256: e2e8fa659cd75c828d7783b5c2fb60d220e08836065901fad8edb48e537c1cec
+      uri: huggingface://MaziyarPanahi/Yi-Coder-1.5B-Chat-GGUF/Yi-Coder-1.5B-Chat.Q4_K_M.gguf
+- !!merge <<: *yi-chat
+  url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
+  name: "yi-coder-1.5b"
+  urls:
+    - https://huggingface.co/01-ai/Yi-Coder-1.5B
+    - https://huggingface.co/QuantFactory/Yi-Coder-1.5B-GGUF
+    - https://01-ai.github.io/
+    - https://github.com/01-ai/Yi-Coder
+  description: |
+    Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.
+    Key features:
+
+        Excelling in long-context understanding with a maximum context length of 128K tokens.
+        Supporting 52 major programming languages:
+
+      'java', 'markdown', 'python', 'php', 'javascript', 'c++', 'c#', 'c', 'typescript', 'html', 'go', 'java_server_pages', 'dart', 'objective-c', 'kotlin', 'tex', 'swift', 'ruby', 'sql', 'rust', 'css', 'yaml', 'matlab', 'lua', 'json', 'shell', 'visual_basic', 'scala', 'rmarkdown', 'pascal', 'fortran', 'haskell', 'assembly', 'perl', 'julia', 'cmake', 'groovy', 'ocaml', 'powershell', 'elixir', 'clojure', 'makefile', 'coffeescript', 'erlang', 'lisp', 'toml', 'batchfile', 'cobol', 'dockerfile', 'r', 'prolog', 'verilog'
+
+    For model details and benchmarks, see Yi-Coder blog and Yi-Coder README.
+  overrides:
+    parameters:
+      model: Yi-Coder-1.5B.Q4_K_M.gguf
+  files:
+    - filename: Yi-Coder-1.5B.Q4_K_M.gguf
+      sha256: 86a280dd36c9b2342b7023532f9c2c287e251f5cd10bc81ca262db8c1668f272
+      uri: huggingface://QuantFactory/Yi-Coder-1.5B-GGUF/Yi-Coder-1.5B.Q4_K_M.gguf
+- !!merge <<: *yi-chat
+  url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
+  name: "yi-coder-9b"
+  urls:
+    - https://huggingface.co/01-ai/Yi-Coder-9B
+    - https://huggingface.co/QuantFactory/Yi-Coder-9B-GGUF
+    - https://01-ai.github.io/
+    - https://github.com/01-ai/Yi-Coder
+  description: |
+    Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.
+    Key features:
+
+        Excelling in long-context understanding with a maximum context length of 128K tokens.
+        Supporting 52 major programming languages:
+
+      'java', 'markdown', 'python', 'php', 'javascript', 'c++', 'c#', 'c', 'typescript', 'html', 'go', 'java_server_pages', 'dart', 'objective-c', 'kotlin', 'tex', 'swift', 'ruby', 'sql', 'rust', 'css', 'yaml', 'matlab', 'lua', 'json', 'shell', 'visual_basic', 'scala', 'rmarkdown', 'pascal', 'fortran', 'haskell', 'assembly', 'perl', 'julia', 'cmake', 'groovy', 'ocaml', 'powershell', 'elixir', 'clojure', 'makefile', 'coffeescript', 'erlang', 'lisp', 'toml', 'batchfile', 'cobol', 'dockerfile', 'r', 'prolog', 'verilog'
+
+    For model details and benchmarks, see Yi-Coder blog and Yi-Coder README.
+  overrides:
+    parameters:
+      model: Yi-Coder-9B.Q4_K_M.gguf
+  files:
+    - filename: Yi-Coder-9B.Q4_K_M.gguf
+      sha256: cff3db8a69c43654e3c2d2984e86ad2791d1d446ec56b24a636ba1ce78363308
+      uri: huggingface://QuantFactory/Yi-Coder-9B-GGUF/Yi-Coder-9B.Q4_K_M.gguf
 - &vicuna-chat
  ## LLama2 and derivatives
  ### Start Fimbulvetr
@@ -4555,7 +4734,7 @@
    - filename: phillama-3.8b-v0.1.Q4_K_M.gguf
      sha256: da537d352b7aae54bbad0d2cff3e3a1b0e1dc1e1d25bec3aae1d05cf4faee7a2
      uri: huggingface://RichardErkhov/raincandy-u_-_phillama-3.8b-v0.1-gguf/phillama-3.8b-v0.1.Q4_K_M.gguf
- !!merge <<: *llama3
+- !!merge <<: *phi-3
  name: "calme-2.3-phi3-4b"
  icon: https://huggingface.co/MaziyarPanahi/calme-2.1-phi3-4b/resolve/main/phi-3-instruct.webp
  urls:
@@ -4572,6 +4751,35 @@
    - filename: Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf
      sha256: 3a23e1052369c080afb925882bd814cbea5ec859894655a7434c3d49e43a6127
      uri: huggingface://MaziyarPanahi/calme-2.3-phi3-4b-GGUF/Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf
+- !!merge <<: *phi-3
+  name: "phi-3.5-mini-instruct"
+  urls:
+    - https://huggingface.co/microsoft/Phi-3.5-mini-instruct
+    - https://huggingface.co/MaziyarPanahi/Phi-3.5-mini-instruct-GGUF
+  description: |
+    Phi-3.5-mini is a lightweight, state-of-the-art open model built upon datasets used for Phi-3 - synthetic data and filtered publicly available websites - with a focus on very high-quality, reasoning dense data. The model belongs to the Phi-3 model family and supports 128K token context length. The model underwent a rigorous enhancement process, incorporating both supervised fine-tuning, proximal policy optimization, and direct preference optimization to ensure precise instruction adherence and robust safety measures.
+  overrides:
+    parameters:
+      model: Phi-3.5-mini-instruct.Q4_K_M.gguf
+  files:
+    - filename: Phi-3.5-mini-instruct.Q4_K_M.gguf
+      sha256: 3f68916e850b107d8641d18bcd5548f0d66beef9e0a9077fe84ef28943eb7e88
+      uri: huggingface://MaziyarPanahi/Phi-3.5-mini-instruct-GGUF/Phi-3.5-mini-instruct.Q4_K_M.gguf
+- !!merge <<: *phi-3
+  name: "calme-2.1-phi3.5-4b-i1"
+  icon: https://huggingface.co/MaziyarPanahi/calme-2.1-phi3.5-4b/resolve/main/calme-2.webp
+  urls:
+    - https://huggingface.co/MaziyarPanahi/calme-2.1-phi3.5-4b
+    - https://huggingface.co/mradermacher/calme-2.1-phi3.5-4b-i1-GGUF
+  description: |
+    This model is a fine-tuned version of the microsoft/Phi-3.5-mini-instruct, pushing the boundaries of natural language understanding and generation even further. My goal was to create a versatile and robust model that excels across a wide range of benchmarks and real-world applications.
+  overrides:
+    parameters:
+      model: calme-2.1-phi3.5-4b.i1-Q4_K_M.gguf
+  files:
+    - filename: calme-2.1-phi3.5-4b.i1-Q4_K_M.gguf
+      sha256: 989eccacd52b6d9ebf2c06c35c363da19aadb125659a10df299b7130bc293e77
+      uri: huggingface://mradermacher/calme-2.1-phi3.5-4b-i1-GGUF/calme-2.1-phi3.5-4b.i1-Q4_K_M.gguf
 - &hermes-2-pro-mistral
  ### START Hermes
  url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
@@ -4737,6 +4945,21 @@
    - filename: Hermes-3-Llama-3.1-8B.Q4_K_M.gguf
      sha256: d4403ce5a6e930f4c2509456388c20d633a15ff08dd52ef3b142ff1810ec3553
      uri: huggingface://NousResearch/Hermes-3-Llama-3.1-8B-GGUF/Hermes-3-Llama-3.1-8B.Q4_K_M.gguf
+- !!merge <<: *hermes-2-pro-mistral
+  name: "hermes-3-llama-3.1-8b:Q8"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/bMcZ3sNNQK8SRZpHXBmwM.jpeg
+  urls:
+    - https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B
+    - https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B-GGUF
+  description: |
+    Hermes 3 is a generalist language model developed by Nous Research. It is an advanced agentic model with improved roleplaying, reasoning, multi-turn conversation, long context coherence, and generalist assistant capabilities. The model is built on top of the Llama-3 architecture and has been fine-tuned to achieve superior performance in various tasks. It is designed to be a powerful and reliable tool for solving complex problems and assisting users in achieving their goals. Hermes 3 can be used for a wide range of applications, including research, education, and personal assistant tasks. It is available on the Hugging Face model hub for easy access and integration into existing workflows.
+  overrides:
+    parameters:
+      model: Hermes-3-Llama-3.1-8B.Q8_0.gguf
+  files:
+    - filename: Hermes-3-Llama-3.1-8B.Q8_0.gguf
+      sha256: c77c263f78b2f56fbaddd3ef2af750fda6ebb4344a546aaa0bfdd546b1ca8d84
+      uri: huggingface://NousResearch/Hermes-3-Llama-3.1-8B-GGUF/Hermes-3-Llama-3.1-8B.Q8_0.gguf
 - !!merge <<: *hermes-2-pro-mistral
  name: "hermes-3-llama-3.1-70b"
  icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/vG6j5WxHX09yj32vgjJlI.jpeg
@@ -4752,6 +4975,53 @@
    - filename: Hermes-3-Llama-3.1-70B.Q4_K_M.gguf
      sha256: 955c2f42caade4278f3c9dbffa32bb74572652b20e49e5340e782de3585bbe3f
      uri: huggingface://NousResearch/Hermes-3-Llama-3.1-70B-GGUF/Hermes-3-Llama-3.1-70B.Q4_K_M.gguf
+- !!merge <<: *hermes-2-pro-mistral
+  name: "hermes-3-llama-3.1-70b:Q5_K_M"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/vG6j5WxHX09yj32vgjJlI.jpeg
+  urls:
+    - https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-70B
+    - https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-70B-GGUF
+  description: |
+    Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board. It is designed to focus on aligning LLMs to the user, with powerful steering capabilities and control given to the end user. The model uses ChatML as the prompt format, opening up a much more structured system for engaging the LLM in multi-turn chat dialogue. It also supports function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.
+  overrides:
+    parameters:
+      model: Hermes-3-Llama-3.1-70B.Q5_K_M.gguf
+  files:
+    - filename: Hermes-3-Llama-3.1-70B.Q5_K_M.gguf
+      sha256: 10ae3e0441b14c4a6476436f3c14e8bcacc7928aa3e8ce978d053287289a7ebb
+      uri: huggingface://NousResearch/Hermes-3-Llama-3.1-70B-GGUF/Hermes-3-Llama-3.1-70B.Q5_K_M.gguf
+- &hermes-vllm
+  url: "github:mudler/LocalAI/gallery/hermes-vllm.yaml@master"
+  name: "hermes-3-llama-3.1-8b:vllm"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/vG6j5WxHX09yj32vgjJlI.jpeg
+  tags:
+    - llm
+    - vllm
+    - gpu
+    - function-calling
+  license: llama-3
+  urls:
+    - https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B
+  description: |
+    Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board. It is designed to focus on aligning LLMs to the user, with powerful steering capabilities and control given to the end user. The model uses ChatML as the prompt format, opening up a much more structured system for engaging the LLM in multi-turn chat dialogue. It also supports function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.
+  overrides:
+    parameters:
+      model: NousResearch/Hermes-3-Llama-3.1-8B
+- !!merge <<: *hermes-vllm
+  name: "hermes-3-llama-3.1-70b:vllm"
+  urls:
+    - https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-70B
+  overrides:
+    parameters:
+      model: NousResearch/Hermes-3-Llama-3.1-70B
+- !!merge <<: *hermes-vllm
+  name: "hermes-3-llama-3.1-405b:vllm"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/-kj_KflXsdpcZoTQsvx7W.jpeg
+  urls:
+    - https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-405B
+  overrides:
+    parameters:
+      model: NousResearch/Hermes-3-Llama-3.1-405B
 - !!merge <<: *hermes-2-pro-mistral
  name: "biomistral-7b"
  description: |
@@ -5745,6 +6015,16 @@
    - filename: voice-it-riccardo_fasol-x-low.tar.gz
      uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-it-riccardo_fasol-x-low.tar.gz
      sha256: 394b27b8780f5167e73a62ac103839cc438abc7edb544192f965e5b8f5f4acdb
+- !!merge <<: *piper
+  url: github:mudler/LocalAI/gallery/piper.yaml@master
+  name: voice-it-paola-medium
+  overrides:
+    parameters:
+      model: it-paola-medium.onnx
+  files:
+    - filename: voice-it-paola-medium.tar.gz
+      uri: https://github.com/fakezeta/piper-paola-voice/releases/download/v1.0.0/voice-it-paola-medium.tar.gz
+      sha256: 61d3bac0ff6d347daea5464c4b3ae156a450b603a916cc9ed7deecdeba17153a
 - !!merge <<: *piper
  url: github:mudler/LocalAI/gallery/piper.yaml@master
  name: voice-kk-iseke-x-low
--- a/gallery/vllm.yaml
+++ b/gallery/vllm.yaml
@@ -0,0 +1,32 @@
+---
+name: "vllm"
+
+config_file: |
+    context_size: 8192
+    parameters:
+      max_tokens: 8192
+    backend: vllm
+    function:
+      disable_no_action: true
+      grammar:
+        disable: true
+        parallel_calls: true
+        expect_strings_after_json: true
+    template:
+      use_tokenizer_template: true
+    # Uncomment to specify a quantization method (optional)
+    # quantization: "awq"
+    # Uncomment to limit the GPU memory utilization (vLLM default is 0.9 for 90%)
+    # gpu_memory_utilization: 0.5
+    # Uncomment to trust remote code from huggingface
+    # trust_remote_code: true
+    # Uncomment to enable eager execution
+    # enforce_eager: true
+    # Uncomment to specify the size of the CPU swap space per GPU (in GiB)
+    # swap_space: 2
+    # Uncomment to specify the maximum length of a sequence (including prompt and output)
+    # max_model_len: 32768
+    # Uncomment and specify the number of Tensor divisions.
+    # Allows you to partition and run large models. Performance gains are limited.
+    # https://github.com/vllm-project/vllm/issues/1435
+    # tensor_parallel_size: 2
--- a/go.mod
+++ b/go.mod
@@ -9,8 +9,10 @@ require (
 	github.com/M0Rf30/go-tiny-dream v0.0.0-20240425104733-c04fa463ace9
 	github.com/Masterminds/sprig/v3 v3.2.3
 	github.com/alecthomas/kong v0.9.0
+	github.com/census-instrumentation/opencensus-proto v0.4.1
 	github.com/charmbracelet/glamour v0.7.0
 	github.com/chasefleming/elem-go v0.26.0
+	github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b
 	github.com/containerd/containerd v1.7.19
 	github.com/donomii/go-rwkv.cpp v0.0.0-20240228065144-661e7ae26d44
 	github.com/elliotchance/orderedmap/v2 v2.2.0
@@ -22,8 +24,11 @@ require (
 	github.com/gofiber/fiber/v2 v2.52.5
 	github.com/gofiber/swagger v1.0.0
 	github.com/gofiber/template/html/v2 v2.1.2
+	github.com/gofrs/flock v0.12.1
+	github.com/golang/protobuf v1.5.4
 	github.com/google/go-containerregistry v0.19.2
 	github.com/google/uuid v1.6.0
+	github.com/grpc-ecosystem/grpc-gateway v1.5.0
 	github.com/hpcloud/tail v1.0.0
 	github.com/ipfs/go-log v1.0.5
 	github.com/jaypipes/ghw v0.12.0
@@ -32,11 +37,10 @@ require (
 	github.com/libp2p/go-libp2p v0.36.2
 	github.com/mholt/archiver/v3 v3.5.1
 	github.com/microcosm-cc/bluemonday v1.0.26
-	github.com/mudler/edgevpn v0.27.3
-	github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c
+	github.com/mudler/edgevpn v0.28.3
+	github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82
 	github.com/mudler/go-stable-diffusion v0.0.0-20240429204715-4a3cd6aeae6f
-	github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20240606155928-41c9013fa46a
-	github.com/onsi/ginkgo/v2 v2.20.0
+	github.com/onsi/ginkgo/v2 v2.20.1
 	github.com/onsi/gomega v1.34.1
 	github.com/ory/dockertest/v3 v3.10.0
 	github.com/otiai10/openaigo v1.7.0
@@ -56,6 +60,7 @@ require (
 	go.opentelemetry.io/otel/exporters/prometheus v0.50.0
 	go.opentelemetry.io/otel/metric v1.28.0
 	go.opentelemetry.io/otel/sdk/metric v1.28.0
+	google.golang.org/api v0.180.0
 	google.golang.org/grpc v1.65.0
 	google.golang.org/protobuf v1.34.2
 	gopkg.in/yaml.v2 v2.4.0
@@ -64,10 +69,18 @@ require (
 )

 require (
+	cel.dev/expr v0.15.0 // indirect
+	cloud.google.com/go/auth v0.4.1 // indirect
+	cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect
+	cloud.google.com/go/compute/metadata v0.3.0 // indirect
 	github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
+	github.com/envoyproxy/protoc-gen-validate v1.0.4 // indirect
+	github.com/felixge/httpsnoop v1.0.4 // indirect
 	github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
 	github.com/go-viper/mapstructure/v2 v2.0.0 // indirect
-	github.com/gofrs/flock v0.12.1 // indirect
+	github.com/google/s2a-go v0.1.7 // indirect
+	github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
+	github.com/googleapis/gax-go/v2 v2.12.4 // indirect
 	github.com/labstack/echo/v4 v4.12.0 // indirect
 	github.com/labstack/gommon v0.4.2 // indirect
 	github.com/moby/docker-image-spec v1.3.1 // indirect
@@ -89,11 +102,15 @@ require (
 	github.com/pion/turn/v2 v2.1.6 // indirect
 	github.com/pion/webrtc/v3 v3.3.0 // indirect
 	github.com/russross/blackfriday/v2 v2.1.0 // indirect
+	github.com/shirou/gopsutil/v4 v4.24.7 // indirect
 	github.com/urfave/cli/v2 v2.27.4 // indirect
 	github.com/valyala/fasttemplate v1.2.2 // indirect
 	github.com/wlynxg/anet v0.0.4 // indirect
 	github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
+	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.52.0 // indirect
 	go.uber.org/mock v0.4.0 // indirect
+	golang.org/x/oauth2 v0.21.0 // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20240617180043-68d350f18fd4 // indirect
 )

 require (
@@ -140,7 +157,7 @@ require (
 	github.com/go-audio/riff v1.0.0 // indirect
 	github.com/go-logr/logr v1.4.2 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
-	github.com/go-ole/go-ole v1.2.6 // indirect
+	github.com/go-ole/go-ole v1.3.0 // indirect
 	github.com/go-openapi/jsonpointer v0.21.0 // indirect
 	github.com/go-openapi/jsonreference v0.21.0 // indirect
 	github.com/go-openapi/spec v0.21.0 // indirect
@@ -167,11 +184,11 @@ require (
 	github.com/huandu/xstrings v1.3.3 // indirect
 	github.com/huin/goupnp v1.3.0 // indirect
 	github.com/imdario/mergo v0.3.16 // indirect
-	github.com/ipfs/boxo v0.10.0 // indirect
+	github.com/ipfs/boxo v0.21.0 // indirect
 	github.com/ipfs/go-cid v0.4.1 // indirect
 	github.com/ipfs/go-datastore v0.6.0 // indirect
 	github.com/ipfs/go-log/v2 v2.5.1 // indirect
-	github.com/ipld/go-ipld-prime v0.20.0 // indirect
+	github.com/ipld/go-ipld-prime v0.21.0 // indirect
 	github.com/jackpal/go-nat-pmp v1.0.2 // indirect
 	github.com/jaypipes/pcidb v1.0.0 // indirect
 	github.com/jbenet/go-temp-err-catcher v0.1.0 // indirect
@@ -184,11 +201,11 @@ require (
 	github.com/libp2p/go-cidranger v1.1.0 // indirect
 	github.com/libp2p/go-flow-metrics v0.1.0 // indirect
 	github.com/libp2p/go-libp2p-asn-util v0.4.1 // indirect
-	github.com/libp2p/go-libp2p-kad-dht v0.25.2 // indirect
+	github.com/libp2p/go-libp2p-kad-dht v0.26.1 // indirect
 	github.com/libp2p/go-libp2p-kbucket v0.6.3 // indirect
 	github.com/libp2p/go-libp2p-pubsub v0.12.0 // indirect
 	github.com/libp2p/go-libp2p-record v0.2.0 // indirect
-	github.com/libp2p/go-libp2p-routing-helpers v0.7.2 // indirect
+	github.com/libp2p/go-libp2p-routing-helpers v0.7.4 // indirect
 	github.com/libp2p/go-msgio v0.3.0 // indirect
 	github.com/libp2p/go-nat v0.2.0 // indirect
 	github.com/libp2p/go-netroute v0.2.1 // indirect
@@ -196,7 +213,7 @@ require (
 	github.com/libp2p/go-yamux/v4 v4.0.1 // indirect
 	github.com/libp2p/zeroconf/v2 v2.2.0 // indirect
 	github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
-	github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
+	github.com/lufia/plan9stats v0.0.0-20240819163618-b1d8f4d146e7 // indirect
 	github.com/mailru/easyjson v0.7.7 // indirect
 	github.com/marten-seemann/tcp v0.0.0-20210406111302-dfbc87cc63fd // indirect
 	github.com/mattn/go-colorable v0.1.13 // indirect
@@ -242,7 +259,7 @@ require (
 	github.com/pkoukk/tiktoken-go v0.1.6 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/polydawn/refmt v0.89.0 // indirect
-	github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
+	github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
 	github.com/prometheus/client_model v0.6.1 // indirect
 	github.com/prometheus/common v0.55.0 // indirect
 	github.com/prometheus/procfs v0.15.1 // indirect
@@ -260,14 +277,14 @@ require (
 	github.com/spf13/cast v1.5.0 // indirect
 	github.com/swaggo/files/v2 v2.0.0 // indirect
 	github.com/tinylib/msgp v1.1.8 // indirect
-	github.com/tklauser/go-sysconf v0.3.12 // indirect
-	github.com/tklauser/numcpus v0.6.1 // indirect
+	github.com/tklauser/go-sysconf v0.3.14 // indirect
+	github.com/tklauser/numcpus v0.8.0 // indirect
 	github.com/ulikunitz/xz v0.5.9 // indirect
 	github.com/valyala/bytebufferpool v1.0.0 // indirect
 	github.com/valyala/tcplisten v1.0.0 // indirect
 	github.com/vbatts/tar-split v0.11.3 // indirect
-	github.com/vishvananda/netlink v1.2.1-beta.2 // indirect
-	github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74 // indirect
+	github.com/vishvananda/netlink v1.3.0 // indirect
+	github.com/vishvananda/netns v0.0.4 // indirect
 	github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1 // indirect
 	github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
 	github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
@@ -295,8 +312,8 @@ require (
 	golang.zx2c4.com/wintun v0.0.0-20211104114900-415007cec224 // indirect
 	golang.zx2c4.com/wireguard v0.0.0-20220703234212-c31a7b1ab478 // indirect
 	golang.zx2c4.com/wireguard/windows v0.5.3 // indirect
-	gonum.org/v1/gonum v0.13.0 // indirect
-	google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 // indirect
+	gonum.org/v1/gonum v0.15.0 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20240617180043-68d350f18fd4 // indirect
 	gopkg.in/fsnotify.v1 v1.4.7 // indirect
 	gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
 	howett.net/plist v1.0.0 // indirect
--- a/go.sum
+++ b/go.sum
@@ -1,7 +1,15 @@
+cel.dev/expr v0.15.0 h1:O1jzfJCQBfL5BFoYktaxwIhuttaQPsVWerH9/EEKx0w=
+cel.dev/expr v0.15.0/go.mod h1:TRSuuV7DlVCE/uwv5QbAiW/v8l5O8C4eEPHeu7gf7Sg=
 cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
 cloud.google.com/go v0.31.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
 cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
 cloud.google.com/go v0.37.0/go.mod h1:TS1dMSSfndXH133OKGwekG838Om/cQT0BUHV3HcBgoo=
+cloud.google.com/go/auth v0.4.1 h1:Z7YNIhlWRtrnKlZke7z3GMqzvuYzdc2z98F9D1NV5Hg=
+cloud.google.com/go/auth v0.4.1/go.mod h1:QVBuVEKpCn4Zp58hzRGvL0tjRGU0YqdRTdCHM1IHnro=
+cloud.google.com/go/auth/oauth2adapt v0.2.2 h1:+TTV8aXpjeChS9M+aTtN/TjdQnzJvmzKFt//oWu7HX4=
+cloud.google.com/go/auth/oauth2adapt v0.2.2/go.mod h1:wcYjgpZI9+Yu7LyYBg4pqSiaRkfEK3GQcpb7C/uyF1Q=
+cloud.google.com/go/compute/metadata v0.3.0 h1:Tz+eQXMEqDIKRsmY3cHTL6FVaynIjX2QxYC4trgAKZc=
+cloud.google.com/go/compute/metadata v0.3.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1hT1fIilQDwofLpJ20k=
 dario.cat/mergo v1.0.0 h1:AGCNq9Evsj31mOgNPcLyXc+4PNABt905YmuqPYYpBWk=
 dario.cat/mergo v1.0.0/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
 dmitri.shuralyov.com/app/changes v0.0.0-20180602232624-0a106ad413e3/go.mod h1:Yl+fi1br7+Rr3LqpNJf1/uxUdtRUV+Tnj0o93V2B9MU=
@@ -61,6 +69,8 @@ github.com/c-robinson/iplib v1.0.8/go.mod h1:i3LuuFL1hRT5gFpBRnEydzw8R6yhGkF4szN
 github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
 github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
 github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
+github.com/census-instrumentation/opencensus-proto v0.4.1 h1:iKLQ0xPNFxR/2hzXZMrBo8f1j86j5WHzznCCQxV/b8g=
+github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw=
 github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
 github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/charmbracelet/glamour v0.7.0 h1:2BtKGZ4iVJCDfMF229EzbeR1QRKLWztO9dMtjmqZSng=
@@ -70,6 +80,8 @@ github.com/chasefleming/elem-go v0.26.0/go.mod h1:hz73qILBIKnTgOujnSMtEj20/epI+f
 github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX2Qs=
 github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
 github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
+github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b h1:ga8SEFjZ60pxLcmhnThWgvH2wg8376yUJmPhEH4H3kw=
+github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=
 github.com/containerd/cgroups v0.0.0-20201119153540-4cbc285b3327/go.mod h1:ZJeTFisyysqgcCdecO57Dj79RfL0LNeGiFUqLYQRYLE=
 github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM=
 github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw=
@@ -129,8 +141,6 @@ github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj6
 github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
 github.com/elastic/gosigar v0.12.0/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs=
-github.com/elastic/gosigar v0.14.2 h1:Dg80n8cr90OZ7x+bAax/QjoW/XqTI11RmA79ZwIm9/4=
-github.com/elastic/gosigar v0.14.2/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs=
 github.com/elastic/gosigar v0.14.3 h1:xwkKwPia+hSfg9GqrCUKYdId102m9qTJIIr7egmK/uo=
 github.com/elastic/gosigar v0.14.3/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs=
 github.com/elliotchance/orderedmap/v2 v2.2.0 h1:7/2iwO98kYT4XkOjA9mBEIwvi4KpGB4cyHeOFOnj4Vk=
@@ -139,15 +149,18 @@ github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymF
 github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
 github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
 github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
+github.com/envoyproxy/protoc-gen-validate v1.0.4 h1:gVPz/FMfvh57HdSJQyvBtF00j8JU4zdyUgIUNhlgg0A=
+github.com/envoyproxy/protoc-gen-validate v1.0.4/go.mod h1:qys6tmnRsYrQqIhm2bvKZH4Blx/1gTIZ2UKVY1M+Yew=
+github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
+github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
 github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc=
 github.com/flynn/noise v1.1.0 h1:KjPQoQCEFdZDiP03phOvGi11+SVVhBG2wOWAorLsstg=
 github.com/flynn/noise v1.1.0/go.mod h1:xbMo+0i6+IGbYdJhF31t2eR1BIU0CYc12+BNAKwUTag=
 github.com/francoispqt/gojay v1.2.13 h1:d2m3sFjloqoIUQU3TsHBgj6qg/BVGlTBeHDUmyJnXKk=
 github.com/francoispqt/gojay v1.2.13/go.mod h1:ehT5mTG4ua4581f1++1WLG0vPdaA9HaiDsoyrBGkyDY=
-github.com/frankban/quicktest v1.14.4 h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0XL9UY=
-github.com/frankban/quicktest v1.14.4/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
+github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
+github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
 github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
-github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
 github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
 github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20240626202019-c118733a29ad h1:dQ93Vd6i25o+zH9vvnZ8mu7jtJQ6jT3D+zE3V8Q49n0=
@@ -168,8 +181,9 @@ github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ4
 github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
 github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
 github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
-github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
 github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
+github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE=
+github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78=
 github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ=
 github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY=
 github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ=
@@ -184,7 +198,6 @@ github.com/go-skynet/go-llama.cpp v0.0.0-20240314183750-6a8041ef6b46 h1:lALhXzDk
 github.com/go-skynet/go-llama.cpp v0.0.0-20240314183750-6a8041ef6b46/go.mod h1:iub0ugfTnflE3rcIuqV2pQSo15nEw3GLW/utm5gyERo=
 github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI=
 github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI=
-github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE=
 github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
 github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
 github.com/go-viper/mapstructure/v2 v2.0.0 h1:dhn8MZ1gZ0mzeodTG3jt5Vj/o87xZKuNAprG2mQfMfc=
@@ -228,10 +241,9 @@ github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrU
 github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
 github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
 github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
-github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
 github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
-github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
-github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
+github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
+github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
 github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
 github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
@@ -246,8 +258,6 @@ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
 github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
 github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/go-containerregistry v0.19.2 h1:TannFKE1QSajsP6hPWb5oJNgKe1IKjHukIKDUmvsV6w=
@@ -258,11 +268,11 @@ github.com/google/gopacket v1.1.19 h1:ves8RnFZPGiFnTS0uPQStjwru6uO6h+nlr9j6fL7kF
 github.com/google/gopacket v1.1.19/go.mod h1:iJ8V8n6KS+z2U1A8pUwu8bW5SyEMkXJB8Yo/Vo+TKTo=
 github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
 github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
-github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 h1:k7nVchz72niMH6YLQNvHSdIE7iqsQxK1P41mySCvssg=
-github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw=
 github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8 h1:FKHo8hFI3A+7w0aUQuYXQ+6EN5stWmeY/AZqtM8xk9k=
 github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo=
 github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
+github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o=
+github.com/google/s2a-go v0.1.7/go.mod h1:50CgR4k1jNlWBu4UfS4AcfhVe1r6pdZPygJ3R8F0Qdw=
 github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4=
 github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ=
 github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
@@ -270,8 +280,12 @@ github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+
 github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/googleapis/enterprise-certificate-proxy v0.3.2 h1:Vie5ybvEvT75RniqhfFxPRy3Bf7vr3h0cechB90XaQs=
+github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0=
 github.com/googleapis/gax-go v2.0.0+incompatible/go.mod h1:SFVmujtThgffbyetf+mdk2eWhX2bMyUtNHzFKcPA9HY=
 github.com/googleapis/gax-go/v2 v2.0.3/go.mod h1:LLvjysVCY1JZeum8Z6l8qUty8fiNwE08qbEPm1M08qg=
+github.com/googleapis/gax-go/v2 v2.12.4 h1:9gWcmF85Wvq4ryPFvGFaOgPIs1AQX0d0bcbGw4Z96qg=
+github.com/googleapis/gax-go/v2 v2.12.4/go.mod h1:KYEYLorsnIGDi/rPC8b5TdlB9kbKoFubselGIoBMCwI=
 github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
 github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c h1:7lF+Vz0LqiRidnzC1Oq86fpX1q/iEv2KJdrCtttYjT4=
 github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
@@ -280,6 +294,7 @@ github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A
 github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
 github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
 github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
+github.com/grpc-ecosystem/grpc-gateway v1.5.0 h1:WcmKMm43DR7RdtlkEXQJyo5ws8iTp98CyhCCbOHMvNI=
 github.com/grpc-ecosystem/grpc-gateway v1.5.0/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw=
 github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
 github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
@@ -303,23 +318,23 @@ github.com/huin/goupnp v1.3.0/go.mod h1:gnGPsThkYa7bFi/KWmEysQRf48l2dvR5bxr2OFck
 github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
 github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
 github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
-github.com/ipfs/boxo v0.10.0 h1:tdDAxq8jrsbRkYoF+5Rcqyeb91hgWe2hp7iLu7ORZLY=
-github.com/ipfs/boxo v0.10.0/go.mod h1:Fg+BnfxZ0RPzR0nOodzdIq3A7KgoWAOWsEIImrIQdBM=
+github.com/ipfs/boxo v0.21.0 h1:XpGXb+TQQ0IUdYaeAxGzWjSs6ow/Lce148A/2IbRDVE=
+github.com/ipfs/boxo v0.21.0/go.mod h1:NmweAYeY1USOaJJxouy7DLr/Y5M8UBSsCI2KRivO+TY=
 github.com/ipfs/go-cid v0.4.1 h1:A/T3qGvxi4kpKWWcPC/PgbvDA2bjVLO7n4UeVwnbs/s=
 github.com/ipfs/go-cid v0.4.1/go.mod h1:uQHwDeX4c6CtyrFwdqyhpNcxVewur1M7l7fNU7LKwZk=
 github.com/ipfs/go-datastore v0.6.0 h1:JKyz+Gvz1QEZw0LsX1IBn+JFCJQH4SJVFtM4uWU0Myk=
 github.com/ipfs/go-datastore v0.6.0/go.mod h1:rt5M3nNbSO/8q1t4LNkLyUwRs8HupMeN/8O4Vn9YAT8=
 github.com/ipfs/go-detect-race v0.0.1 h1:qX/xay2W3E4Q1U7d9lNs1sU9nvguX0a7319XbyQ6cOk=
 github.com/ipfs/go-detect-race v0.0.1/go.mod h1:8BNT7shDZPo99Q74BpGMK+4D8Mn4j46UU0LZ723meps=
-github.com/ipfs/go-ipfs-util v0.0.2 h1:59Sswnk1MFaiq+VcaknX7aYEyGyGDAA73ilhEK2POp8=
-github.com/ipfs/go-ipfs-util v0.0.2/go.mod h1:CbPtkWJzjLdEcezDns2XYaehFVNXG9zrdrtMecczcsQ=
+github.com/ipfs/go-ipfs-util v0.0.3 h1:2RFdGez6bu2ZlZdI+rWfIdbQb1KudQp3VGwPtdNCmE0=
+github.com/ipfs/go-ipfs-util v0.0.3/go.mod h1:LHzG1a0Ig4G+iZ26UUOMjHd+lfM84LZCrn17xAKWBvs=
 github.com/ipfs/go-log v1.0.5 h1:2dOuUCB1Z7uoczMWgAyDck5JLb72zHzrMnGnCNNbvY8=
 github.com/ipfs/go-log v1.0.5/go.mod h1:j0b8ZoR+7+R99LD9jZ6+AJsrzkPbSXbZfGakb5JPtIo=
 github.com/ipfs/go-log/v2 v2.1.3/go.mod h1:/8d0SH3Su5Ooc31QlL1WysJhvyOTDCjcCZ9Axpmri6g=
 github.com/ipfs/go-log/v2 v2.5.1 h1:1XdUzF7048prq4aBjDQQ4SL5RxftpRGdXhNRwKSAlcY=
 github.com/ipfs/go-log/v2 v2.5.1/go.mod h1:prSpmC1Gpllc9UYWxDiZDreBYw7zp4Iqp1kOLU9U5UI=
-github.com/ipld/go-ipld-prime v0.20.0 h1:Ud3VwE9ClxpO2LkCYP7vWPc0Fo+dYdYzgxUJZ3uRG4g=
-github.com/ipld/go-ipld-prime v0.20.0/go.mod h1:PzqZ/ZR981eKbgdr3y2DJYeD/8bgMawdGVlJDE8kK+M=
+github.com/ipld/go-ipld-prime v0.21.0 h1:n4JmcpOlPDIxBcY037SVfpd1G+Sj1nKZah0m6QH9C2E=
+github.com/ipld/go-ipld-prime v0.21.0/go.mod h1:3RLqy//ERg/y5oShXXdx5YIp50cFGOanyMctpPjsvxQ=
 github.com/jackpal/go-nat-pmp v1.0.2 h1:KzKSgb7qkJvOUTqYl9/Hg/me3pWgBmERKrTGD7BdWus=
 github.com/jackpal/go-nat-pmp v1.0.2/go.mod h1:QPH045xvCAeXUZOxsnwmrtiCoxIr9eob+4orBN1SBKc=
 github.com/jaypipes/ghw v0.12.0 h1:xU2/MDJfWmBhJnujHY9qwXQLs3DBsf0/Xa9vECY0Tho=
@@ -365,6 +380,8 @@ github.com/kr/pty v1.1.3/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
 github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
+github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
 github.com/labstack/echo/v4 v4.12.0 h1:IKpw49IMryVB2p1a4dzwlhP1O2Tf2E0Ir/450lH+kI0=
 github.com/labstack/echo/v4 v4.12.0/go.mod h1:UP9Cr2DJXbOK3Kr9ONYzNowSh7HP0aG0ShAyycHSJvM=
 github.com/labstack/gommon v0.4.2 h1:F8qTUNXgG1+6WQmqoUWnz8WiEU60mXVVw0P4ht1WRA0=
@@ -377,26 +394,20 @@ github.com/libp2p/go-cidranger v1.1.0 h1:ewPN8EZ0dd1LSnrtuwd4709PXVcITVeuwbag38y
 github.com/libp2p/go-cidranger v1.1.0/go.mod h1:KWZTfSr+r9qEo9OkI9/SIEeAtw+NNoU0dXIXt15Okic=
 github.com/libp2p/go-flow-metrics v0.1.0 h1:0iPhMI8PskQwzh57jB9WxIuIOQ0r+15PChFGkx3Q3WM=
 github.com/libp2p/go-flow-metrics v0.1.0/go.mod h1:4Xi8MX8wj5aWNDAZttg6UPmc0ZrnFNsMtpsYUClFtro=
-github.com/libp2p/go-libp2p v0.35.2 h1:287oHbuplkrLdAF+syB0n/qDgd50AUBtEODqS0e0HDs=
-github.com/libp2p/go-libp2p v0.35.2/go.mod h1:RKCDNt30IkFipGL0tl8wQW/3zVWEGFUZo8g2gAKxwjU=
-github.com/libp2p/go-libp2p v0.35.4 h1:FDiBUYLkueFwsuNJUZaxKRdpKvBOWU64qQPL768bSeg=
-github.com/libp2p/go-libp2p v0.35.4/go.mod h1:RKCDNt30IkFipGL0tl8wQW/3zVWEGFUZo8g2gAKxwjU=
 github.com/libp2p/go-libp2p v0.36.2 h1:BbqRkDaGC3/5xfaJakLV/BrpjlAuYqSB0lRvtzL3B/U=
 github.com/libp2p/go-libp2p v0.36.2/go.mod h1:XO3joasRE4Eup8yCTTP/+kX+g92mOgRaadk46LmPhHY=
 github.com/libp2p/go-libp2p-asn-util v0.4.1 h1:xqL7++IKD9TBFMgnLPZR6/6iYhawHKHl950SO9L6n94=
 github.com/libp2p/go-libp2p-asn-util v0.4.1/go.mod h1:d/NI6XZ9qxw67b4e+NgpQexCIiFYJjErASrYW4PFDN8=
-github.com/libp2p/go-libp2p-kad-dht v0.25.2 h1:FOIk9gHoe4YRWXTu8SY9Z1d0RILol0TrtApsMDPjAVQ=
-github.com/libp2p/go-libp2p-kad-dht v0.25.2/go.mod h1:6za56ncRHYXX4Nc2vn8z7CZK0P4QiMcrn77acKLM2Oo=
+github.com/libp2p/go-libp2p-kad-dht v0.26.1 h1:AazV3LCImYVkDUGAHx5lIEgZ9iUI2QQKH5GMRQU8uEA=
+github.com/libp2p/go-libp2p-kad-dht v0.26.1/go.mod h1:mqRUGJ/+7ziQ3XknU2kKHfsbbgb9xL65DXjPOJwmZF8=
 github.com/libp2p/go-libp2p-kbucket v0.6.3 h1:p507271wWzpy2f1XxPzCQG9NiN6R6lHL9GiSErbQQo0=
 github.com/libp2p/go-libp2p-kbucket v0.6.3/go.mod h1:RCseT7AH6eJWxxk2ol03xtP9pEHetYSPXOaJnOiD8i0=
-github.com/libp2p/go-libp2p-pubsub v0.11.0 h1:+JvS8Kty0OiyUiN0i8H5JbaCgjnJTRnTHe4rU88dLFc=
-github.com/libp2p/go-libp2p-pubsub v0.11.0/go.mod h1:QEb+hEV9WL9wCiUAnpY29FZR6W3zK8qYlaml8R4q6gQ=
 github.com/libp2p/go-libp2p-pubsub v0.12.0 h1:PENNZjSfk8KYxANRlpipdS7+BfLmOl3L2E/6vSNjbdI=
 github.com/libp2p/go-libp2p-pubsub v0.12.0/go.mod h1:Oi0zw9aw8/Y5GC99zt+Ef2gYAl+0nZlwdJonDyOz/sE=
 github.com/libp2p/go-libp2p-record v0.2.0 h1:oiNUOCWno2BFuxt3my4i1frNrt7PerzB3queqa1NkQ0=
 github.com/libp2p/go-libp2p-record v0.2.0/go.mod h1:I+3zMkvvg5m2OcSdoL0KPljyJyvNDFGKX7QdlpYUcwk=
-github.com/libp2p/go-libp2p-routing-helpers v0.7.2 h1:xJMFyhQ3Iuqnk9Q2dYE1eUTzsah7NLw3Qs2zjUV78T0=
-github.com/libp2p/go-libp2p-routing-helpers v0.7.2/go.mod h1:cN4mJAD/7zfPKXBcs9ze31JGYAZgzdABEm+q/hkswb8=
+github.com/libp2p/go-libp2p-routing-helpers v0.7.4 h1:6LqS1Bzn5CfDJ4tzvP9uwh42IB7TJLNFJA6dEeGBv84=
+github.com/libp2p/go-libp2p-routing-helpers v0.7.4/go.mod h1:we5WDj9tbolBXOuF1hGOkR+r7Uh1408tQbAKaT5n1LE=
 github.com/libp2p/go-libp2p-testing v0.12.0 h1:EPvBb4kKMWO29qP4mZGyhVzUyR25dvfUIK5WDu6iPUA=
 github.com/libp2p/go-libp2p-testing v0.12.0/go.mod h1:KcGDRXyN7sQCllucn1cOOS+Dmm7ujhfEyXQL5lvkcPg=
 github.com/libp2p/go-msgio v0.3.0 h1:mf3Z8B1xcFN314sWX+2vOTShIE0Mmn2TXn3YCUQGNj0=
@@ -413,8 +424,8 @@ github.com/libp2p/zeroconf/v2 v2.2.0 h1:Cup06Jv6u81HLhIj1KasuNM/RHHrJ8T7wOTS4+Tv
 github.com/libp2p/zeroconf/v2 v2.2.0/go.mod h1:fuJqLnUwZTshS3U/bMRJ3+ow/v9oid1n0DmyYyNO1Xs=
 github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
 github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
-github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
-github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
+github.com/lufia/plan9stats v0.0.0-20240819163618-b1d8f4d146e7 h1:5RK988zAqB3/AN3opGfRpoQgAVqr6/A5+qRTi67VUZY=
+github.com/lufia/plan9stats v0.0.0-20240819163618-b1d8f4d146e7/go.mod h1:ilwx/Dta8jXAgpFYFvSWEMwxmbWXyiUHkd5FwyKhb5k=
 github.com/lunixbochs/vtclean v1.0.0/go.mod h1:pHhQNgMf3btfWnGBVipUOjRYhoOsdGqdm/+2c2E2WMI=
 github.com/mailru/easyjson v0.0.0-20190312143242-1de009706dbe/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
 github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
@@ -440,8 +451,6 @@ github.com/microcosm-cc/bluemonday v1.0.26 h1:xbqSvqzQMeEHCqMi64VAs4d8uy6Mequs3r
 github.com/microcosm-cc/bluemonday v1.0.26/go.mod h1:JyzOCs9gkyQyjs+6h10UEVSe02CGwkhd72Xdqh78TWs=
 github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI=
 github.com/miekg/dns v1.1.43/go.mod h1:+evo5L0630/F6ca/Z9+GAqzhjGyn8/c+TBaOyfEl0V4=
-github.com/miekg/dns v1.1.61 h1:nLxbwF3XxhwVSm8g9Dghm9MHPaUZuqhPiGL+675ZmEs=
-github.com/miekg/dns v1.1.61/go.mod h1:mnAarhS3nWaW+NVP2wTkYVIZyHNJ098SJZUki3eykwQ=
 github.com/miekg/dns v1.1.62 h1:cN8OuEF1/x5Rq6Np+h1epln8OiyPWV+lROx9LxcGgIQ=
 github.com/miekg/dns v1.1.62/go.mod h1:mvDlcItzm+br7MToIKqkglaGhlFMHJ9DTNNWONWXbNQ=
 github.com/mikioh/tcp v0.0.0-20190314235350-803a9b46060c h1:bzE/A84HN25pxAuk9Eej1Kz9OUelF97nAc82bDquQI8=
@@ -477,20 +486,18 @@ github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3Rllmb
 github.com/mr-tron/base58 v1.1.2/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
 github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o=
 github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
-github.com/mudler/edgevpn v0.26.2 h1:OK4jfk7sYjuU7vCh+geUJk38lsxRgMk+EdsS9s0hioE=
-github.com/mudler/edgevpn v0.26.2/go.mod h1:lplntB9N6LzGNqeSM3XHCq8kyDPsNhY3jqEbWGD2WaQ=
-github.com/mudler/edgevpn v0.27.0 h1:FnBVzPs098DTgbUkiwm22n30hmEVBAq+PVpXanqx6qo=
-github.com/mudler/edgevpn v0.27.0/go.mod h1:Hwvr+i+dePgn/Yh+EMMvqcw9ByUCLAWD9TgYtJYV95Y=
-github.com/mudler/edgevpn v0.27.1 h1:UKW7/JW4l2cBAPMRnlZRHbuFDGrv7resVJlFD34WBDE=
-github.com/mudler/edgevpn v0.27.1/go.mod h1:PK7rl0QQQTdlpie9rlaS7DguH500ogqproQli/QwrxU=
-github.com/mudler/edgevpn v0.27.2 h1:FsQ95jPCDJP9LzKJYCHx70z08DGXK5yrHMzH9Qok3nE=
-github.com/mudler/edgevpn v0.27.2/go.mod h1:PK7rl0QQQTdlpie9rlaS7DguH500ogqproQli/QwrxU=
-github.com/mudler/edgevpn v0.27.3 h1:9g6M7Q+2GdwDN12KmjhYJDi69cttvDW7luBmZioD2ZM=
-github.com/mudler/edgevpn v0.27.3/go.mod h1:PK7rl0QQQTdlpie9rlaS7DguH500ogqproQli/QwrxU=
+github.com/mudler/edgevpn v0.27.4 h1:T/irkShcnU6h9OZqtvWXpNo+3gQVPUMBEoUutxJ3YUg=
+github.com/mudler/edgevpn v0.27.4/go.mod h1:NFs/RpDHCaltPFnZmOLCiUmVpTQloER1LbAtptOzqrw=
+github.com/mudler/edgevpn v0.28.0 h1:oF/Msx3zPNajy3uYLPRT5M7H3Z+sCMU0wAA8TkB11PI=
+github.com/mudler/edgevpn v0.28.0/go.mod h1:/xk8vnXUcGajPPMW5rZhPt1aD0b95LeOj2xGbRbDS8A=
+github.com/mudler/edgevpn v0.28.2 h1:wxLrH9b3NNQDgMb0Uy4gmqbGh6Ad5jdbf21GrU32xVU=
+github.com/mudler/edgevpn v0.28.2/go.mod h1:HWcdIwj5zBgOD04Hn3I+J5E5Yb3kK1CwwWaEe6/QERo=
+github.com/mudler/edgevpn v0.28.3 h1:yIuoMExwKHy/mNMBXIsm6FUFbnB9ELIxw9KXrK9KHDk=
+github.com/mudler/edgevpn v0.28.3/go.mod h1:HWcdIwj5zBgOD04Hn3I+J5E5Yb3kK1CwwWaEe6/QERo=
 github.com/mudler/go-piper v0.0.0-20240315144837-9d0100873a7d h1:8udOFrDf/I83JL0/u22j6U6Q9z9LoSdby2a/DWdd0/s=
 github.com/mudler/go-piper v0.0.0-20240315144837-9d0100873a7d/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
-github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c h1:CI5uGwqBpN8N7BrSKC+nmdfw+9nPQIDyjHHlaIiitZI=
-github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c/go.mod h1:gY3wyrhkRySJtmtI/JPt4a2mKv48h/M9pEZIW+SjeC0=
+github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82 h1:FVT07EI8njvsD4tC2Hw8Xhactp5AWhsQWD4oTeQuSAU=
+github.com/mudler/go-processmanager v0.0.0-20240820160718-8b802d3ecf82/go.mod h1:Urp7LG5jylKoDq0663qeBh0pINGcRl35nXdKx82PSoU=
 github.com/mudler/go-stable-diffusion v0.0.0-20240429204715-4a3cd6aeae6f h1:cxtMSRkUfy+mjIQ3yMrU0txwQ4It913NEN4m1H8WWgo=
 github.com/mudler/go-stable-diffusion v0.0.0-20240429204715-4a3cd6aeae6f/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw=
 github.com/mudler/water v0.0.0-20221010214108-8c7313014ce0 h1:Qh6ghkMgTu6siFbTf7L3IszJmshMhXxNL4V+t7IIA6w=
@@ -527,30 +534,18 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq
 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
 github.com/neelance/astrewrite v0.0.0-20160511093645-99348263ae86/go.mod h1:kHJEU3ofeGjhHklVoIGuVj85JJwZ6kWPaJwCIxgnFmo=
 github.com/neelance/sourcemap v0.0.0-20151028013722-8c68805598ab/go.mod h1:Qr6/a/Q4r9LP1IltGz7tA7iOK1WonHEYhu1HRBA7ZiM=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20240606155928-41c9013fa46a h1:jLmaG6BYcFvUDGFJM8B9kOM2yfvaTLxrKcFkBn4nstA=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20240606155928-41c9013fa46a/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
 github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ=
 github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
-github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
-github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU=
 github.com/nxadm/tail v1.4.11 h1:8feyoE3OzPrcshW5/MJ4sGESc5cqmGkGCWlco4l0bqY=
 github.com/nxadm/tail v1.4.11/go.mod h1:OTaG3NK980DZzxbRq6lEuzgU+mug70nY11sMd4JXXHc=
 github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
 github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
-github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
-github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
-github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0=
 github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
 github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU=
-github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA=
-github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To=
 github.com/onsi/ginkgo/v2 v2.20.0 h1:PE84V2mHqoT1sglvHc8ZdQtPcwmvvt29WLEEO3xmdZw=
 github.com/onsi/ginkgo/v2 v2.20.0/go.mod h1:lG9ey2Z29hR41WMVthyJBGUBcBhGOtoPF2VFMvBXFCI=
-github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
-github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
-github.com/onsi/gomega v1.16.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY=
-github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk=
-github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0=
+github.com/onsi/ginkgo/v2 v2.20.1 h1:YlVIbqct+ZmnEph770q9Q7NVAz4wwIiVNahee6JyUzo=
+github.com/onsi/ginkgo/v2 v2.20.1/go.mod h1:lG9ey2Z29hR41WMVthyJBGUBcBhGOtoPF2VFMvBXFCI=
 github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k=
 github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY=
 github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
@@ -581,21 +576,13 @@ github.com/philhofer/fwd v1.1.2 h1:bnDivRJ1EWPjUIRXV5KfORO897HTbpFAQddBdE8t7Gw=
 github.com/philhofer/fwd v1.1.2/go.mod h1:qkPdfjR2SIEbspLqpe1tO4n5yICnr2DY7mqEx2tUTP0=
 github.com/pierrec/lz4/v4 v4.1.2 h1:qvY3YFXRQE/XB8MlLzJH7mSzBs74eA2gg52YTk6jUPM=
 github.com/pierrec/lz4/v4 v4.1.2/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
-github.com/pion/datachannel v1.5.6 h1:1IxKJntfSlYkpUj8LlYRSWpYiTTC02nUrOE8T3DqGeg=
-github.com/pion/datachannel v1.5.6/go.mod h1:1eKT6Q85pRnr2mHiWHxJwO50SfZRtWHTsNIVb/NfGW4=
 github.com/pion/datachannel v1.5.8 h1:ph1P1NsGkazkjrvyMfhRBUAWMxugJjq2HfQifaOoSNo=
 github.com/pion/datachannel v1.5.8/go.mod h1:PgmdpoaNBLX9HNzNClmdki4DYW5JtI7Yibu8QzbL3tI=
 github.com/pion/dtls/v2 v2.2.7/go.mod h1:8WiMkebSHFD0T+dIU+UeBaoV7kDhOW5oDCzZ7WZ/F9s=
-github.com/pion/dtls/v2 v2.2.11 h1:9U/dpCYl1ySttROPWJgqWKEylUdT0fXp/xst6JwY5Ks=
-github.com/pion/dtls/v2 v2.2.11/go.mod h1:d9SYc9fch0CqK90mRk1dC7AkzzpwJj6u2GU3u+9pqFE=
 github.com/pion/dtls/v2 v2.2.12 h1:KP7H5/c1EiVAAKUmXyCzPiQe5+bCJrpOeKg/L05dunk=
 github.com/pion/dtls/v2 v2.2.12/go.mod h1:d9SYc9fch0CqK90mRk1dC7AkzzpwJj6u2GU3u+9pqFE=
-github.com/pion/ice/v2 v2.3.25 h1:M5rJA07dqhi3nobJIg+uPtcVjFECTrhcR3n0ns8kDZs=
-github.com/pion/ice/v2 v2.3.25/go.mod h1:KXJJcZK7E8WzrBEYnV4UtqEZsGeWfHxsNqhVcVvgjxw=
 github.com/pion/ice/v2 v2.3.34 h1:Ic1ppYCj4tUOcPAp76U6F3fVrlSw8A9JtRXLqw6BbUM=
 github.com/pion/ice/v2 v2.3.34/go.mod h1:mBF7lnigdqgtB+YHkaY/Y6s6tsyRyo4u4rPGRuOjUBQ=
-github.com/pion/interceptor v0.1.29 h1:39fsnlP1U8gw2JzOFWdfCU82vHvhW9o0rZnZF56wF+M=
-github.com/pion/interceptor v0.1.29/go.mod h1:ri+LGNjRUc5xUNtDEPzfdkmSqISixVTBF/z/Zms/6T4=
 github.com/pion/interceptor v0.1.30 h1:au5rlVHsgmxNi+v/mjOPazbW1SHzfx7/hYOEYQnUcxA=
 github.com/pion/interceptor v0.1.30/go.mod h1:RQuKT5HTdkP2Fi0cuOS5G5WNymTjzXaGF75J4k7z2nc=
 github.com/pion/logging v0.2.2 h1:M9+AIj/+pxNsDfAT64+MAVgJO0rsyLnoJKCqf//DoeY=
@@ -608,39 +595,27 @@ github.com/pion/rtcp v1.2.12/go.mod h1:sn6qjxvnwyAkkPzPULIbVqSKI5Dv54Rv7VG0kNxh9
 github.com/pion/rtcp v1.2.14 h1:KCkGV3vJ+4DAJmvP0vaQShsb0xkRfWkO540Gy102KyE=
 github.com/pion/rtcp v1.2.14/go.mod h1:sn6qjxvnwyAkkPzPULIbVqSKI5Dv54Rv7VG0kNxh9L4=
 github.com/pion/rtp v1.8.3/go.mod h1:pBGHaFt/yW7bf1jjWAoUjpSNoDnw98KTMg+jWWvziqU=
-github.com/pion/rtp v1.8.6 h1:MTmn/b0aWWsAzux2AmP8WGllusBVw4NPYPVFFd7jUPw=
-github.com/pion/rtp v1.8.6/go.mod h1:pBGHaFt/yW7bf1jjWAoUjpSNoDnw98KTMg+jWWvziqU=
 github.com/pion/rtp v1.8.9 h1:E2HX740TZKaqdcPmf4pw6ZZuG8u5RlMMt+l3dxeu6Wk=
 github.com/pion/rtp v1.8.9/go.mod h1:pBGHaFt/yW7bf1jjWAoUjpSNoDnw98KTMg+jWWvziqU=
-github.com/pion/sctp v1.8.13/go.mod h1:YKSgO/bO/6aOMP9LCie1DuD7m+GamiK2yIiPM6vH+GA=
-github.com/pion/sctp v1.8.16 h1:PKrMs+o9EMLRvFfXq59WFsC+V8mN1wnKzqrv+3D/gYY=
-github.com/pion/sctp v1.8.16/go.mod h1:P6PbDVA++OJMrVNg2AL3XtYHV4uD6dvfyOovCgMs0PE=
 github.com/pion/sctp v1.8.33 h1:dSE4wX6uTJBcNm8+YlMg7lw1wqyKHggsP5uKbdj+NZw=
 github.com/pion/sctp v1.8.33/go.mod h1:beTnqSzewI53KWoG3nqB282oDMGrhNxBdb+JZnkCwRM=
 github.com/pion/sdp/v3 v3.0.9 h1:pX++dCHoHUwq43kuwf3PyJfHlwIj4hXA7Vrifiq0IJY=
 github.com/pion/sdp/v3 v3.0.9/go.mod h1:B5xmvENq5IXJimIO4zfp6LAe1fD9N+kFv+V/1lOdz8M=
-github.com/pion/srtp/v2 v2.0.18 h1:vKpAXfawO9RtTRKZJbG4y0v1b11NZxQnxRl85kGuUlo=
-github.com/pion/srtp/v2 v2.0.18/go.mod h1:0KJQjA99A6/a0DOVTu1PhDSw0CXF2jTkqOoMg3ODqdA=
 github.com/pion/srtp/v2 v2.0.20 h1:HNNny4s+OUmG280ETrCdgFndp4ufx3/uy85EawYEhTk=
 github.com/pion/srtp/v2 v2.0.20/go.mod h1:0KJQjA99A6/a0DOVTu1PhDSw0CXF2jTkqOoMg3ODqdA=
 github.com/pion/stun v0.6.1 h1:8lp6YejULeHBF8NmV8e2787BogQhduZugh5PdhDyyN4=
 github.com/pion/stun v0.6.1/go.mod h1:/hO7APkX4hZKu/D0f2lHzNyvdkTGtIy3NDmLR7kSz/8=
 github.com/pion/transport/v2 v2.2.1/go.mod h1:cXXWavvCnFF6McHTft3DWS9iic2Mftcz1Aq29pGcU5g=
-github.com/pion/transport/v2 v2.2.2/go.mod h1:OJg3ojoBJopjEeECq2yJdXH9YVrUJ1uQ++NjXLOUorc=
 github.com/pion/transport/v2 v2.2.3/go.mod h1:q2U/tf9FEfnSBGSW6w5Qp5PFWRLRj3NjLhCCgpRK4p0=
 github.com/pion/transport/v2 v2.2.4/go.mod h1:q2U/tf9FEfnSBGSW6w5Qp5PFWRLRj3NjLhCCgpRK4p0=
-github.com/pion/transport/v2 v2.2.5 h1:iyi25i/21gQck4hfRhomF6SktmUQjRsRW4WJdhfc3Kc=
-github.com/pion/transport/v2 v2.2.5/go.mod h1:q2U/tf9FEfnSBGSW6w5Qp5PFWRLRj3NjLhCCgpRK4p0=
 github.com/pion/transport/v2 v2.2.10 h1:ucLBLE8nuxiHfvkFKnkDQRYWYfp8ejf4YBOPfaQpw6Q=
 github.com/pion/transport/v2 v2.2.10/go.mod h1:sq1kSLWs+cHW9E+2fJP95QudkzbK7wscs8yYgQToO5E=
 github.com/pion/transport/v3 v3.0.1/go.mod h1:UY7kiITrlMv7/IKgd5eTUcaahZx5oUN3l9SzK5f5xE0=
-github.com/pion/transport/v3 v3.0.2 h1:r+40RJR25S9w3jbA6/5uEPTzcdn7ncyU44RWCbHkLg4=
-github.com/pion/transport/v3 v3.0.2/go.mod h1:nIToODoOlb5If2jF9y2Igfx3PFYWfuXi37m0IlWa/D0=
+github.com/pion/transport/v3 v3.0.7 h1:iRbMH05BzSNwhILHoBoAPxoB9xQgOaJk+591KC9P1o0=
+github.com/pion/transport/v3 v3.0.7/go.mod h1:YleKiTZ4vqNxVwh77Z0zytYi7rXHl7j6uPLGhhz9rwo=
 github.com/pion/turn/v2 v2.1.3/go.mod h1:huEpByKKHix2/b9kmTAM3YoX6MKP+/D//0ClgUYR2fY=
 github.com/pion/turn/v2 v2.1.6 h1:Xr2niVsiPTB0FPtt+yAWKFUkU1eotQbGgpTIld4x1Gc=
 github.com/pion/turn/v2 v2.1.6/go.mod h1:huEpByKKHix2/b9kmTAM3YoX6MKP+/D//0ClgUYR2fY=
-github.com/pion/webrtc/v3 v3.2.40 h1:Wtfi6AZMQg+624cvCXUuSmrKWepSB7zfgYDOYqsSOVU=
-github.com/pion/webrtc/v3 v3.2.40/go.mod h1:M1RAe3TNTD1tzyvqHrbVODfwdPGSXOUo/OgpoGGJqFY=
 github.com/pion/webrtc/v3 v3.3.0 h1:Rf4u6n6U5t5sUxhYPQk/samzU/oDv7jk6BA5hyO2F9I=
 github.com/pion/webrtc/v3 v3.3.0/go.mod h1:hVmrDJvwhEertRWObeb1xzulzHGeVUoPlWvxdGzcfU0=
 github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
@@ -652,11 +627,9 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/polydawn/refmt v0.89.0 h1:ADJTApkvkeBZsN0tBTx8QjpD9JkmxbKp0cxfr9qszm4=
 github.com/polydawn/refmt v0.89.0/go.mod h1:/zvteZs/GwLtCgZ4BL6CBsk9IKIlexP43ObX9AxTqTw=
-github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw=
-github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
+github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt9k/+g42oCprj/FisM4qX9L3sZB3upGN2ZU=
+github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
 github.com/prometheus/client_golang v0.8.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
-github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE=
-github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho=
 github.com/prometheus/client_golang v1.20.0 h1:jBzTZ7B099Rg24tny+qngoynol8LtVYlA2bqx3vEloI=
 github.com/prometheus/client_golang v1.20.0/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
 github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
@@ -671,8 +644,6 @@ github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0leargg
 github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
 github.com/quic-go/qpack v0.4.0 h1:Cr9BXA1sQS2SmDUWjSofMPNKmvF6IiIfDRmgU0w1ZCo=
 github.com/quic-go/qpack v0.4.0/go.mod h1:UZVnYIfi5GRk+zI9UMaCPsmZ2xKJP7XBUvVyT1Knj9A=
-github.com/quic-go/quic-go v0.44.0 h1:So5wOr7jyO4vzL2sd8/pD9Kesciv91zSk8BoFngItQ0=
-github.com/quic-go/quic-go v0.44.0/go.mod h1:z4cx/9Ny9UtGITIPzmPTXh1ULfOyWh4qGQlpnPcWmek=
 github.com/quic-go/quic-go v0.46.0 h1:uuwLClEEyk1DNvchH8uCByQVjo3yKL9opKulExNDs7Y=
 github.com/quic-go/quic-go v0.46.0/go.mod h1:1dLehS7TIR64+vxGR70GDcatWTOtMX2PUtnKsjbTurI=
 github.com/quic-go/webtransport-go v0.8.0 h1:HxSrwun11U+LlmwpgM1kEqIqH90IT4N8auv/cD7QFJg=
@@ -700,9 +671,10 @@ github.com/sashabaranov/go-openai v1.26.2/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adO
 github.com/schollz/progressbar/v3 v3.14.4 h1:W9ZrDSJk7eqmQhd3uxFNNcTr0QL+xuGNI9dEMrw0r74=
 github.com/schollz/progressbar/v3 v3.14.4/go.mod h1:aT3UQ7yGm+2ZjeXPqsjTenwL3ddUiuZ0kfQ/2tHlyNI=
 github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
-github.com/shirou/gopsutil/v3 v3.23.7/go.mod h1:c4gnmoRC0hQuaLqvxnx1//VXQ0Ms/X9UnJF8pddY5z4=
 github.com/shirou/gopsutil/v3 v3.24.5 h1:i0t8kL+kQTvpAYToeuiVk3TgDeKOFioZO3Ztz/iZ9pI=
 github.com/shirou/gopsutil/v3 v3.24.5/go.mod h1:bsoOS1aStSs9ErQ1WWfxllSeS1K5D+U30r2NfcubMVk=
+github.com/shirou/gopsutil/v4 v4.24.7 h1:V9UGTK4gQ8HvcnPKf6Zt3XHyQq/peaekfxpJ2HSocJk=
+github.com/shirou/gopsutil/v4 v4.24.7/go.mod h1:0uW/073rP7FYLOkvxolUQM5rMOLTNmRXnFKafpb71rw=
 github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFtM=
 github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ=
 github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU=
@@ -778,12 +750,10 @@ github.com/thxcode/gguf-parser-go v0.1.0 h1:J4QruXyEQGjrAKeKZFlsD2na9l4XF5+bjR19
 github.com/thxcode/gguf-parser-go v0.1.0/go.mod h1:Tn1PsO/YDEtLIxm1+QDCjIIH9L/9Sr7+KpxZKm0sEuE=
 github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0=
 github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw=
-github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI=
-github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU=
-github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI=
-github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ5UVIcaL4=
-github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk=
-github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY=
+github.com/tklauser/go-sysconf v0.3.14 h1:g5vzr9iPFFz24v2KZXs/pvpvh8/V9Fw6vQK5ZZb78yU=
+github.com/tklauser/go-sysconf v0.3.14/go.mod h1:1ym4lWMLUOhuBOPGtRcJm7tEGX4SCYNEEEtghGG/8uY=
+github.com/tklauser/numcpus v0.8.0 h1:Mx4Wwe/FjZLeQsK/6kt2EOepwwSl7SmJrK5bV/dXYgY=
+github.com/tklauser/numcpus v0.8.0/go.mod h1:ZJZlAY+dmR4eut8epnzf0u/VwodKmryxR8txiloSqBE=
 github.com/tmc/langchaingo v0.1.12 h1:yXwSu54f3b1IKw0jJ5/DWu+qFVH1NBblwC0xddBzGJE=
 github.com/tmc/langchaingo v0.1.12/go.mod h1:cd62xD6h+ouk8k/QQFhOsjRYBSA1JJ5UVKXSIgm7Ni4=
 github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
@@ -793,8 +763,6 @@ github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtX
 github.com/urfave/cli v1.22.10/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
 github.com/urfave/cli v1.22.12 h1:igJgVw1JdKH+trcLWLeLwZjU9fEfPesQ+9/e4MQ44S8=
 github.com/urfave/cli v1.22.12/go.mod h1:sSBEIC79qR6OvcmsD4U3KABeOTxDqQtdDnaFuUN30b8=
-github.com/urfave/cli/v2 v2.27.3 h1:/POWahRmdh7uztQ3CYnaDddk0Rm90PyOgIxgW2rr41M=
-github.com/urfave/cli/v2 v2.27.3/go.mod h1:m4QzxcD2qpra4z7WhzEGn74WZLViBnMpb1ToCAKdGRQ=
 github.com/urfave/cli/v2 v2.27.4 h1:o1owoI+02Eb+K107p27wEX9Bb8eqIoZCfLXloLUSWJ8=
 github.com/urfave/cli/v2 v2.27.4/go.mod h1:m4QzxcD2qpra4z7WhzEGn74WZLViBnMpb1ToCAKdGRQ=
 github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
@@ -811,9 +779,13 @@ github.com/viant/assertly v0.4.8/go.mod h1:aGifi++jvCrUaklKEKT0BU95igDNaqkvz+49u
 github.com/viant/toolbox v0.24.0/go.mod h1:OxMCG57V0PXuIP2HNQrtJf2CjqdmbrOx5EkMILuUhzM=
 github.com/vishvananda/netlink v1.2.1-beta.2 h1:Llsql0lnQEbHj0I1OuKyp8otXp0r3q0mPkuhwHfStVs=
 github.com/vishvananda/netlink v1.2.1-beta.2/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho=
+github.com/vishvananda/netlink v1.3.0 h1:X7l42GfcV4S6E4vHTsw48qbrV+9PVojNfIhZcwQdrZk=
+github.com/vishvananda/netlink v1.3.0/go.mod h1:i6NetklAujEcC6fK0JPjT8qSwWyO0HLn4UKG+hGqeJs=
 github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
 github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74 h1:gga7acRE695APm9hlsSMoOoE65U4/TcqNj90mc69Rlg=
 github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
+github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8=
+github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
 github.com/warpfork/go-wish v0.0.0-20220906213052-39a1cc7a02d0 h1:GDDkbFiaK8jsSDJfjId/PEGEShv6ugrt4kYsC5UIDaQ=
 github.com/warpfork/go-wish v0.0.0-20220906213052-39a1cc7a02d0/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw=
 github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1 h1:EKhdznlJHPMoKr0XTrX+IlJs1LH3lyx2nfr1dOlZ79k=
@@ -841,12 +813,13 @@ github.com/yuin/goldmark v1.5.4 h1:2uY/xC0roWy8IBEGLgB1ywIoEJFGmRrX21YQcvGZzjU=
 github.com/yuin/goldmark v1.5.4/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
 github.com/yuin/goldmark-emoji v1.0.2 h1:c/RgTShNgHTtc6xdz2KKI74jJr6rWi7FPgnP9GAsO5s=
 github.com/yuin/goldmark-emoji v1.0.2/go.mod h1:RhP/RWpexdp+KHs7ghKnifRoIs/Bq4nDS7tRbCkOwKY=
-github.com/yusufpapurcu/wmi v1.2.3/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
 github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
 github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
 go.opencensus.io v0.18.0/go.mod h1:vKdFvxhtzZ9onBp9VKHK8z/sRpBMnKAsufL7wlDrCOA=
 go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
 go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.52.0 h1:9l89oX4ba9kHbBol3Xin3leYJ+252h0zszDtBwyKe2A=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.52.0/go.mod h1:XLZfZboOJWHNKUv7eH0inh0E9VV6eWDFB/9yJyTLPp0=
 go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo=
 go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4=
 go.opentelemetry.io/otel/exporters/prometheus v0.50.0 h1:2Ewsda6hejmbhGFyUvWZjUThC98Cf8Zy6g0zkIimOng=
@@ -861,12 +834,8 @@ go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+
 go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI=
 go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ=
 go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
-go.uber.org/dig v1.17.1 h1:Tga8Lz8PcYNsWsyHMZ1Vm0OQOUaJNDyvPImgbAu9YSc=
-go.uber.org/dig v1.17.1/go.mod h1:Us0rSJiThwCv2GteUN0Q7OKvU7n5J4dxZ9JKUXozFdE=
 go.uber.org/dig v1.18.0 h1:imUL1UiY0Mg4bqbFfsRQO5G4CGRBec/ZujWTvSVp3pw=
 go.uber.org/dig v1.18.0/go.mod h1:Us0rSJiThwCv2GteUN0Q7OKvU7n5J4dxZ9JKUXozFdE=
-go.uber.org/fx v1.22.1 h1:nvvln7mwyT5s1q201YE29V/BFrGor6vMiDNpU/78Mys=
-go.uber.org/fx v1.22.1/go.mod h1:HT2M7d7RHo+ebKGh9NRcrsrHHfpZ60nW3QRubMRfv48=
 go.uber.org/fx v1.22.2 h1:iPW+OPxv0G8w75OemJ1RAnTUrF55zOJlXlo1TbJ0Buw=
 go.uber.org/fx v1.22.2/go.mod h1:o/D9n+2mLP6v1EG+qsdT1O8wKopYAsqZasju97SDFCU=
 go.uber.org/goleak v1.1.11-0.20210813005559-691160354723/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ=
@@ -897,20 +866,11 @@ golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm
 golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
 golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4=
 golang.org/x/crypto v0.8.0/go.mod h1:mRqEX+O9/h5TFCrQhkgjo2yKi0yYA+9ecGkdQoHrywE=
-golang.org/x/crypto v0.11.0/go.mod h1:xgJhtzW8F9jGdVFWZESrid1U1bjeNy4zgy5cRr/CIio=
 golang.org/x/crypto v0.12.0/go.mod h1:NF0Gs7EO5K4qLn+Ylc+fih8BSTeIjAP05siRnAh98yw=
 golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg=
-golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
-golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
-golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI=
-golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM=
 golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw=
 golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
-golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM=
-golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc=
-golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8=
-golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY=
 golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa h1:ELnwvuAXPNtPk1TJRuGkI9fDTwym6AYBu0qzT8AcHdI=
 golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa/go.mod h1:akd2r19cwCdwSwWeIdzYQGa/EZZyqcOdwWiwj5L5eKQ=
 golang.org/x/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
@@ -927,8 +887,6 @@ golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
 golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
 golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
-golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0=
-golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
 golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0=
 golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
 golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -943,33 +901,28 @@ golang.org/x/net v0.0.0-20190313220215-9f648a60d977/go.mod h1:t9HGtf8HONx5eT2rtn
 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
 golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
 golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
 golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
 golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
 golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
 golang.org/x/net v0.0.0-20210423184538-5f58ad60dda6/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk=
-golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk=
 golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
 golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY=
 golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE=
 golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
 golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
 golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
-golang.org/x/net v0.13.0/go.mod h1:zEVYFnQC7m/vmpQFELhcD1EWkZlX69l4oqgmer6hfKA=
 golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI=
 golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY=
-golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
-golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
-golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ=
-golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE=
 golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE=
 golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
+golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs=
+golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
 golang.org/x/perf v0.0.0-20180704124530-6e6d33e29852/go.mod h1:JLpeXjPJfIyPr5TlbXLkXWLhP8nz10XfvxElABhCtcw=
 golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -981,8 +934,6 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ
 golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
-golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
 golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
 golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
 golang.org/x/sys v0.0.0-20180810173357-98c5dad5d1a0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -992,20 +943,15 @@ golang.org/x/sys v0.0.0-20181029174526-d69651ed3497/go.mod h1:STP8DvDyc/dI5b8T5h
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190316082340-a2f829d7f35f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200217220822-9197077df867/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200602225109-6fdc65e7d980/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200728102440-3e129f6d46b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -1019,6 +965,7 @@ golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220906165534-d0df966e6959/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@@ -1030,11 +977,7 @@ golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI=
-golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg=
 golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
@@ -1044,14 +987,9 @@ golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA=
 golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
 golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY=
 golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
-golang.org/x/term v0.10.0/go.mod h1:lpqdcUyK/oCiQxvxVrppt5ggO2KCZ5QblwqPnfZ6d5o=
 golang.org/x/term v0.11.0/go.mod h1:zC9APTIj3jG3FdV/Ons+XE1riIZXG4aZ4GTHiPZJPIU=
 golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY=
-golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
-golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58=
 golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
-golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA=
-golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0=
 golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU=
 golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
@@ -1063,11 +1001,8 @@ golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
 golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
 golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
 golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
-golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
 golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
 golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
-golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
-golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
 golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc=
 golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
 golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
@@ -1089,14 +1024,11 @@ golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtn
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
 golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
-golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
 golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
 golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
 golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
 golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ=
 golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
-golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA=
-golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c=
 golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24=
 golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -1109,11 +1041,13 @@ golang.zx2c4.com/wireguard v0.0.0-20220703234212-c31a7b1ab478 h1:vDy//hdR+GnROE3
 golang.zx2c4.com/wireguard v0.0.0-20220703234212-c31a7b1ab478/go.mod h1:bVQfyl2sCM/QIIGHpWbFGfHPuDvqnCNkT6MQLTCjO/U=
 golang.zx2c4.com/wireguard/windows v0.5.3 h1:On6j2Rpn3OEMXqBq00QEDC7bWSZrPIHKIus8eIuExIE=
 golang.zx2c4.com/wireguard/windows v0.5.3/go.mod h1:9TEe8TJmtwyQebdFwAkEWOPr3prrtqm+REGFifP60hI=
-gonum.org/v1/gonum v0.13.0 h1:a0T3bh+7fhRyqeNbiC3qVHYmkiQgit3wnNan/2c0HMM=
-gonum.org/v1/gonum v0.13.0/go.mod h1:/WPYRckkfWrhWefxyYTfrTtQR0KH4iyHNuzxqXAKyAU=
+gonum.org/v1/gonum v0.15.0 h1:2lYxjRbTYyxkJxlhC+LvJIx3SsANPdRybu1tGj9/OrQ=
+gonum.org/v1/gonum v0.15.0/go.mod h1:xzZVBJBtS+Mz4q0Yl2LJTk+OxOg4jiXZ7qBoM0uISGo=
 google.golang.org/api v0.0.0-20180910000450-7ca32eb868bf/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0=
 google.golang.org/api v0.0.0-20181030000543-1d582fd0359e/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0=
 google.golang.org/api v0.1.0/go.mod h1:UGEZY7KEX120AnNLIHFMKIo4obdJhkp2tPbaPlQx13Y=
+google.golang.org/api v0.180.0 h1:M2D87Yo0rGBPWpo1orwfCLehUUL6E7/TYe5gvMQWDh4=
+google.golang.org/api v0.180.0/go.mod h1:51AiyoEg1MJPSZ9zvklA8VnRILPXxn1iVen9v25XHAE=
 google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
 google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
 google.golang.org/appengine v1.3.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
@@ -1125,8 +1059,12 @@ google.golang.org/genproto v0.0.0-20181202183823-bd91e49a0898/go.mod h1:7Ep/1NZk
 google.golang.org/genproto v0.0.0-20190306203927-b5d61aea6440/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
 google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
 google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 h1:Zy9XzmMEflZ/MAaA7vNcoebnRAld7FsPW1EeBB7V0m8=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157/go.mod h1:EfXuqaE1J41VCDicxHzUDm+8rk+7ZdXzHV0IhO/I6s0=
+google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda h1:wu/KJm9KJwpfHWhkkZGohVC6KRrc1oJNr4jwtQMOQXw=
+google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda/go.mod h1:g2LLCvCeCSir/JJSWosk19BR4NVxGqHUC6rxIRsd7Aw=
+google.golang.org/genproto/googleapis/api v0.0.0-20240617180043-68d350f18fd4 h1:MuYw1wJzT+ZkybKfaOXKp5hJiZDn2iHaXRw0mRYdHSc=
+google.golang.org/genproto/googleapis/api v0.0.0-20240617180043-68d350f18fd4/go.mod h1:px9SlOOZBg1wM1zdnr8jEL4CNGUBZ+ZKYtNPApNQc4c=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240617180043-68d350f18fd4 h1:Di6ANFilr+S60a4S61ZM00vLdw0IrQOSMS2/6mrnOU0=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240617180043-68d350f18fd4/go.mod h1:Ue6ibwXGpU+dqIcODieyLOcgj7z8+IcskoNIgZxtrFY=
 google.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw=
 google.golang.org/grpc v1.16.0/go.mod h1:0JHn/cJsOMiMfNA9+DeHDlAU7KAAB5GDlYFpa9MZMio=
 google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs=
@@ -1146,8 +1084,6 @@ google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2
 google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
 google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
 google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
-google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
-google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
 google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
 google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
@@ -1159,13 +1095,11 @@ gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
 gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=
 gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
 gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
-gopkg.in/op/go-logging.v1 v1.0.0-20160211212156-b2cb9fa56473/go.mod h1:N1eN2tsCx0Ydtgjl4cqmbRCsY4/+z4cYDeqwZTk6zog=
 gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
 gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
 gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg=
 gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
-gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
@@ -1184,8 +1118,6 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh
 honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
 howett.net/plist v1.0.0 h1:7CrbWYbPPO/PyNy38b2EB/+gYbjCe2DXBxgtOOZbSQM=
 howett.net/plist v1.0.0/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=
-lukechampine.com/blake3 v1.2.1 h1:YuqqRuaqsGV71BV/nm9xlI0MKUv4QC54jQnBChWbGnI=
-lukechampine.com/blake3 v1.2.1/go.mod h1:0OFRp7fBtAylGVCO40o87sbupkyIGgbpv1+M1k1LM6k=
 lukechampine.com/blake3 v1.3.0 h1:sJ3XhFINmHSrYCgl958hscfIa3bw8x4DqMP3u1YvoYE=
 lukechampine.com/blake3 v1.3.0/go.mod h1:0OFRp7fBtAylGVCO40o87sbupkyIGgbpv1+M1k1LM6k=
 oras.land/oras-go/v2 v2.5.0 h1:o8Me9kLY74Vp5uw07QXPiitjsw7qNXi8Twd+19Zf02c=
--- a/pkg/grpc/backend.go
+++ b/pkg/grpc/backend.go
@@ -3,7 +3,6 @@ package grpc
 import (
 	"context"

-	"github.com/mudler/LocalAI/core/schema"
 	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 	"google.golang.org/grpc"
 )
@@ -18,10 +17,10 @@ func NewClient(address string, parallel bool, wd WatchDog, enableWatchDog bool)
 	if bc, ok := embeds[address]; ok {
 		return bc
 	}
-	return NewGrpcClient(address, parallel, wd, enableWatchDog)
+	return buildClient(address, parallel, wd, enableWatchDog)
 }

-func NewGrpcClient(address string, parallel bool, wd WatchDog, enableWatchDog bool) Backend {
+func buildClient(address string, parallel bool, wd WatchDog, enableWatchDog bool) Backend {
 	if !enableWatchDog {
 		wd = nil
 	}
@@ -41,7 +40,8 @@ type Backend interface {
 	PredictStream(ctx context.Context, in *pb.PredictOptions, f func(s []byte), opts ...grpc.CallOption) error
 	GenerateImage(ctx context.Context, in *pb.GenerateImageRequest, opts ...grpc.CallOption) (*pb.Result, error)
 	TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOption) (*pb.Result, error)
-	AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error)
+	SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error)
+	AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*pb.TranscriptResult, error)
 	TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error)
 	Status(ctx context.Context) (*pb.StatusResponse, error)

--- a/pkg/grpc/base/base.go
+++ b/pkg/grpc/base/base.go
@@ -6,7 +6,6 @@ import (
 	"fmt"
 	"os"

-	"github.com/mudler/LocalAI/core/schema"
 	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 	gopsutil "github.com/shirou/gopsutil/v3/process"
 )
@@ -53,14 +52,18 @@ func (llm *Base) GenerateImage(*pb.GenerateImageRequest) error {
 	return fmt.Errorf("unimplemented")
 }

-func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (schema.TranscriptionResult, error) {
-	return schema.TranscriptionResult{}, fmt.Errorf("unimplemented")
+func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (pb.TranscriptResult, error) {
+	return pb.TranscriptResult{}, fmt.Errorf("unimplemented")
 }

 func (llm *Base) TTS(*pb.TTSRequest) error {
 	return fmt.Errorf("unimplemented")
 }

+func (llm *Base) SoundGeneration(*pb.SoundGenerationRequest) error {
+	return fmt.Errorf("unimplemented")
+}
+
 func (llm *Base) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationResponse, error) {
 	return pb.TokenizationResponse{}, fmt.Errorf("unimplemented")
 }
--- a/pkg/grpc/client.go
+++ b/pkg/grpc/client.go
@@ -7,7 +7,6 @@ import (
 	"sync"
 	"time"

-	"github.com/mudler/LocalAI/core/schema"
 	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/credentials/insecure"
@@ -39,6 +38,18 @@ func (c *Client) setBusy(v bool) {
 	c.Unlock()
 }

+func (c *Client) wdMark() {
+	if c.wd != nil {
+		c.wd.Mark(c.address)
+	}
+}
+
+func (c *Client) wdUnMark() {
+	if c.wd != nil {
+		c.wd.UnMark(c.address)
+	}
+}
+
 func (c *Client) HealthCheck(ctx context.Context) (bool, error) {
 	if !c.parallel {
 		c.opMutex.Lock()
@@ -76,10 +87,8 @@ func (c *Client) Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...
 	}
 	c.setBusy(true)
 	defer c.setBusy(false)
-	if c.wd != nil {
-		c.wd.Mark(c.address)
-		defer c.wd.UnMark(c.address)
-	}
+	c.wdMark()
+	defer c.wdUnMark()
 	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
 	if err != nil {
 		return nil, err
@@ -97,10 +106,8 @@ func (c *Client) Predict(ctx context.Context, in *pb.PredictOptions, opts ...grp
 	}
 	c.setBusy(true)
 	defer c.setBusy(false)
-	if c.wd != nil {
-		c.wd.Mark(c.address)
-		defer c.wd.UnMark(c.address)
-	}
+	c.wdMark()
+	defer c.wdUnMark()
 	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
 	if err != nil {
 		return nil, err
@@ -118,10 +125,8 @@ func (c *Client) LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grp
 	}
 	c.setBusy(true)
 	defer c.setBusy(false)
-	if c.wd != nil {
-		c.wd.Mark(c.address)
-		defer c.wd.UnMark(c.address)
-	}
+	c.wdMark()
+	defer c.wdUnMark()
 	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
 	if err != nil {
 		return nil, err
@@ -138,10 +143,8 @@ func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f fun
 	}
 	c.setBusy(true)
 	defer c.setBusy(false)
-	if c.wd != nil {
-		c.wd.Mark(c.address)
-		defer c.wd.UnMark(c.address)
-	}
+	c.wdMark()
+	defer c.wdUnMark()
 	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
 	if err != nil {
 		return err
@@ -177,10 +180,8 @@ func (c *Client) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest,
 	}
 	c.setBusy(true)
 	defer c.setBusy(false)
-	if c.wd != nil {
-		c.wd.Mark(c.address)
-		defer c.wd.UnMark(c.address)
-	}
+	c.wdMark()
+	defer c.wdUnMark()
 	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
 	if err != nil {
 		return nil, err
@@ -197,10 +198,8 @@ func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOp
 	}
 	c.setBusy(true)
 	defer c.setBusy(false)
-	if c.wd != nil {
-		c.wd.Mark(c.address)
-		defer c.wd.UnMark(c.address)
-	}
+	c.wdMark()
+	defer c.wdUnMark()
 	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
 	if err != nil {
 		return nil, err
@@ -210,44 +209,40 @@ func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOp
 	return client.TTS(ctx, in, opts...)
 }

-func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) {
+func (c *Client) SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error) {
 	if !c.parallel {
 		c.opMutex.Lock()
 		defer c.opMutex.Unlock()
 	}
 	c.setBusy(true)
 	defer c.setBusy(false)
-	if c.wd != nil {
-		c.wd.Mark(c.address)
-		defer c.wd.UnMark(c.address)
-	}
+	c.wdMark()
+	defer c.wdUnMark()
 	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
 	if err != nil {
 		return nil, err
 	}
 	defer conn.Close()
 	client := pb.NewBackendClient(conn)
-	res, err := client.AudioTranscription(ctx, in, opts...)
+	return client.SoundGeneration(ctx, in, opts...)
+}
+
+func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*pb.TranscriptResult, error) {
+	if !c.parallel {
+		c.opMutex.Lock()
+		defer c.opMutex.Unlock()
+	}
+	c.setBusy(true)
+	defer c.setBusy(false)
+	c.wdMark()
+	defer c.wdUnMark()
+	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
 	if err != nil {
 		return nil, err
 	}
-	tresult := &schema.TranscriptionResult{}
-	for _, s := range res.Segments {
-		tks := []int{}
-		for _, t := range s.Tokens {
-			tks = append(tks, int(t))
-		}
-		tresult.Segments = append(tresult.Segments,
-			schema.Segment{
-				Text:   s.Text,
-				Id:     int(s.Id),
-				Start:  time.Duration(s.Start),
-				End:    time.Duration(s.End),
-				Tokens: tks,
-			})
-	}
-	tresult.Text = res.Text
-	return tresult, err
+	defer conn.Close()
+	client := pb.NewBackendClient(conn)
+	return client.AudioTranscription(ctx, in, opts...)
 }

 func (c *Client) TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error) {
@@ -257,10 +252,8 @@ func (c *Client) TokenizeString(ctx context.Context, in *pb.PredictOptions, opts
 	}
 	c.setBusy(true)
 	defer c.setBusy(false)
-	if c.wd != nil {
-		c.wd.Mark(c.address)
-		defer c.wd.UnMark(c.address)
-	}
+	c.wdMark()
+	defer c.wdUnMark()
 	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
 	if err != nil {
 		return nil, err
@@ -299,6 +292,8 @@ func (c *Client) StoresSet(ctx context.Context, in *pb.StoresSetOptions, opts ..
 	}
 	c.setBusy(true)
 	defer c.setBusy(false)
+	c.wdMark()
+	defer c.wdUnMark()
 	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
 	if err != nil {
 		return nil, err
@@ -313,6 +308,8 @@ func (c *Client) StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, o
 		c.opMutex.Lock()
 		defer c.opMutex.Unlock()
 	}
+	c.wdMark()
+	defer c.wdUnMark()
 	c.setBusy(true)
 	defer c.setBusy(false)
 	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
@@ -331,6 +328,8 @@ func (c *Client) StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ..
 	}
 	c.setBusy(true)
 	defer c.setBusy(false)
+	c.wdMark()
+	defer c.wdUnMark()
 	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
 	if err != nil {
 		return nil, err
@@ -347,6 +346,8 @@ func (c *Client) StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts
 	}
 	c.setBusy(true)
 	defer c.setBusy(false)
+	c.wdMark()
+	defer c.wdUnMark()
 	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
 	if err != nil {
 		return nil, err
@@ -363,6 +364,8 @@ func (c *Client) Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.
 	}
 	c.setBusy(true)
 	defer c.setBusy(false)
+	c.wdMark()
+	defer c.wdUnMark()
 	conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
 	if err != nil {
 		return nil, err
--- a/pkg/grpc/embed.go
+++ b/pkg/grpc/embed.go
@@ -2,9 +2,7 @@ package grpc

 import (
 	"context"
-	"time"

-	"github.com/mudler/LocalAI/core/schema"
 	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/metadata"
@@ -53,28 +51,12 @@ func (e *embedBackend) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.
 	return e.s.TTS(ctx, in)
 }

-func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.TranscriptionResult, error) {
-	r, err := e.s.AudioTranscription(ctx, in)
-	if err != nil {
-		return nil, err
-	}
-	tr := &schema.TranscriptionResult{}
-	for _, s := range r.Segments {
-		var tks []int
-		for _, t := range s.Tokens {
-			tks = append(tks, int(t))
-		}
-		tr.Segments = append(tr.Segments,
-			schema.Segment{
-				Text:   s.Text,
-				Id:     int(s.Id),
-				Start:  time.Duration(s.Start),
-				End:    time.Duration(s.End),
-				Tokens: tks,
-			})
-	}
-	tr.Text = r.Text
-	return tr, err
+func (e *embedBackend) SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest, opts ...grpc.CallOption) (*pb.Result, error) {
+	return e.s.SoundGeneration(ctx, in)
+}
+
+func (e *embedBackend) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*pb.TranscriptResult, error) {
+	return e.s.AudioTranscription(ctx, in)
 }

 func (e *embedBackend) TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error) {
--- a/pkg/grpc/interface.go
+++ b/pkg/grpc/interface.go
@@ -1,7 +1,6 @@
 package grpc

 import (
-	"github.com/mudler/LocalAI/core/schema"
 	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 )

@@ -15,8 +14,9 @@ type LLM interface {
 	Load(*pb.ModelOptions) error
 	Embeddings(*pb.PredictOptions) ([]float32, error)
 	GenerateImage(*pb.GenerateImageRequest) error
-	AudioTranscription(*pb.TranscriptRequest) (schema.TranscriptionResult, error)
+	AudioTranscription(*pb.TranscriptRequest) (pb.TranscriptResult, error)
 	TTS(*pb.TTSRequest) error
+	SoundGeneration(*pb.SoundGenerationRequest) error
 	TokenizeString(*pb.PredictOptions) (pb.TokenizationResponse, error)
 	Status() (pb.StatusResponse, error)

--- a/pkg/grpc/server.go
+++ b/pkg/grpc/server.go
@@ -84,7 +84,19 @@ func (s *server) TTS(ctx context.Context, in *pb.TTSRequest) (*pb.Result, error)
 	if err != nil {
 		return &pb.Result{Message: fmt.Sprintf("Error generating audio: %s", err.Error()), Success: false}, err
 	}
-	return &pb.Result{Message: "Audio generated", Success: true}, nil
+	return &pb.Result{Message: "TTS audio generated", Success: true}, nil
+}
+
+func (s *server) SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequest) (*pb.Result, error) {
+	if s.llm.Locking() {
+		s.llm.Lock()
+		defer s.llm.Unlock()
+	}
+	err := s.llm.SoundGeneration(in)
+	if err != nil {
+		return &pb.Result{Message: fmt.Sprintf("Error generating audio: %s", err.Error()), Success: false}, err
+	}
+	return &pb.Result{Message: "Sound Generation audio generated", Success: true}, nil
 }

 func (s *server) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest) (*pb.TranscriptResult, error) {
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -80,6 +80,9 @@ ENTRY:
 		if e.IsDir() {
 			continue
 		}
+		if strings.HasSuffix(e.Name(), ".log") {
+			continue
+		}

 		// Skip the llama.cpp variants if we are autoDetecting
 		// But we always load the fallback variant if it exists
@@ -265,12 +268,12 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {

 // starts the grpcModelProcess for the backend, and returns a grpc client
 // It also loads the model
-func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string) (ModelAddress, error) {
-	return func(modelName, modelFile string) (ModelAddress, error) {
+func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string) (*Model, error) {
+	return func(modelName, modelFile string) (*Model, error) {

 		log.Debug().Msgf("Loading Model %s with gRPC (file: %s) (backend: %s): %+v", modelName, modelFile, backend, *o)

-		var client ModelAddress
+		var client *Model

 		getFreeAddress := func() (string, error) {
 			port, err := freeport.GetFreePort()
@@ -294,27 +297,30 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
 		if uri, ok := o.externalBackends[backend]; ok {
 			log.Debug().Msgf("Loading external backend: %s", uri)
 			// check if uri is a file or a address
-			if _, err := os.Stat(uri); err == nil {
+			if fi, err := os.Stat(uri); err == nil {
+				log.Debug().Msgf("external backend is file: %+v", fi)
 				serverAddress, err := getFreeAddress()
 				if err != nil {
-					return "", fmt.Errorf("failed allocating free ports: %s", err.Error())
+					return nil, fmt.Errorf("failed allocating free ports: %s", err.Error())
 				}
 				// Make sure the process is executable
 				if err := ml.startProcess(uri, o.model, serverAddress); err != nil {
-					return "", err
+					log.Error().Err(err).Str("path", uri).Msg("failed to launch ")
+					return nil, err
 				}

 				log.Debug().Msgf("GRPC Service Started")

-				client = ModelAddress(serverAddress)
+				client = NewModel(serverAddress)
 			} else {
+				log.Debug().Msg("external backend is uri")
 				// address
-				client = ModelAddress(uri)
+				client = NewModel(uri)
 			}
 		} else {
 			grpcProcess := backendPath(o.assetDir, backend)
 			if err := utils.VerifyPath(grpcProcess, o.assetDir); err != nil {
-				return "", fmt.Errorf("grpc process not found in assetdir: %s", err.Error())
+				return nil, fmt.Errorf("grpc process not found in assetdir: %s", err.Error())
 			}

 			if autoDetect {
@@ -326,12 +332,12 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string

 			// Check if the file exists
 			if _, err := os.Stat(grpcProcess); os.IsNotExist(err) {
-				return "", fmt.Errorf("grpc process not found: %s. some backends(stablediffusion, tts) require LocalAI compiled with GO_TAGS", grpcProcess)
+				return nil, fmt.Errorf("grpc process not found: %s. some backends(stablediffusion, tts) require LocalAI compiled with GO_TAGS", grpcProcess)
 			}

 			serverAddress, err := getFreeAddress()
 			if err != nil {
-				return "", fmt.Errorf("failed allocating free ports: %s", err.Error())
+				return nil, fmt.Errorf("failed allocating free ports: %s", err.Error())
 			}

 			args := []string{}
@@ -341,12 +347,12 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string

 			// Make sure the process is executable in any circumstance
 			if err := ml.startProcess(grpcProcess, o.model, serverAddress, args...); err != nil {
-				return "", err
+				return nil, err
 			}

 			log.Debug().Msgf("GRPC Service Started")

-			client = ModelAddress(serverAddress)
+			client = NewModel(serverAddress)
 		}

 		// Wait for the service to start up
@@ -366,7 +372,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string

 		if !ready {
 			log.Debug().Msgf("GRPC Service NOT ready")
-			return "", fmt.Errorf("grpc service not ready")
+			return nil, fmt.Errorf("grpc service not ready")
 		}

 		options := *o.gRPCOptions
@@ -377,25 +383,18 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string

 		res, err := client.GRPC(o.parallelRequests, ml.wd).LoadModel(o.context, &options)
 		if err != nil {
-			return "", fmt.Errorf("could not load model: %w", err)
+			return nil, fmt.Errorf("could not load model: %w", err)
 		}
 		if !res.Success {
-			return "", fmt.Errorf("could not load model (no success): %s", res.Message)
+			return nil, fmt.Errorf("could not load model (no success): %s", res.Message)
 		}

 		return client, nil
 	}
 }

-func (ml *ModelLoader) resolveAddress(addr ModelAddress, parallel bool) (grpc.Backend, error) {
-	if parallel {
-		return addr.GRPC(parallel, ml.wd), nil
-	}
-
-	if _, ok := ml.grpcClients[string(addr)]; !ok {
-		ml.grpcClients[string(addr)] = addr.GRPC(parallel, ml.wd)
-	}
-	return ml.grpcClients[string(addr)], nil
+func (ml *ModelLoader) ListAvailableBackends(assetdir string) ([]string, error) {
+	return backendsInAssetDir(assetdir)
 }

 func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err error) {
@@ -422,7 +421,6 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e
 			log.Error().Err(err).Str("keptModel", o.model).Msg("error while shutting down all backends except for the keptModel")
 			return nil, err
 		}
-
 	}

 	var backendToConsume string
@@ -435,26 +433,28 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e
 		backendToConsume = backend
 	}

-	addr, err := ml.LoadModel(o.model, ml.grpcModel(backendToConsume, o))
+	model, err := ml.LoadModel(o.model, ml.grpcModel(backendToConsume, o))
 	if err != nil {
 		return nil, err
 	}

-	return ml.resolveAddress(addr, o.parallelRequests)
+	return model.GRPC(o.parallelRequests, ml.wd), nil
 }

 func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
 	o := NewOptions(opts...)

 	ml.mu.Lock()
+
 	// Return earlier if we have a model already loaded
 	// (avoid looping through all the backends)
-	if m := ml.CheckIsLoaded(o.model); m != "" {
+	if m := ml.CheckIsLoaded(o.model); m != nil {
 		log.Debug().Msgf("Model '%s' already loaded", o.model)
 		ml.mu.Unlock()

-		return ml.resolveAddress(m, o.parallelRequests)
+		return m.GRPC(o.parallelRequests, ml.wd), nil
 	}
+
 	// If we can have only one backend active, kill all the others (except external backends)
 	if o.singleActiveBackend {
 		log.Debug().Msgf("Stopping all backends except '%s'", o.model)
--- a/pkg/model/loader.go
+++ b/pkg/model/loader.go
@@ -7,70 +7,32 @@ import (
 	"path/filepath"
 	"strings"
 	"sync"
+	"time"

 	"github.com/mudler/LocalAI/pkg/templates"

-	"github.com/mudler/LocalAI/pkg/functions"
-	"github.com/mudler/LocalAI/pkg/grpc"
 	"github.com/mudler/LocalAI/pkg/utils"

 	process "github.com/mudler/go-processmanager"
 	"github.com/rs/zerolog/log"
 )

-// Rather than pass an interface{} to the prompt template:
-// These are the definitions of all possible variables LocalAI will currently populate for use in a prompt template file
-// Please note: Not all of these are populated on every endpoint - your template should either be tested for each endpoint you map it to, or tolerant of zero values.
-type PromptTemplateData struct {
-	SystemPrompt         string
-	SuppressSystemPrompt bool // used by chat specifically to indicate that SystemPrompt above should be _ignored_
-	Input                string
-	Instruction          string
-	Functions            []functions.Function
-	MessageIndex         int
-}
-
-type ChatMessageTemplateData struct {
-	SystemPrompt string
-	Role         string
-	RoleName     string
-	FunctionName string
-	Content      string
-	MessageIndex int
-	Function     bool
-	FunctionCall interface{}
-	LastMessage  bool
-}
-
 // new idea: what if we declare a struct of these here, and use a loop to check?

 // TODO: Split ModelLoader and TemplateLoader? Just to keep things more organized. Left together to share a mutex until I look into that. Would split if we seperate directories for .bin/.yaml and .tmpl
 type ModelLoader struct {
-	ModelPath string
-	mu        sync.Mutex
-	// TODO: this needs generics
-	grpcClients   map[string]grpc.Backend
-	models        map[string]ModelAddress
+	ModelPath     string
+	mu            sync.Mutex
+	models        map[string]*Model
 	grpcProcesses map[string]*process.Process
 	templates     *templates.TemplateCache
 	wd            *WatchDog
 }

-type ModelAddress string
-
-func (m ModelAddress) GRPC(parallel bool, wd *WatchDog) grpc.Backend {
-	enableWD := false
-	if wd != nil {
-		enableWD = true
-	}
-	return grpc.NewClient(string(m), parallel, wd, enableWD)
-}
-
 func NewModelLoader(modelPath string) *ModelLoader {
 	nml := &ModelLoader{
 		ModelPath:     modelPath,
-		grpcClients:   make(map[string]grpc.Backend),
-		models:        make(map[string]ModelAddress),
+		models:        make(map[string]*Model),
 		templates:     templates.NewTemplateCache(modelPath),
 		grpcProcesses: make(map[string]*process.Process),
 	}
@@ -141,12 +103,24 @@ FILE:
 	return models, nil
 }

-func (ml *ModelLoader) LoadModel(modelName string, loader func(string, string) (ModelAddress, error)) (ModelAddress, error) {
+func (ml *ModelLoader) ListModels() []*Model {
+	ml.mu.Lock()
+	defer ml.mu.Unlock()
+
+	models := []*Model{}
+	for _, model := range ml.models {
+		models = append(models, model)
+	}
+
+	return models
+}
+
+func (ml *ModelLoader) LoadModel(modelName string, loader func(string, string) (*Model, error)) (*Model, error) {
 	ml.mu.Lock()
 	defer ml.mu.Unlock()

 	// Check if we already have a loaded model
-	if model := ml.CheckIsLoaded(modelName); model != "" {
+	if model := ml.CheckIsLoaded(modelName); model != nil {
 		return model, nil
 	}

@@ -156,18 +130,15 @@ func (ml *ModelLoader) LoadModel(modelName string, loader func(string, string) (

 	model, err := loader(modelName, modelFile)
 	if err != nil {
-		return "", err
+		return nil, err
 	}

-	// TODO: Add a helper method to iterate all prompt templates associated with a config if and only if it's YAML?
-	// Minor perf loss here until this is fixed, but we initialize on first request
-
-	// // If there is a prompt template, load it
-	// if err := ml.loadTemplateIfExists(modelName); err != nil {
-	// 	return nil, err
-	// }
+	if model == nil {
+		return nil, fmt.Errorf("loader didn't return a model")
+	}

 	ml.models[modelName] = model
+
 	return model, nil
 }

@@ -184,55 +155,40 @@ func (ml *ModelLoader) stopModel(modelName string) error {
 		return fmt.Errorf("model %s not found", modelName)
 	}
 	return nil
-	//return ml.deleteProcess(modelName)
 }

-func (ml *ModelLoader) CheckIsLoaded(s string) ModelAddress {
-	var client grpc.Backend
-	if m, ok := ml.models[s]; ok {
-		log.Debug().Msgf("Model already loaded in memory: %s", s)
-		if c, ok := ml.grpcClients[s]; ok {
-			client = c
-		} else {
-			client = m.GRPC(false, ml.wd)
+func (ml *ModelLoader) CheckIsLoaded(s string) *Model {
+	m, ok := ml.models[s]
+	if !ok {
+		return nil
+	}
+
+	log.Debug().Msgf("Model already loaded in memory: %s", s)
+	client := m.GRPC(false, ml.wd)
+
+	log.Debug().Msgf("Checking model availability (%s)", s)
+	cTimeout, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
+	defer cancel()
+
+	alive, err := client.HealthCheck(cTimeout)
+	if !alive {
+		log.Warn().Msgf("GRPC Model not responding: %s", err.Error())
+		log.Warn().Msgf("Deleting the process in order to recreate it")
+		process, exists := ml.grpcProcesses[s]
+		if !exists {
+			log.Error().Msgf("Process not found for '%s' and the model is not responding anymore !", s)
+			return m
 		}
-		alive, err := client.HealthCheck(context.Background())
-		if !alive {
-			log.Warn().Msgf("GRPC Model not responding: %s", err.Error())
-			log.Warn().Msgf("Deleting the process in order to recreate it")
-			if !ml.grpcProcesses[s].IsAlive() {
-				log.Debug().Msgf("GRPC Process is not responding: %s", s)
-				// stop and delete the process, this forces to re-load the model and re-create again the service
-				err := ml.deleteProcess(s)
-				if err != nil {
-					log.Error().Err(err).Str("process", s).Msg("error stopping process")
-				}
-				return ""
+		if !process.IsAlive() {
+			log.Debug().Msgf("GRPC Process is not responding: %s", s)
+			// stop and delete the process, this forces to re-load the model and re-create again the service
+			err := ml.deleteProcess(s)
+			if err != nil {
+				log.Error().Err(err).Str("process", s).Msg("error stopping process")
 			}
+			return nil
 		}
-
-		return m
 	}

-	return ""
-}
-
-const (
-	ChatPromptTemplate templates.TemplateType = iota
-	ChatMessageTemplate
-	CompletionPromptTemplate
-	EditPromptTemplate
-	FunctionsPromptTemplate
-)
-
-func (ml *ModelLoader) EvaluateTemplateForPrompt(templateType templates.TemplateType, templateName string, in PromptTemplateData) (string, error) {
-	// TODO: should this check be improved?
-	if templateType == ChatMessageTemplate {
-		return "", fmt.Errorf("invalid templateType: ChatMessage")
-	}
-	return ml.templates.EvaluateTemplate(templateType, templateName, in)
-}
-
-func (ml *ModelLoader) EvaluateTemplateForChatMessage(templateName string, messageData ChatMessageTemplateData) (string, error) {
-	return ml.templates.EvaluateTemplate(ChatMessageTemplate, templateName, messageData)
+	return m
 }
--- a/Show More
+++ b/Show More