Compare commits


1 Commit

Author: Ettore Di Giacinto
SHA1: 3335738e34
Message: refactor: move federated server logic to its own service
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Date: 2024-07-18 18:19:56 +02:00
147 changed files with 1224 additions and 3406 deletions

.github/bump_deps.sh vendored
View File

@@ -6,17 +6,4 @@ VAR=$3
LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
- # Read $VAR from Makefile (only first match)
- set +e
- CURRENT_COMMIT="$(grep -m1 "^$VAR?=" Makefile | cut -d'=' -f2)"
- set -e
sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
- if [ -z "$CURRENT_COMMIT" ]; then
- echo "Could not find $VAR in Makefile."
- exit 0
- fi
- echo "Updated $VAR from $CURRENT_COMMIT to $LAST_COMMIT." > "$REPO_message.txt"
- echo "https://github.com/$REPO/compare/$CURRENT_COMMIT..$LAST_COMMIT" >> "$REPO_message.txt"

View File

@@ -40,14 +40,8 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Bump dependencies 🔧
- id: bump
run: |
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
- {
- echo 'message<<EOF'
- cat "${{ matrix.repository }}_message.txt"
- echo EOF
- } >> "$GITHUB_OUTPUT"
- name: Create Pull Request
uses: peter-evans/create-pull-request@v6
with:
@@ -56,7 +50,7 @@ jobs:
commit-message: ':arrow_up: Update ${{ matrix.repository }}'
title: 'chore: :arrow_up: Update ${{ matrix.repository }}'
branch: "update/${{ matrix.variable }}"
- body: ${{ steps.bump.outputs.message }}
+ body: Bump of ${{ matrix.repository }} version
signoff: true

View File

@@ -41,7 +41,7 @@ jobs:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
- title: 'chore(model-gallery): :arrow_up: update checksum'
+ title: 'models(gallery): :arrow_up: update checksum'
branch: "update/checksum"
body: Updating checksums in gallery/index.yaml
signoff: true

View File

@@ -8,10 +8,8 @@ jobs:
MODEL_NAME: hermes-2-theta-llama-3-8b
runs-on: ubuntu-latest
steps:
- - name: Checkout code
- uses: actions/checkout@v3
+ - uses: actions/checkout@v4
with:
ref: "${{ github.event.pull_request.merge_commit_sha }}"
fetch-depth: 0 # needed to checkout all branches for this Action to work
- uses: mudler/localai-github-action@v1
with:
@@ -23,7 +21,6 @@ jobs:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
- base_branch: ${{ github.event.pull_request.base.sha }}
- name: Show diff
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}

View File

@@ -47,7 +47,7 @@ jobs:
# makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -120,7 +120,7 @@ jobs:
# makeflags: "--jobs=3 --output-sync=target"
# - build-type: 'cublas'
# cuda-major-version: "12"
# cuda-minor-version: "0"
# cuda-minor-version: "4"
# platforms: 'linux/amd64'
# tag-latest: 'false'
# tag-suffix: '-cublas-cuda12-ffmpeg-core'

View File

@@ -75,7 +75,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12'
@@ -100,7 +100,7 @@ jobs:
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -285,7 +285,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-core'
@@ -307,7 +307,7 @@ jobs:
makeflags: "--jobs=4 --output-sync=target"
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
cuda-minor-version: "4"
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-cublas-cuda12-ffmpeg-core'

View File

@@ -4,8 +4,6 @@ on:
push:
branches:
- master
- tags:
- - 'v*'
pull_request:
env:
@@ -31,10 +29,11 @@ jobs:
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
sudo apt-get update
- sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
+ sudo apt-get install build-essential ffmpeg protobuf-compiler ccache gawk
sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
- name: Install CUDA Dependencies
run: |
@@ -150,7 +149,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
- sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
+ sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake libgmock-dev
- name: Intel Dependencies
run: |
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
@@ -251,7 +250,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
- sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
+ sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
- name: Build stablediffusion

View File

@@ -70,7 +70,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
- sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
+ sudo apt-get install build-essential curl ffmpeg
sudo apt-get install -y libgmock-dev
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \

View File

@@ -24,7 +24,7 @@ RUN apt-get update && \
cmake \
curl \
git \
- unzip upx-ucl && \
+ unzip && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
@@ -99,7 +99,7 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers
ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=12
- ARG CUDA_MINOR_VERSION=0
+ ARG CUDA_MINOR_VERSION=4
ENV BUILD_TYPE=${BUILD_TYPE}

View File

@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
- CPPLLAMA_VERSION?=0d6fb52be0c1b7e77eb855f3adc4952771c8ce4c
+ CPPLLAMA_VERSION?=b3283448ce9a5098226afe1d8648ccc578511fe4
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -20,7 +20,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
- WHISPER_CPP_VERSION?=fe36c909715e6751277ddb020e7892c7670b61d4
+ WHISPER_CPP_VERSION?=f68298ce06ca3edd6e6f3f21c3d0bb5f073942c3
# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -58,7 +58,7 @@ RANDOM := $(shell bash -c 'echo $$RANDOM')
VERSION?=$(shell git describe --always --tags || echo "dev" )
# go tool nm ./local-ai | grep Commit
- LD_FLAGS?=-s -w
+ LD_FLAGS?=
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)"
override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
@@ -72,14 +72,6 @@ WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)
- UPX?=
- # check if upx exists
- ifeq (, $(shell which upx))
- UPX=
- else
- UPX=$(shell which upx)
- endif
# Default Docker bridge IP
E2E_BRIDGE_IP?=172.17.0.1
@@ -385,7 +377,6 @@ build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
- $(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
ifneq ($(BACKEND_LIBS),)
$(MAKE) backend-assets/lib
cp -f $(BACKEND_LIBS) backend-assets/lib/
@@ -430,7 +421,7 @@ else
endif
dist-cross-linux-arm64:
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" GO_TAGS="p2p" \
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
STATIC=true $(MAKE) build
mkdir -p release
# if BUILD_ID is empty, then we don't append it to the binary name
@@ -480,7 +471,7 @@ prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
- docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests .
+ docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .
run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
@@ -742,22 +733,13 @@ backend-assets/grpc: protogen-go replace
backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
- ifneq ($(UPX),)
- $(UPX) backend-assets/grpc/bert-embeddings
- endif
backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
- ifneq ($(UPX),)
- $(UPX) backend-assets/grpc/gpt4all
- endif
backend-assets/grpc/huggingface: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
- ifneq ($(UPX),)
- $(UPX) backend-assets/grpc/huggingface
- endif
backend/cpp/llama/llama.cpp:
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
@@ -859,50 +841,29 @@ backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
- ifneq ($(UPX),)
- $(UPX) backend-assets/grpc/llama-ggml
- endif
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
- ifneq ($(UPX),)
- $(UPX) backend-assets/grpc/piper
- endif
backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
- ifneq ($(UPX),)
- $(UPX) backend-assets/grpc/rwkv
- endif
backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
- ifneq ($(UPX),)
- $(UPX) backend-assets/grpc/stablediffusion
- endif
backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
- ifneq ($(UPX),)
- $(UPX) backend-assets/grpc/tinydream
- endif
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
- ifneq ($(UPX),)
- $(UPX) backend-assets/grpc/whisper
- endif
backend-assets/grpc/local-store: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
- ifneq ($(UPX),)
- $(UPX) backend-assets/grpc/local-store
- endif
grpcs: prepare $(GRPC_BACKENDS)
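
The LD_FLAGS change above swaps the default from stripped binaries (-s -w removes the symbol table and DWARF debug data) to unstripped, while the override -X lines inject version metadata at link time. A minimal sketch of the Go side receiving those -X values (package path simplified to main; version values illustrative):

// version_sketch.go - link-time variable injection, as the Makefile's -X flags do.
package main

import "fmt"

// These defaults are overridden at link time, e.g.:
//   go build -ldflags '-X "main.Version=v2.19.0" -X "main.Commit=3335738e34"'
// Adding -s -w to the same -ldflags string strips the symbol table and DWARF data.
var (
	Version = "dev"
	Commit  = "none"
)

func main() {
	fmt.Printf("version=%s commit=%s\n", Version, Commit)
}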

View File

@@ -84,7 +84,6 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
Hot topics (looking for contributors):
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
@@ -151,7 +150,6 @@ Other:
## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
- [Run Visual studio code with LocalAI (SUSE)](https://www.suse.com/c/running-ai-locally/)
- 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/)
- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
- [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)

View File

@@ -2259,6 +2259,7 @@ static void params_parse(const backend::ModelOptions* request,
// get the directory of modelfile
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor));
+ params.lora_base = model_dir + "/"+request->lorabase();
}
params.use_mlock = request->mlock();
params.use_mmap = request->mmap();

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
- setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+ setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,7 @@
accelerate
auto-gptq==0.7.1
- grpcio==1.65.4
+ grpcio==1.65.0
protobuf
torch
certifi
transformers

View File

@@ -1,3 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio

View File

@@ -1,2 +0,0 @@
torch
torchaudio

View File

@@ -1,6 +1,6 @@
accelerate
bark==0.1.5
- grpcio==1.65.4
+ grpcio==1.65.0
protobuf
certifi
transformers

View File

@@ -122,13 +122,6 @@ function installRequirements() {
requirementFiles+=("${MY_DIR}/requirements-${BUILD_PROFILE}.txt")
fi
- # if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements
- if [ "x${BUILD_TYPE}" == "x" ]; then
- requirementFiles+=("${MY_DIR}/requirements-cpu.txt")
- fi
- requirementFiles+=("${MY_DIR}/requirements-after.txt")
for reqFile in ${requirementFiles[@]}; do
if [ -f ${reqFile} ]; then
echo "starting requirements install for ${reqFile}"

View File

@@ -1,2 +1,2 @@
- grpcio==1.65.4
+ grpcio==1.65.0
protobuf

View File

@@ -1,3 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio

View File

@@ -1,2 +0,0 @@
torch
torchaudio

View File

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
- setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+ setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate
TTS==0.22.0
- grpcio==1.65.4
+ grpcio==1.65.0
protobuf
certifi
transformers

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -3,10 +3,11 @@ accelerate
compel
peft
diffusers
- grpcio==1.65.4
+ grpcio==1.65.0
opencv-python
pillow
protobuf
sentencepiece
torch
transformers
certifi

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -1,5 +1,6 @@
grpcio==1.65.0
protobuf
torch
transformers
certifi
setuptools

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -1,6 +1,7 @@
accelerate
- grpcio==1.65.4
+ grpcio==1.65.0
protobuf
certifi
torch
wheel
setuptools

View File

@@ -1,2 +0,0 @@
causal-conv1d==1.4.0
mamba-ssm==2.2.2

View File

@@ -1 +0,0 @@
torch

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -3,4 +3,5 @@
# https://github.com/Dao-AILab/causal-conv1d/issues/24
packaging
setuptools
- wheel
+ wheel
+ torch==2.3.1

View File

@@ -1,4 +1,6 @@
- grpcio==1.65.1
+ causal-conv1d==1.4.0
+ mamba-ssm==2.2.2
+ grpcio==1.65.0
protobuf
certifi
transformers

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -2,7 +2,7 @@
intel-extension-for-pytorch
torch
optimum[openvino]
- grpcio==1.65.4
+ grpcio==1.64.1
protobuf
librosa==0.9.1
faster-whisper==1.0.3

View File

@@ -1,4 +1,4 @@
- grpcio==1.65.4
+ grpcio==1.65.0
protobuf
librosa
faster-whisper

View File

@@ -5,7 +5,7 @@ source $(dirname $0)/../common/libbackend.sh
# Download checkpoints if not present
if [ ! -d "checkpoints_v2" ]; then
- wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
+ wget https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
unzip checkpoints_v2.zip
fi

View File

@@ -1,3 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio

View File

@@ -1,2 +0,0 @@
torch
torchaudio

View File

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
- setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+ setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,7 @@
accelerate
- grpcio==1.65.1
+ grpcio==1.65.0
protobuf
torch
git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
certifi
transformers

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
- setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+ setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
- setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+ setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate
rerankers[transformers]
- grpcio==1.65.4
+ grpcio==1.65.0
protobuf
certifi
transformers

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
- setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+ setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,6 @@
accelerate
sentence-transformers==3.0.1
transformers
- grpcio==1.65.1
+ grpcio==1.65.0
protobuf
certifi

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
- setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+ setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,6 +1,7 @@
accelerate
transformers
- grpcio==1.65.4
+ grpcio==1.65.0
protobuf
torch
scipy==1.14.0
certifi

View File

@@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch

View File

@@ -1 +0,0 @@
torch

View File

@@ -2,3 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
+ setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,8 +1,9 @@
accelerate
transformers
- grpcio==1.65.4
+ grpcio==1.65.0
protobuf
torch
certifi
intel-extension-for-transformers
bitsandbytes
- setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+ setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,3 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio

View File

@@ -1,2 +0,0 @@
torch
torchaudio

View File

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
- setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+ setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -1,4 +1,4 @@
accelerate
- grpcio==1.65.4
+ grpcio==1.65.0
protobuf
certifi

View File

@@ -0,0 +1 @@
flash-attn

View File

@@ -1,3 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
flash-attn

View File

@@ -1,2 +0,0 @@
torch
flash-attn

View File

@@ -1,6 +1,6 @@
accelerate
vllm
- grpcio==1.65.4
+ grpcio==1.65.0
protobuf
certifi
transformers

View File

@@ -10,12 +10,10 @@ import (
type FederatedCLI struct {
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
- LoadBalanced bool `env:"LOCALAI_LOAD_BALANCED,LOAD_BALANCED" default:"false" help:"Enable load balancing" group:"p2p"`
}
func (f *FederatedCLI) Run(ctx *cliContext.Context) error {
- fs := p2p.NewFederatedServer(f.Address, p2p.FederatedID, f.Peer2PeerToken, f.LoadBalanced)
+ fs := p2p.NewFederatedServer(f.Address, p2p.FederatedID, f.Peer2PeerToken)
return fs.Start(context.Background())
}

View File

@@ -83,9 +83,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
return err
}
- modelURI := downloader.URI(modelName)
- if !modelURI.LooksLikeOCI() {
+ if !downloader.LooksLikeOCI(modelName) {
model := gallery.FindModel(models, modelName, mi.ModelsPath)
if model == nil {
log.Error().Str("model", modelName).Msg("model not found")

View File

@@ -119,7 +119,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
}
log.Info().Msg("Starting P2P server discovery...")
- if err := p2p.ServiceDiscoverer(context.Background(), node, token, "", func(serviceID string, node p2p.NodeData) {
+ if err := p2p.ServiceDiscoverer(context.Background(), node, token, "", func() {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes("") {
if v.IsOnline() {

View File

@@ -86,8 +86,8 @@ func (hfscmd *HFScanCMD) Run(ctx *cliContext.Context) error {
var errs error = nil
for _, uri := range hfscmd.ToScan {
log.Info().Str("uri", uri).Msg("scanning specific uri")
- scanResults, err := downloader.HuggingFaceScan(downloader.URI(uri))
- if err != nil && errors.Is(err, downloader.ErrUnsafeFilesFound) {
+ scanResults, err := downloader.HuggingFaceScan(uri)
+ if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
log.Error().Err(err).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("! WARNING ! A known-vulnerable model is included in this repo!")
errs = errors.Join(errs, err)
}
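
The two sides of this hunk differ in how scan errors are filtered: one matches a specific sentinel (ErrUnsafeFilesFound), the other excludes one (ErrNonHuggingFaceFile), and both accumulate failures with errors.Join. A self-contained sketch of that sentinel-error pattern (the sentinel name and scan logic below are illustrative, not LocalAI's):

package main

import (
	"errors"
	"fmt"
)

// Illustrative sentinel, analogous to downloader.ErrUnsafeFilesFound above.
var ErrUnsafe = errors.New("unsafe files found")

// scan always fails here; wrapping with %w keeps the sentinel matchable.
func scan(uri string) error {
	return fmt.Errorf("scanning %s: %w", uri, ErrUnsafe)
}

func main() {
	var errs error
	for _, uri := range []string{"repo/a", "repo/b"} {
		if err := scan(uri); errors.Is(err, ErrUnsafe) {
			errs = errors.Join(errs, err) // accumulate instead of aborting the loop
		}
	}
	fmt.Println(errs)
}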

View File

@@ -8,6 +8,7 @@ import (
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/functions"
"github.com/mudler/LocalAI/pkg/utils"
)
const (
@@ -71,9 +72,9 @@ type BackendConfig struct {
}
type File struct {
- Filename string `yaml:"filename" json:"filename"`
- SHA256 string `yaml:"sha256" json:"sha256"`
- URI downloader.URI `yaml:"uri" json:"uri"`
+ Filename string `yaml:"filename" json:"filename"`
+ SHA256 string `yaml:"sha256" json:"sha256"`
+ URI string `yaml:"uri" json:"uri"`
}
type VallE struct {
@@ -212,32 +213,28 @@ func (c *BackendConfig) ShouldCallSpecificFunction() bool {
// MMProjFileName returns the filename of the MMProj file
// If the MMProj is a URL, it will return the MD5 of the URL which is the filename
func (c *BackendConfig) MMProjFileName() string {
- uri := downloader.URI(c.MMProj)
- if uri.LooksLikeURL() {
- f, _ := uri.FilenameFromUrl()
- return f
+ modelURL := downloader.ConvertURL(c.MMProj)
+ if downloader.LooksLikeURL(modelURL) {
+ return utils.MD5(modelURL)
}
return c.MMProj
}
func (c *BackendConfig) IsMMProjURL() bool {
- uri := downloader.URI(c.MMProj)
- return uri.LooksLikeURL()
+ return downloader.LooksLikeURL(downloader.ConvertURL(c.MMProj))
}
func (c *BackendConfig) IsModelURL() bool {
- uri := downloader.URI(c.Model)
- return uri.LooksLikeURL()
+ return downloader.LooksLikeURL(downloader.ConvertURL(c.Model))
}
// ModelFileName returns the filename of the model
// If the model is a URL, it will return the MD5 of the URL which is the filename
func (c *BackendConfig) ModelFileName() string {
- uri := downloader.URI(c.Model)
- if uri.LooksLikeURL() {
- f, _ := uri.FilenameFromUrl()
- return f
+ modelURL := downloader.ConvertURL(c.Model)
+ if downloader.LooksLikeURL(modelURL) {
+ return utils.MD5(modelURL)
}
return c.Model
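
One side of this hunk names the downloaded file after the MD5 of its URL (utils.MD5), while the other derives a filename from the URL itself. A minimal sketch of the MD5-derived naming, with an illustrative helper standing in for LocalAI's utils package:

package main

import (
	"crypto/md5"
	"fmt"
)

// md5Hex mirrors the idea behind the utils.MD5 call above: derive a stable
// on-disk filename from a model URL (helper name is illustrative).
func md5Hex(s string) string {
	return fmt.Sprintf("%x", md5.Sum([]byte(s)))
}

func main() {
	fmt.Println(md5Hex("https://example.com/model.gguf")) // cache filename for the URL
}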

View File

@@ -244,7 +244,7 @@ func (bcl *BackendConfigLoader) Preload(modelPath string) error {
// Create file path
filePath := filepath.Join(modelPath, file.Filename)
- if err := file.URI.DownloadFile(filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil {
+ if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil {
return err
}
}
@@ -252,10 +252,10 @@ func (bcl *BackendConfigLoader) Preload(modelPath string) error {
// If the model is an URL, expand it, and download the file
if config.IsModelURL() {
modelFileName := config.ModelFileName()
- uri := downloader.URI(config.Model)
+ modelURL := downloader.ConvertURL(config.Model)
// check if file exists
if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) {
- err := uri.DownloadFile(filepath.Join(modelPath, modelFileName), "", 0, 0, status)
+ err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status)
if err != nil {
return err
}
@@ -269,10 +269,10 @@ func (bcl *BackendConfigLoader) Preload(modelPath string) error {
if config.IsMMProjURL() {
modelFileName := config.MMProjFileName()
- uri := downloader.URI(config.MMProj)
+ modelURL := downloader.ConvertURL(config.MMProj)
// check if file exists
if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) {
- err := uri.DownloadFile(filepath.Join(modelPath, modelFileName), "", 0, 0, status)
+ err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status)
if err != nil {
return err
}

View File

@@ -26,17 +26,15 @@ const (
type settingsConfig struct {
StopWords []string
TemplateConfig TemplateConfig
- RepeatPenalty float64
}
// default settings to adopt with a given model family
var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{
Gemma: {
- RepeatPenalty: 1.0,
StopWords: []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
TemplateConfig: TemplateConfig{
- Chat: "{{.Input }}\n<start_of_turn>model\n",
- ChatMessage: "<start_of_turn>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<end_of_turn>",
+ Chat: "{{.Input }}\n<|start_of_turn|>model\n",
+ ChatMessage: "<|start_of_turn|>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<|end_of_turn|>",
Completion: "{{.Input}}",
},
},
@@ -194,9 +192,6 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) {
if len(cfg.StopWords) == 0 {
cfg.StopWords = settings.StopWords
}
- if cfg.RepeatPenalty == 0.0 {
- cfg.RepeatPenalty = settings.RepeatPenalty
- }
} else {
log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: no template found for family")
}
@@ -224,7 +219,7 @@ func identifyFamily(f *gguf.GGUFFile) familyType {
commandR := arch == "command-r" && eosTokenID == 255001
qwen2 := arch == "qwen2"
phi3 := arch == "phi-3"
- gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Model().Name), "gemma")
+ gemma := strings.HasPrefix(f.Model().Name, "gemma")
deepseek2 := arch == "deepseek2"
switch {

View File

@@ -37,8 +37,7 @@ func main() {
// download the assets
for _, asset := range assets {
- uri := downloader.URI(asset.URL)
- if err := uri.DownloadFile(filepath.Join(destPath, asset.FileName), asset.SHA, 1, 1, utils.DisplayDownloadFunction); err != nil {
+ if err := downloader.DownloadFile(asset.URL, filepath.Join(destPath, asset.FileName), asset.SHA, 1, 1, utils.DisplayDownloadFunction); err != nil {
panic(err)
}
}

View File

@@ -131,8 +131,7 @@ func AvailableGalleryModels(galleries []config.Gallery, basePath string) ([]*Gal
func findGalleryURLFromReferenceURL(url string, basePath string) (string, error) {
var refFile string
- uri := downloader.URI(url)
- err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error {
+ err := downloader.DownloadAndUnmarshal(url, basePath, func(url string, d []byte) error {
refFile = string(d)
if len(refFile) == 0 {
return fmt.Errorf("invalid reference file at url %s: %s", url, d)
@@ -154,9 +153,8 @@ func getGalleryModels(gallery config.Gallery, basePath string) ([]*GalleryModel,
return models, err
}
}
- uri := downloader.URI(gallery.URL)
- err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error {
+ err := downloader.DownloadAndUnmarshal(gallery.URL, basePath, func(url string, d []byte) error {
return yaml.Unmarshal(d, &models)
})
if err != nil {
@@ -206,34 +204,35 @@ func DeleteModelFromSystem(basePath string, name string, additionalFiles []strin
log.Error().Err(err).Msgf("failed to read gallery file %s", configFile)
}
- var filesToRemove []string
+ // Remove additional files
if galleryconfig != nil {
for _, f := range galleryconfig.Files {
fullPath := filepath.Join(basePath, f.Filename)
- filesToRemove = append(filesToRemove, fullPath)
+ log.Debug().Msgf("Removing file %s", fullPath)
+ if e := os.Remove(fullPath); e != nil {
+ err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f.Filename, e))
+ }
}
}
for _, f := range additionalFiles {
fullPath := filepath.Join(filepath.Join(basePath, f))
- filesToRemove = append(filesToRemove, fullPath)
- }
- filesToRemove = append(filesToRemove, configFile)
- filesToRemove = append(filesToRemove, galleryFile)
- // skip duplicates
- filesToRemove = utils.Unique(filesToRemove)
- // Removing files
- for _, f := range filesToRemove {
- if e := os.Remove(f); e != nil {
+ log.Debug().Msgf("Removing additional file %s", fullPath)
+ if e := os.Remove(fullPath); e != nil {
err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f, e))
}
}
+ log.Debug().Msgf("Removing model config file %s", configFile)
+ // Delete the model config file
+ if e := os.Remove(configFile); e != nil {
+ err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", configFile, e))
+ }
+ // Delete gallery config file
+ os.Remove(galleryFile)
return err
}
@@ -254,8 +253,8 @@ func SafetyScanGalleryModels(galleries []config.Gallery, basePath string) error
func SafetyScanGalleryModel(galleryModel *GalleryModel) error {
for _, file := range galleryModel.AdditionalFiles {
- scanResults, err := downloader.HuggingFaceScan(downloader.URI(file.URI))
- if err != nil && errors.Is(err, downloader.ErrUnsafeFilesFound) {
+ scanResults, err := downloader.HuggingFaceScan(file.URI)
+ if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
log.Error().Str("model", galleryModel.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!")
return err
}

View File

@@ -68,8 +68,7 @@ type PromptTemplate struct {
func GetGalleryConfigFromURL(url string, basePath string) (Config, error) {
var config Config
- uri := downloader.URI(url)
- err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error {
+ err := downloader.DownloadAndUnmarshal(url, basePath, func(url string, d []byte) error {
return yaml.Unmarshal(d, &config)
})
if err != nil {
@@ -119,14 +118,14 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides
filePath := filepath.Join(basePath, file.Filename)
if enforceScan {
- scanResults, err := downloader.HuggingFaceScan(downloader.URI(file.URI))
- if err != nil && errors.Is(err, downloader.ErrUnsafeFilesFound) {
+ scanResults, err := downloader.HuggingFaceScan(file.URI)
+ if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) {
log.Error().Str("model", config.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!")
return err
}
}
- uri := downloader.URI(file.URI)
- if err := uri.DownloadFile(filePath, file.SHA256, i, len(config.Files), downloadStatus); err != nil {
+ if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.Files), downloadStatus); err != nil {
return err
}
}

View File

@@ -73,9 +73,8 @@ func getModelStatus(url string) (response map[string]interface{}) {
}
func getModels(url string) (response []gallery.GalleryModel) {
- uri := downloader.URI(url)
- // TODO: No tests currently seem to exercise file:// urls. Fix?
- uri.DownloadAndUnmarshal("", func(url string, i []byte) error {
+ downloader.DownloadAndUnmarshal(url, "", func(url string, i []byte) error {
// Unmarshal YAML data into a struct
return json.Unmarshal(i, &response)
})

View File

@@ -9,6 +9,7 @@ import (
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/xsync"
)
const (
@@ -371,12 +372,7 @@ func dropBadChars(s string) string {
return strings.ReplaceAll(s, "@", "__")
}
- type ProcessTracker interface {
- Exists(string) bool
- Get(string) string
- }
- func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, galleryService *services.GalleryService) string {
+ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[string, string], galleryService *services.GalleryService) string {
modelsElements := []elem.Node{}
descriptionDiv := func(m *gallery.GalleryModel) elem.Node {
return elem.Div(
@@ -400,7 +396,7 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g
actionDiv := func(m *gallery.GalleryModel) elem.Node {
galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name)
- currentlyProcessing := processTracker.Exists(galleryID)
+ currentlyProcessing := processing.Exists(galleryID)
jobID := ""
isDeletionOp := false
if currentlyProcessing {
@@ -408,7 +404,7 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g
if status != nil && status.Deletion {
isDeletionOp = true
}
- jobID = processTracker.Get(galleryID)
+ jobID = processing.Get(galleryID)
// TODO:
// case not handled, if status == nil : "Waiting"
}

View File

@@ -17,10 +17,7 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
backendConfigs := cl.GetAllBackendConfigs()
galleryConfigs := map[string]*gallery.Config{}
- modelsWithBackendConfig := map[string]interface{}{}
for _, m := range backendConfigs {
- modelsWithBackendConfig[m.Name] = nil
cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name)
if err != nil {
@@ -35,7 +32,7 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
modelsWithoutConfig := []string{}
for _, m := range models {
- if _, ok := modelsWithBackendConfig[m]; !ok {
+ if _, ok := galleryConfigs[m]; !ok {
modelsWithoutConfig = append(modelsWithoutConfig, m)
}
}

View File

@@ -225,16 +225,18 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
// Update input grammar
- jsStruct := funcs.ToJSONStructure(config.FunctionsConfig.FunctionNameKey, config.FunctionsConfig.FunctionNameKey)
- g, err := jsStruct.Grammar(config.FunctionsConfig.GrammarOptions()...)
- if err == nil {
- config.Grammar = g
+ // Handle if we should return "name" instead of "functions"
+ if config.FunctionsConfig.FunctionName {
+ jsStruct := funcs.ToJSONNameStructure()
+ config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
+ } else {
+ jsStruct := funcs.ToJSONFunctionStructure()
+ config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
}
case input.JSONFunctionGrammarObject != nil:
- g, err := input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarOptions()...)
- if err == nil {
- config.Grammar = g
- }
+ config.Grammar = input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
+ case input.JSONFunctionGrammarObjectName != nil:
+ config.Grammar = input.JSONFunctionGrammarObjectName.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
default:
// Force picking one of the functions by the request
if config.FunctionToCall() != "" {

View File

@@ -21,40 +21,6 @@ import (
"github.com/google/uuid"
)
- type modelOpCache struct {
- status *xsync.SyncedMap[string, string]
- }
- func NewModelOpCache() *modelOpCache {
- return &modelOpCache{
- status: xsync.NewSyncedMap[string, string](),
- }
- }
- func (m *modelOpCache) Set(key string, value string) {
- m.status.Set(key, value)
- }
- func (m *modelOpCache) Get(key string) string {
- return m.status.Get(key)
- }
- func (m *modelOpCache) DeleteUUID(uuid string) {
- for _, k := range m.status.Keys() {
- if m.status.Get(k) == uuid {
- m.status.Delete(k)
- }
- }
- }
- func (m *modelOpCache) Map() map[string]string {
- return m.status.Map()
- }
- func (m *modelOpCache) Exists(key string) bool {
- return m.status.Exists(key)
- }
func RegisterUIRoutes(app *fiber.App,
cl *config.BackendConfigLoader,
ml *model.ModelLoader,
@@ -63,7 +29,7 @@ func RegisterUIRoutes(app *fiber.App,
auth func(*fiber.Ctx) error) {
// keeps the state of models that are being installed from the UI
- var processingModels = NewModelOpCache()
+ var processingModels = xsync.NewSyncedMap[string, string]()
// modelStatus returns the current status of the models being processed (installation or deletion)
// it is called asynchonously from the UI
@@ -266,8 +232,6 @@ func RegisterUIRoutes(app *fiber.App,
return c.SendString(elements.ProgressBar("100"))
}
if status.Error != nil {
- // TODO: instead of deleting the job, we should keep it in the cache and make it dismissable
- processingModels.DeleteUUID(jobUID)
return c.SendString(elements.ErrorProgress(status.Error.Error(), status.GalleryModelName))
}
@@ -282,7 +246,12 @@ func RegisterUIRoutes(app *fiber.App,
status := galleryService.GetStatus(jobUID)
galleryID := ""
- processingModels.DeleteUUID(jobUID)
+ for _, k := range processingModels.Keys() {
+ if processingModels.Get(k) == jobUID {
+ galleryID = k
+ processingModels.Delete(k)
+ }
+ }
if galleryID == "" {
log.Debug().Msgf("no processing model found for job : %+v\n", jobUID)
}
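
The hunks above replace the small modelOpCache wrapper and ProcessTracker interface with direct use of xsync.SyncedMap[string, string], including an inlined reverse lookup by job UUID. A rough sketch of the shape such a generic synced map takes (a plain sync.RWMutex guard; this is illustrative, not LocalAI's xsync implementation):

package main

import (
	"fmt"
	"sync"
)

// SyncedMap is a minimal stand-in for xsync.SyncedMap: a map guarded by an RWMutex.
type SyncedMap[K comparable, V any] struct {
	mu sync.RWMutex
	m  map[K]V
}

func NewSyncedMap[K comparable, V any]() *SyncedMap[K, V] {
	return &SyncedMap[K, V]{m: map[K]V{}}
}

func (s *SyncedMap[K, V]) Set(k K, v V) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.m[k] = v
}

func (s *SyncedMap[K, V]) Get(k K) V {
	s.mu.RLock()
	defer s.mu.RUnlock()
	return s.m[k]
}

func (s *SyncedMap[K, V]) Delete(k K) {
	s.mu.Lock()
	defer s.mu.Unlock()
	delete(s.m, k)
}

func (s *SyncedMap[K, V]) Keys() []K {
	s.mu.RLock()
	defer s.mu.RUnlock()
	keys := make([]K, 0, len(s.m))
	for k := range s.m {
		keys = append(keys, k)
	}
	return keys
}

func main() {
	processing := NewSyncedMap[string, string]()
	processing.Set("gallery@model", "job-123")
	// Reverse lookup by value, as the route handler above does with jobUID.
	for _, k := range processing.Keys() {
		if processing.Get(k) == "job-123" {
			fmt.Println("found:", k)
			processing.Delete(k)
		}
	}
}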

View File

@@ -16,16 +16,7 @@
</a>
</h2>
<h5 class="mb-4 text-justify">LocalAI uses P2P technologies to enable distribution of work between peers. It is possible to share an instance with Federation and/or split the weights of a model across peers (only available with llama.cpp models). You can now share computational resources between your devices or your friends!</h5>
- <!-- Warning box if p2p token is empty and p2p is enabled -->
- {{ if and .IsP2PEnabled (eq .P2PToken "") }}
- <div class="bg-red-500 p-4 rounded-lg shadow-lg mb-12 text-left">
- <p class="text-xl font-semibold text-white"> <i class="fa-solid fa-exclamation-triangle"></i> Warning: P2P mode is disabled or no token was specified</p>
- <p class="mb-4">You have to enable P2P mode by starting LocalAI with <code>--p2p</code>. Please restart the server with <code>--p2p</code> to generate a new token automatically that can be used to automatically discover other nodes. If you already have a token specify it with <code>export TOKEN=".."</code> <a href="https://localai.io/features/distribute/" target="_blank">
- Check out the documentation for more information.
- </a> </p>
- </div>
- {{ else }}
<!-- Federation Box -->
<div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left">
@@ -137,8 +128,7 @@
</div>
</div>
</div>
- <!-- Llama.cpp Box END -->
- {{ end }}
+ <!-- Llama.cpp Box END -->
</div>
</div>

View File

@@ -1,47 +0,0 @@
package p2p
const FederatedID = "federated"
type FederatedServer struct {
listenAddr, service, p2ptoken string
requestTable map[string]int
loadBalanced bool
}
func NewFederatedServer(listenAddr, service, p2pToken string, loadBalanced bool) *FederatedServer {
return &FederatedServer{
listenAddr: listenAddr,
service: service,
p2ptoken: p2pToken,
requestTable: map[string]int{},
loadBalanced: loadBalanced,
}
}
func (fs *FederatedServer) SelectLeastUsedServer() string {
// cycle over requestTable and find the entry with the lower number
// if there are multiple entries with the same number, select one randomly
// if there are no entries, return an empty string
var min int
var minKey string
for k, v := range fs.requestTable {
if min == 0 || v < min {
min = v
minKey = k
}
}
return minKey
}
func (fs *FederatedServer) RecordRequest(nodeID string) {
// increment the counter for the nodeID in the requestTable
fs.requestTable[nodeID]++
}
func (fs *FederatedServer) EnsureRecordExist(nodeID string) {
// if the nodeID is not in the requestTable, add it with a counter of 0
_, ok := fs.requestTable[nodeID]
if !ok {
fs.requestTable[nodeID] = 0
}
}

View File

@@ -0,0 +1,13 @@
package p2p
type FederatedServer struct {
listenAddr, service, p2ptoken string
}
func NewFederatedServer(listenAddr, service, p2pToken string) *FederatedServer {
return &FederatedServer{
listenAddr: listenAddr,
service: service,
p2ptoken: p2pToken,
}
}

View File

@@ -7,35 +7,34 @@ import (
"context"
"errors"
"fmt"
"io"
"net"
"time"
"github.com/rs/zerolog/log"
"math/rand/v2"
"github.com/mudler/edgevpn/pkg/node"
"github.com/mudler/edgevpn/pkg/protocol"
"github.com/mudler/edgevpn/pkg/types"
"github.com/rs/zerolog/log"
)
- func (f *FederatedServer) Start(ctx context.Context) error {
- n, err := NewNode(f.p2ptoken)
+ func (fs *FederatedServer) Start(ctx context.Context) error {
+ n, err := NewNode(fs.p2ptoken)
if err != nil {
return fmt.Errorf("creating a new node: %w", err)
}
err = n.Start(ctx)
if err != nil {
return fmt.Errorf("creating a new node: %w", err)
return fmt.Errorf("starting a new node: %w", err)
}
- if err := ServiceDiscoverer(ctx, n, f.p2ptoken, f.service, func(servicesID string, tunnel NodeData) {
- log.Debug().Msgf("Discovered node: %s", tunnel.ID)
- }); err != nil {
+ if err := ServiceDiscoverer(ctx, n, fs.p2ptoken, FederatedID, nil); err != nil {
return err
}
- return f.proxy(ctx, n)
+ return fs.proxy(ctx, n)
}
func (fs *FederatedServer) proxy(ctx context.Context, node *node.Node) error {
@@ -85,56 +84,44 @@ func (fs *FederatedServer) proxy(ctx context.Context, node *node.Node) error {
}
// Handle connections in a new goroutine, forwarding to the p2p service
- go func() {
- var tunnelAddresses []string
- for _, v := range GetAvailableNodes(fs.service) {
- if v.IsOnline() {
- tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
- } else {
- log.Info().Msgf("Node %s is offline", v.ID)
- }
- }
- if len(tunnelAddresses) == 0 {
- log.Error().Msg("No available nodes yet")
- return
- }
- tunnelAddr := ""
- if fs.loadBalanced {
- for _, t := range tunnelAddresses {
- fs.EnsureRecordExist(t)
- }
- tunnelAddr = fs.SelectLeastUsedServer()
- log.Debug().Msgf("Selected tunnel %s", tunnelAddr)
- if tunnelAddr == "" {
- tunnelAddr = tunnelAddresses[rand.IntN(len(tunnelAddresses))]
- }
- fs.RecordRequest(tunnelAddr)
- } else {
- tunnelAddr = tunnelAddresses[rand.IntN(len(tunnelAddresses))]
- }
- tunnelConn, err := net.Dial("tcp", tunnelAddr)
- if err != nil {
- log.Error().Err(err).Msg("Error connecting to tunnel")
- return
- }
- log.Info().Msgf("Redirecting %s to %s", conn.LocalAddr().String(), tunnelConn.RemoteAddr().String())
- closer := make(chan struct{}, 2)
- go copyStream(closer, tunnelConn, conn)
- go copyStream(closer, conn, tunnelConn)
- <-closer
- tunnelConn.Close()
- conn.Close()
- // ll.Infof("(service %s) Done handling %s", serviceID, l.Addr().String())
- }()
+ go handleConn(conn)
}
}
}
+ func handleConn(conn net.Conn) {
+ var tunnelAddresses []string
+ for _, v := range GetAvailableNodes(FederatedID) {
+ if v.IsOnline() {
+ tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
+ } else {
+ log.Info().Msgf("Node %s is offline", v.ID)
+ }
+ }
+ // open a TCP stream to one of the tunnels
+ // chosen randomly
+ // TODO: optimize this and track usage
+ tunnelAddr := tunnelAddresses[rand.IntN(len(tunnelAddresses))]
+ tunnelConn, err := net.Dial("tcp", tunnelAddr)
+ if err != nil {
+ log.Error().Err(err).Msg("Error connecting to tunnel")
+ return
+ }
+ log.Info().Msgf("Redirecting %s to %s", conn.LocalAddr().String(), tunnelConn.RemoteAddr().String())
+ closer := make(chan struct{}, 2)
+ go copyStream(closer, tunnelConn, conn)
+ go copyStream(closer, conn, tunnelConn)
+ <-closer
+ tunnelConn.Close()
+ conn.Close()
+ }
+ func copyStream(closer chan struct{}, dst io.Writer, src io.Reader) {
+ defer func() { closer <- struct{}{} }() // connection is closed, send signal to stop proxy
+ io.Copy(dst, src)
+ }
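
Both versions of the proxy funnel each accepted connection through the same bidirectional-copy pattern: dial the chosen tunnel, run one io.Copy per direction, and tear down when either side hits EOF. A standalone sketch of that pattern (listen and upstream addresses are illustrative):

package main

import (
	"io"
	"log"
	"net"
)

// forward pipes conn <-> upstream until either side closes, mirroring the
// copyStream/closer-channel pattern in the hunk above.
func forward(conn net.Conn, upstreamAddr string) {
	upstream, err := net.Dial("tcp", upstreamAddr)
	if err != nil {
		log.Println("dial upstream:", err)
		conn.Close()
		return
	}
	closer := make(chan struct{}, 2) // buffered so neither copier blocks on send
	go func() { io.Copy(upstream, conn); closer <- struct{}{} }()
	go func() { io.Copy(conn, upstream); closer <- struct{}{} }()
	<-closer // first EOF wins; tear both sides down
	upstream.Close()
	conn.Close()
}

func main() {
	l, err := net.Listen("tcp", "127.0.0.1:9090") // illustrative listen address
	if err != nil {
		log.Fatal(err)
	}
	for {
		conn, err := l.Accept()
		if err != nil {
			continue
		}
		go forward(conn, "127.0.0.1:8080") // illustrative upstream tunnel address
	}
}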

View File

@@ -6,6 +6,7 @@ import (
)
const defaultServicesID = "services_localai"
+ const FederatedID = "federated"
type NodeData struct {
Name string

View File

@@ -7,7 +7,6 @@ import (
"context"
"errors"
"fmt"
"io"
"net"
"os"
"sync"
@@ -139,7 +138,7 @@ func allocateLocalService(ctx context.Context, node *node.Node, listenAddr, serv
// This is the main of the server (which keeps the env variable updated)
// This starts a goroutine that keeps LLAMACPP_GRPC_SERVERS updated with the discovered services
- func ServiceDiscoverer(ctx context.Context, n *node.Node, token, servicesID string, discoveryFunc func(serviceID string, node NodeData)) error {
+ func ServiceDiscoverer(ctx context.Context, n *node.Node, token, servicesID string, discoveryFunc func()) error {
if servicesID == "" {
servicesID = defaultServicesID
}
@@ -161,7 +160,7 @@ func ServiceDiscoverer(ctx context.Context, n *node.Node, token, servicesID stri
case tunnel := <-tunnels:
AddNode(servicesID, tunnel)
if discoveryFunc != nil {
- discoveryFunc(servicesID, tunnel)
+ discoveryFunc()
}
}
}
@@ -391,8 +390,3 @@ func newNodeOpts(token string) ([]node.Option, error) {
return nodeOpts, nil
}
- func copyStream(closer chan struct{}, dst io.Writer, src io.Reader) {
- defer func() { closer <- struct{}{} }() // connection is closed, send signal to stop proxy
- io.Copy(dst, src)
- }

View File

@@ -14,11 +14,11 @@ func GenerateToken() string {
return "not implemented"
}
- func (f *FederatedServer) Start(ctx context.Context) error {
+ func (fs *FederatedServer) Start(ctx context.Context) error {
return fmt.Errorf("not implemented")
}
- func ServiceDiscoverer(ctx context.Context, node *node.Node, token, servicesID string, fn func(string, NodeData)) error {
+ func ServiceDiscoverer(ctx context.Context, node *node.Node, token, servicesID string, fn func()) error {
return fmt.Errorf("not implemented")
}

View File

@@ -179,7 +179,8 @@ type OpenAIRequest struct {
// A grammar to constrain the LLM output
Grammar string `json:"grammar" yaml:"grammar"`
- JSONFunctionGrammarObject *functions.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"`
+ JSONFunctionGrammarObject *functions.JSONFunctionStructureFunction `json:"grammar_json_functions" yaml:"grammar_json_functions"`
+ JSONFunctionGrammarObjectName *functions.JSONFunctionStructureName `json:"grammar_json_name" yaml:"grammar_json_name"`
Backend string `json:"backend" yaml:"backend"`

View File

@@ -152,8 +152,7 @@ function:
replace_function_results: [] # Placeholder to replace function call results with arbitrary strings or patterns.
replace_llm_results: [] # Replace language model results with arbitrary strings or patterns.
capture_llm_results: [] # Capture language model results as text result, among JSON, in function calls. For instance, if a model returns a block for "thinking" and a block for "response", this will allow you to capture the thinking block.
- function_name_key: "name"
- function_arguments_key: "arguments"
+ return_name_in_function_response: false # Some models might prefer to use "name" rather then "function" when returning JSON data. This will allow to use "name" as a key in the JSON response.
# Feature gating flags to enable experimental or optional features.
feature_flags: {}

Some files were not shown because too many files have changed in this diff.