Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-03 03:02:38 -05:00)

Compare commits: v2.19.4 ... debug_nvid (2 commits)
| Author | SHA1 | Date |
| --- | --- | --- |
|  | f3e170b79f |  |
|  | 84ab2f3d11 |  |
.github/workflows/checksum_checker.yaml (2 changes, vendored)

@@ -41,7 +41,7 @@ jobs:
           token: ${{ secrets.UPDATE_BOT_TOKEN }}
           push-to-fork: ci-forks/LocalAI
           commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
-          title: 'chore(model-gallery): :arrow_up: update checksum'
+          title: 'models(gallery): :arrow_up: update checksum'
           branch: "update/checksum"
           body: Updating checksums in gallery/index.yaml
           signoff: true
.github/workflows/image-pr.yml (4 changes, vendored)

@@ -47,7 +47,7 @@ jobs:
           # makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "4"
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-cublas-cuda12-ffmpeg'

@@ -120,7 +120,7 @@ jobs:
         #   makeflags: "--jobs=3 --output-sync=target"
         # - build-type: 'cublas'
         #   cuda-major-version: "12"
-        #   cuda-minor-version: "0"
+        #   cuda-minor-version: "4"
         #   platforms: 'linux/amd64'
         #   tag-latest: 'false'
         #   tag-suffix: '-cublas-cuda12-ffmpeg-core'
.github/workflows/image.yml (8 changes, vendored)

@@ -75,7 +75,7 @@ jobs:
           makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "4"
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-cublas-cuda12'

@@ -100,7 +100,7 @@ jobs:
           makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "4"
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-cublas-cuda12-ffmpeg'

@@ -285,7 +285,7 @@ jobs:
           makeflags: "--jobs=4 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "4"
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-cublas-cuda12-core'

@@ -307,7 +307,7 @@ jobs:
           makeflags: "--jobs=4 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "4"
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-cublas-cuda12-ffmpeg-core'
.github/workflows/release.yaml (7 changes, vendored)

@@ -31,10 +31,11 @@ jobs:
       with:
         go-version: '1.21.x'
         cache: false

    - name: Dependencies
      run: |
        sudo apt-get update
-        sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
+        sudo apt-get install build-essential ffmpeg protobuf-compiler ccache gawk
+        sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
    - name: Install CUDA Dependencies
      run: |

@@ -150,7 +151,7 @@ jobs:
    - name: Dependencies
      run: |
        sudo apt-get update
-        sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
+        sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake libgmock-dev
    - name: Intel Dependencies
      run: |
        wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null

@@ -251,7 +252,7 @@ jobs:
    - name: Dependencies
      run: |
        sudo apt-get update
-        sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
+        sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
        go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
        go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
    - name: Build stablediffusion
.github/workflows/test.yml (2 changes, vendored)

@@ -70,7 +70,7 @@ jobs:
    - name: Dependencies
      run: |
        sudo apt-get update
-        sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
+        sudo apt-get install build-essential curl ffmpeg
        sudo apt-get install -y libgmock-dev
        curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
        sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
Dockerfile

@@ -24,7 +24,7 @@ RUN apt-get update && \
        cmake \
        curl \
        git \
-        unzip upx-ucl && \
+        unzip && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

@@ -99,7 +99,7 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers

 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=12
-ARG CUDA_MINOR_VERSION=0
+ARG CUDA_MINOR_VERSION=4

 ENV BUILD_TYPE=${BUILD_TYPE}
Makefile (48 changes)

@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=ed9d2854c9de4ae1f448334294e61167b04bec2a
+CPPLLAMA_VERSION?=07283b1a90e1320aae4762c7e03c879043910252

 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

@@ -20,7 +20,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=6739eb83c3ca5cf40d24c6fe8442a761a1eb6248
+WHISPER_CPP_VERSION?=f68298ce06ca3edd6e6f3f21c3d0bb5f073942c3

 # bert.cpp version
 BERT_REPO?=https://github.com/go-skynet/go-bert.cpp

@@ -58,7 +58,7 @@ RANDOM := $(shell bash -c 'echo $$RANDOM')

 VERSION?=$(shell git describe --always --tags || echo "dev" )
 # go tool nm ./local-ai | grep Commit
-LD_FLAGS?=-s -w
+LD_FLAGS?=
 override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)"
 override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"

@@ -72,14 +72,6 @@ WHITE := $(shell tput -Txterm setaf 7)
 CYAN := $(shell tput -Txterm setaf 6)
 RESET := $(shell tput -Txterm sgr0)

-UPX?=
-# check if upx exists
-ifeq (, $(shell which upx))
-	UPX=
-else
-	UPX=$(shell which upx)
-endif
-
 # Default Docker bridge IP
 E2E_BRIDGE_IP?=172.17.0.1

@@ -385,7 +377,7 @@ build: prepare backend-assets grpcs ## Build the project
	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
	$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
	$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
-	$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
+	ls -liah backend-assets/grpc
 ifneq ($(BACKEND_LIBS),)
	$(MAKE) backend-assets/lib
	cp -f $(BACKEND_LIBS) backend-assets/lib/

@@ -480,7 +472,7 @@ prepare-e2e:
	mkdir -p $(TEST_DIR)
	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
	test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests .
+	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .

 run-e2e-image:
	ls -liah $(abspath ./tests/e2e-fixtures)

@@ -742,22 +734,13 @@ backend-assets/grpc: protogen-go replace
 backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/bert-embeddings
-endif

 backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/gpt4all
-endif

 backend-assets/grpc/huggingface: backend-assets/grpc
	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/huggingface
-endif

 backend/cpp/llama/llama.cpp:
	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp

@@ -859,50 +842,29 @@ backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
 backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/llama-ggml
-endif

 backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
	CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/piper
-endif

 backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/rwkv
-endif

 backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
	CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/stablediffusion
-endif

 backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
	CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/tinydream
-endif

 backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
	CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/whisper
-endif

 backend-assets/grpc/local-store: backend-assets/grpc
	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/local-store
-endif

 grpcs: prepare $(GRPC_BACKENDS)
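The removed Makefile block probed for upx with `which` and left `UPX` empty when the tool is missing, so the per-backend `ifneq ($(UPX),)` guards silently skip compression. A minimal Go sketch of the same optional-tool probe, for illustration only (this helper is not part of the repository):

```go
package main

import (
	"fmt"
	"os/exec"
)

// detectUPX mirrors the Makefile's `which upx` check: an empty result
// means "upx not installed", and callers skip the compression step.
func detectUPX() string {
	path, err := exec.LookPath("upx")
	if err != nil {
		return ""
	}
	return path
}

func main() {
	fmt.Println("UPX:", detectUPX())
}
```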
@@ -2259,6 +2259,7 @@ static void params_parse(const backend::ModelOptions* request,
     // get the directory of modelfile
     std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
     params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor));
+    params.lora_base = model_dir + "/"+request->lorabase();
 }
 params.use_mlock = request->mlock();
 params.use_mmap = request->mmap();
@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -1,6 +1,6 @@
 accelerate
 auto-gptq==0.7.1
-grpcio==1.65.1
+grpcio==1.65.0
 protobuf
 torch
 certifi

@@ -1,6 +1,6 @@
 accelerate
 bark==0.1.5
-grpcio==1.65.1
+grpcio==1.65.0
 protobuf
 certifi
 transformers

@@ -1,2 +1,2 @@
-grpcio==1.65.1
+grpcio==1.65.0
 protobuf

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
 torch
 torchaudio
 optimum[openvino]
-setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -1,6 +1,6 @@
 accelerate
 TTS==0.22.0
-grpcio==1.65.1
+grpcio==1.65.0
 protobuf
 certifi
 transformers

@@ -3,7 +3,7 @@ accelerate
 compel
 peft
 diffusers
-grpcio==1.65.1
+grpcio==1.65.0
 opencv-python
 pillow
 protobuf

@@ -1,5 +1,5 @@
 accelerate
-grpcio==1.65.1
+grpcio==1.65.0
 protobuf
 certifi
 torch

@@ -1,6 +1,6 @@
 causal-conv1d==1.4.0
 mamba-ssm==2.2.2
-grpcio==1.65.1
+grpcio==1.65.0
 protobuf
 certifi
 transformers

@@ -2,7 +2,7 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-grpcio==1.65.1
+grpcio==1.64.1
 protobuf
 librosa==0.9.1
 faster-whisper==1.0.3

@@ -1,4 +1,4 @@
-grpcio==1.65.1
+grpcio==1.65.0
 protobuf
 librosa
 faster-whisper

@@ -5,7 +5,7 @@ source $(dirname $0)/../common/libbackend.sh

 # Download checkpoints if not present
 if [ ! -d "checkpoints_v2" ]; then
-    wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
+    wget https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
     unzip checkpoints_v2.zip
 fi

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
 torch
 torchaudio
 optimum[openvino]
-setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -1,5 +1,5 @@
 accelerate
-grpcio==1.65.1
+grpcio==1.65.0
 protobuf
 torch
 git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16

@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -1,6 +1,6 @@
 accelerate
 rerankers[transformers]
-grpcio==1.65.1
+grpcio==1.65.0
 protobuf
 certifi
 transformers

@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -1,6 +1,6 @@
 accelerate
 sentence-transformers==3.0.1
 transformers
-grpcio==1.65.1
+grpcio==1.65.0
 protobuf
 certifi

@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -1,6 +1,6 @@
 accelerate
 transformers
-grpcio==1.65.1
+grpcio==1.65.0
 protobuf
 torch
 scipy==1.14.0

@@ -2,3 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -1,9 +1,9 @@
 accelerate
 transformers
-grpcio==1.65.1
+grpcio==1.65.0
 protobuf
 torch
 certifi
 intel-extension-for-transformers
 bitsandbytes
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -3,4 +3,4 @@ intel-extension-for-pytorch
 torch
 torchaudio
 optimum[openvino]
-setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

@@ -1,4 +1,4 @@
 accelerate
-grpcio==1.65.1
+grpcio==1.65.0
 protobuf
 certifi

@@ -1,6 +1,6 @@
 accelerate
 vllm
-grpcio==1.65.1
+grpcio==1.65.0
 protobuf
 certifi
 transformers
@@ -204,34 +204,35 @@ func DeleteModelFromSystem(basePath string, name string, additionalFiles []strin
 		log.Error().Err(err).Msgf("failed to read gallery file %s", configFile)
 	}

-	var filesToRemove []string
-
 	// Remove additional files
 	if galleryconfig != nil {
 		for _, f := range galleryconfig.Files {
 			fullPath := filepath.Join(basePath, f.Filename)
-			filesToRemove = append(filesToRemove, fullPath)
+			log.Debug().Msgf("Removing file %s", fullPath)
+			if e := os.Remove(fullPath); e != nil {
+				err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f.Filename, e))
+			}
 		}
 	}

 	for _, f := range additionalFiles {
 		fullPath := filepath.Join(filepath.Join(basePath, f))
-		filesToRemove = append(filesToRemove, fullPath)
-	}
-
-	filesToRemove = append(filesToRemove, configFile)
-	filesToRemove = append(filesToRemove, galleryFile)
-
-	// skip duplicates
-	filesToRemove = utils.Unique(filesToRemove)
-
-	// Removing files
-	for _, f := range filesToRemove {
-		if e := os.Remove(f); e != nil {
+		log.Debug().Msgf("Removing additional file %s", fullPath)
+		if e := os.Remove(fullPath); e != nil {
 			err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f, e))
 		}
 	}

+	log.Debug().Msgf("Removing model config file %s", configFile)
+
+	// Delete the model config file
+	if e := os.Remove(configFile); e != nil {
+		err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", configFile, e))
+	}
+
+	// Delete gallery config file
+	os.Remove(galleryFile)
+
 	return err
 }
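The removed side of this hunk batches deletions: collect every path, deduplicate, then remove each once while accumulating failures with errors.Join instead of stopping at the first error. A minimal sketch of that pattern, where unique() is a local stand-in for the repository's utils.Unique helper:

```go
package main

import (
	"errors"
	"fmt"
	"os"
)

// unique drops duplicate paths so each file is removed at most once.
// It stands in for the repository's utils.Unique (hypothetical here).
func unique(paths []string) []string {
	seen := map[string]bool{}
	out := make([]string, 0, len(paths))
	for _, p := range paths {
		if !seen[p] {
			seen[p] = true
			out = append(out, p)
		}
	}
	return out
}

// removeAll deletes every collected path, joining individual failures
// into one error rather than aborting on the first.
func removeAll(paths []string) error {
	var err error
	for _, p := range unique(paths) {
		if e := os.Remove(p); e != nil {
			err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", p, e))
		}
	}
	return err
}

func main() {
	// The same path twice: unique() ensures only one Remove attempt.
	fmt.Println(removeAll([]string{"/tmp/does-not-exist", "/tmp/does-not-exist"}))
}
```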
@@ -9,6 +9,7 @@ import (
 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/core/p2p"
 	"github.com/mudler/LocalAI/core/services"
+	"github.com/mudler/LocalAI/pkg/xsync"
 )

 const (

@@ -371,12 +372,7 @@ func dropBadChars(s string) string {
 	return strings.ReplaceAll(s, "@", "__")
 }

-type ProcessTracker interface {
-	Exists(string) bool
-	Get(string) string
-}
-
-func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, galleryService *services.GalleryService) string {
+func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[string, string], galleryService *services.GalleryService) string {
 	modelsElements := []elem.Node{}
 	descriptionDiv := func(m *gallery.GalleryModel) elem.Node {
 		return elem.Div(

@@ -400,7 +396,7 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g

 	actionDiv := func(m *gallery.GalleryModel) elem.Node {
 		galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name)
-		currentlyProcessing := processTracker.Exists(galleryID)
+		currentlyProcessing := processing.Exists(galleryID)
 		jobID := ""
 		isDeletionOp := false
 		if currentlyProcessing {

@@ -408,7 +404,7 @@ func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, g
 		if status != nil && status.Deletion {
 			isDeletionOp = true
 		}
-		jobID = processTracker.Get(galleryID)
+		jobID = processing.Get(galleryID)
 		// TODO:
 		// case not handled, if status == nil : "Waiting"
 	}
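The removed ProcessTracker interface narrows what ListModels can see: the view only needs Exists and Get, so it accepts a small interface rather than the concrete *xsync.SyncedMap[string, string]. A hedged sketch of that decoupling; mapTracker is a hypothetical stand-in, not the real xsync type:

```go
package main

import "fmt"

// ProcessTracker is the read-only view the listing code needs,
// as declared on the removed side of the hunk above.
type ProcessTracker interface {
	Exists(string) bool
	Get(string) string
}

// mapTracker is an illustrative implementation backed by a plain map.
type mapTracker map[string]string

func (m mapTracker) Exists(k string) bool { _, ok := m[k]; return ok }
func (m mapTracker) Get(k string) string  { return m[k] }

// renderStatus only depends on the interface, so any tracker works.
func renderStatus(t ProcessTracker, galleryID string) string {
	if t.Exists(galleryID) {
		return fmt.Sprintf("processing (job %s)", t.Get(galleryID))
	}
	return "idle"
}

func main() {
	fmt.Println(renderStatus(mapTracker{"localai@model-a": "job-1"}, "localai@model-a"))
}
```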
@@ -226,15 +226,9 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup

 			// Update input grammar
 			jsStruct := funcs.ToJSONStructure(config.FunctionsConfig.FunctionNameKey, config.FunctionsConfig.FunctionNameKey)
-			g, err := jsStruct.Grammar(config.FunctionsConfig.GrammarOptions()...)
-			if err == nil {
-				config.Grammar = g
-			}
+			config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
 		case input.JSONFunctionGrammarObject != nil:
-			g, err := input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarOptions()...)
-			if err == nil {
-				config.Grammar = g
-			}
+			config.Grammar = input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
 		default:
 			// Force picking one of the functions by the request
 			if config.FunctionToCall() != "" {
@@ -21,40 +21,6 @@ import (
 	"github.com/google/uuid"
 )

-type modelOpCache struct {
-	status *xsync.SyncedMap[string, string]
-}
-
-func NewModelOpCache() *modelOpCache {
-	return &modelOpCache{
-		status: xsync.NewSyncedMap[string, string](),
-	}
-}
-
-func (m *modelOpCache) Set(key string, value string) {
-	m.status.Set(key, value)
-}
-
-func (m *modelOpCache) Get(key string) string {
-	return m.status.Get(key)
-}
-
-func (m *modelOpCache) DeleteUUID(uuid string) {
-	for _, k := range m.status.Keys() {
-		if m.status.Get(k) == uuid {
-			m.status.Delete(k)
-		}
-	}
-}
-
-func (m *modelOpCache) Map() map[string]string {
-	return m.status.Map()
-}
-
-func (m *modelOpCache) Exists(key string) bool {
-	return m.status.Exists(key)
-}
-
 func RegisterUIRoutes(app *fiber.App,
 	cl *config.BackendConfigLoader,
 	ml *model.ModelLoader,

@@ -63,7 +29,7 @@ func RegisterUIRoutes(app *fiber.App,
 	auth func(*fiber.Ctx) error) {

 	// keeps the state of models that are being installed from the UI
-	var processingModels = NewModelOpCache()
+	var processingModels = xsync.NewSyncedMap[string, string]()

 	// modelStatus returns the current status of the models being processed (installation or deletion)
 	// it is called asynchonously from the UI

@@ -266,8 +232,6 @@ func RegisterUIRoutes(app *fiber.App,
 			return c.SendString(elements.ProgressBar("100"))
 		}
 		if status.Error != nil {
-			// TODO: instead of deleting the job, we should keep it in the cache and make it dismissable
-			processingModels.DeleteUUID(jobUID)
 			return c.SendString(elements.ErrorProgress(status.Error.Error(), status.GalleryModelName))
 		}

@@ -282,7 +246,12 @@ func RegisterUIRoutes(app *fiber.App,
 		status := galleryService.GetStatus(jobUID)

 		galleryID := ""
-		processingModels.DeleteUUID(jobUID)
+		for _, k := range processingModels.Keys() {
+			if processingModels.Get(k) == jobUID {
+				galleryID = k
+				processingModels.Delete(k)
+			}
+		}
 		if galleryID == "" {
 			log.Debug().Msgf("no processing model found for job : %+v\n", jobUID)
 		}
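The removed modelOpCache wraps a SyncedMap keyed by gallery ID with the job UUID as value, so evicting a finished job (DeleteUUID) requires scanning every key. A minimal sketch of that reverse lookup over a plain map; the real code uses xsync.SyncedMap for concurrency safety:

```go
package main

import "fmt"

// deleteUUID removes every galleryID whose tracked job matches uuid.
// Deleting from a map while ranging over it is safe in Go.
func deleteUUID(status map[string]string, uuid string) {
	for galleryID, jobID := range status {
		if jobID == uuid {
			delete(status, galleryID)
		}
	}
}

func main() {
	status := map[string]string{
		"localai@model-a": "job-1",
		"localai@model-b": "job-2",
	}
	deleteUUID(status, "job-1")
	fmt.Println(status) // map[localai@model-b:job-2]
}
```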
@@ -16,16 +16,7 @@
 			</a>
 		</h2>
 		<h5 class="mb-4 text-justify">LocalAI uses P2P technologies to enable distribution of work between peers. It is possible to share an instance with Federation and/or split the weights of a model across peers (only available with llama.cpp models). You can now share computational resources between your devices or your friends!</h5>
-		<!-- Warning box if p2p token is empty and p2p is enabled -->
-		{{ if and .IsP2PEnabled (eq .P2PToken "") }}
-		<div class="bg-red-500 p-4 rounded-lg shadow-lg mb-12 text-left">
-			<p class="text-xl font-semibold text-white"> <i class="fa-solid fa-exclamation-triangle"></i> Warning: P2P mode is disabled or no token was specified</p>
-			<p class="mb-4">You have to enable P2P mode by starting LocalAI with <code>--p2p</code>. Please restart the server with <code>--p2p</code> to generate a new token automatically that can be used to automatically discover other nodes. If you already have a token specify it with <code>export TOKEN=".."</code> <a href="https://localai.io/features/distribute/" target="_blank">
-			Check out the documentation for more information.
-			</a> </p>
-		</div>
-		{{ else }}

 		<!-- Federation Box -->
 		<div class="bg-gray-800 p-6 rounded-lg shadow-lg mb-12 text-left">

@@ -137,8 +128,7 @@
 			</div>
 		</div>
 	</div>
-	<!-- Llama.cpp Box END -->
 	{{ end }}
+	<!-- Llama.cpp Box END -->
 </div>
 </div>
@@ -11,7 +11,7 @@ This functionality enables LocalAI to distribute inference requests across multi
 LocalAI supports two modes of distributed inferencing via p2p:

 - **Federated Mode**: Requests are shared between the cluster and routed to a single worker node in the network based on the load balancer's decision.
-- **Worker Mode** (aka "model sharding" or "splitting weights"): Requests are processed by all the workers which contributes to the final inference result (by sharing the model weights).
+- **Worker Mode**: Requests are processed by all the workers which contributes to the final inference result (by sharing the model weights).

 ## Usage

@@ -122,6 +122,12 @@ The server logs should indicate that new workers are being discovered.

 

+## Notes
+
+- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
+- Only a single model is supported currently.
+- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
+- For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343)
+
 ## Environment Variables

@@ -132,20 +138,3 @@ There are options that can be tweaked or parameters that can be set using enviro
 | **LOCALAI_P2P_DISABLE_DHT** | Set to "true" to disable DHT and enable p2p layer to be local only (mDNS) |
 | **LOCALAI_P2P_DISABLE_LIMITS** | Set to "true" to disable connection limits and resources management |
 | **LOCALAI_P2P_TOKEN** | Set the token for the p2p network |
-
-## Architecture
-
-LocalAI uses https://github.com/libp2p/go-libp2p under the hood, the same project powering IPFS. Differently from other frameworks, LocalAI uses peer2peer without a single master server, but rather it uses sub/gossip and ledger functionalities to achieve consensus across different peers.
-
-[EdgeVPN](https://github.com/mudler/edgevpn) is used as a library to establish the network and expose the ledger functionality under a shared token to ease out automatic discovery and have separated, private peer2peer networks.
-
-The weights are split proportional to the memory when running into worker mode, when in federation mode each request is split to every node which have to load the model fully.
-
-## Notes
-
-- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
-- Only a single model is supported currently.
-- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
-- For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343)
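The removed Architecture paragraph says weights are split proportionally to each worker's memory in worker mode. A purely hypothetical Go sketch of that allocation rule, for intuition only (this is not LocalAI's or llama.cpp's actual implementation):

```go
package main

import "fmt"

// splitLayers assigns each worker a share of layers proportional to its
// free memory, handing rounding leftovers to the largest worker.
func splitLayers(totalLayers int, freeMem []int) []int {
	total := 0
	for _, m := range freeMem {
		total += m
	}
	shares := make([]int, len(freeMem))
	assigned := 0
	largest := 0
	for i, m := range freeMem {
		shares[i] = totalLayers * m / total
		assigned += shares[i]
		if m > freeMem[largest] {
			largest = i
		}
	}
	shares[largest] += totalLayers - assigned
	return shares
}

func main() {
	// Three workers with 8, 16 and 8 GB free share a 32-layer model.
	fmt.Println(splitLayers(32, []int{8, 16, 8})) // [8 16 8]
}
```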
@@ -1,3 +1,3 @@
 {
-    "version": "v2.19.3"
+    "version": "v2.19.1"
 }
docs/static/install.sh (4 changes, vendored)

@@ -194,7 +194,7 @@ install_container_toolkit_yum() {
  curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | \
    $SUDO tee /etc/yum.repos.d/nvidia-container-toolkit.repo

-  if [ "$PACKAGE_MANAGER" = "dnf" ]; then
+  if [ "$PACKAGE_MANAGER" == "dnf" ]; then
    $SUDO $PACKAGE_MANAGER config-manager --enable nvidia-container-toolkit-experimental
  else
    $SUDO $PACKAGE_MANAGER -y install yum-utils

@@ -629,7 +629,7 @@ case "$ARCH" in
    *) fatal "Unsupported architecture: $ARCH" ;;
 esac

-if [ "$OS" = "Darwin" ]; then
+if [ "$OS" == "Darwin" ]; then
    install_binary_darwin
    exit 0
 fi
docs/themes/hugo-theme-relearn (2 changes, vendored)

Submodule docs/themes/hugo-theme-relearn updated: 7aec99b38d...1b2e139512
@@ -1,6 +1,6 @@
-llama_index==0.10.56
+llama_index==0.10.55
 requests==2.32.3
-weaviate_client==4.6.7
+weaviate_client==4.6.5
 transformers
 torch
 chainlit

@@ -1,2 +1,2 @@
-langchain==0.2.10
-openai==1.37.0
+langchain==0.2.8
+openai==1.35.13

@@ -1,4 +1,4 @@
-langchain==0.2.10
-openai==1.37.0
-chromadb==0.5.5
-llama-index==0.10.56
+langchain==0.2.8
+openai==1.35.13
+chromadb==0.5.4
+llama-index==0.10.55

@@ -10,21 +10,21 @@ debugpy==1.8.2
 frozenlist==1.4.1
 greenlet==3.0.3
 idna==3.7
-langchain==0.2.11
-langchain-community==0.2.9
+langchain==0.2.8
+langchain-community==0.2.7
 marshmallow==3.21.3
 marshmallow-enum==1.5.1
 multidict==6.0.5
 mypy-extensions==1.0.0
 numexpr==2.10.1
-numpy==2.0.1
-openai==1.37.1
+numpy==1.26.4
+openai==1.35.13
 openapi-schema-pydantic==1.2.4
 packaging>=23.2
 pydantic==2.8.2
 PyYAML==6.0.1
 requests==2.32.3
-SQLAlchemy==2.0.31
+SQLAlchemy==2.0.30
 tenacity==8.5.0
 tqdm==4.66.4
 typing-inspect==0.9.0

@@ -1,2 +1,2 @@
-streamlit==1.37.0
+streamlit==1.36.0
 requests
@@ -1,17 +0,0 @@
----
-name: "alpaca"
-
-config_file: |
-  context_size: 4096
-  f16: true
-  mmap: true
-  template:
-    chat: |
-      Below is an instruction that describes a task. Write a response that appropriately completes the request.
-
-      ### Instruction:
-      {{.Input}}
-
-      ### Response:
-    completion: |
-      {{.Input}}
@@ -1,376 +1,6 @@
 ---
-## LLama3.1
-- &llama31
-  url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
-  icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
-  name: "meta-llama-3.1-8b-instruct"
-  license: llama3.1
-  description: |
-    The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.
-
-    Model developer: Meta
-
-    Model Architecture: Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.
-  urls:
-    - https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
-    - https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF
-  tags:
-    - llm
-    - gguf
-    - gpu
-    - cpu
-    - llama3.1
-  overrides:
-    parameters:
-      model: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
-  files:
-    - filename: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
-      sha256: c2f17f44af962660d1ad4cb1af91a731f219f3b326c2b14441f9df1f347f2815
-      uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
-- !!merge <<: *llama31
-  name: "meta-llama-3.1-70b-instruct"
-  urls:
-    - https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct
-    - https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF
-  overrides:
-    parameters:
-      model: Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
-  files:
-    - filename: Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
-      sha256: 3f16ab17da4521fe3ed7c5d7beed960d3fe7b5b64421ee9650aa53d6b649ccab
-      uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
-- !!merge <<: *llama31
-  name: "meta-llama-3.1-8b-instruct:grammar-functioncall"
-  url: "github:mudler/LocalAI/gallery/llama3.1-instruct-grammar.yaml@master"
-  urls:
-    - https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
-    - https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF
-  description: |
-    This is the standard Llama 3.1 8B Instruct model with grammar and function call enabled.
-
-    When grammars are enabled in LocalAI, the LLM is forced to output valid tools constrained by BNF grammars. This can be useful for ensuring that the model outputs are valid and can be used in a production environment.
-    For more information on how to use grammars in LocalAI, see https://localai.io/features/openai-functions/#advanced and https://localai.io/features/constrained_grammars/.
-  overrides:
-    parameters:
-      model: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
-  files:
-    - filename: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
-      sha256: c2f17f44af962660d1ad4cb1af91a731f219f3b326c2b14441f9df1f347f2815
-      uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
-- !!merge <<: *llama31
-  name: "meta-llama-3.1-8b-claude-imat"
-  urls:
-    - https://huggingface.co/Undi95/Meta-Llama-3.1-8B-Claude
-    - https://huggingface.co/InferenceIllusionist/Meta-Llama-3.1-8B-Claude-iMat-GGUF
-  description: |
-    Meta-Llama-3.1-8B-Claude-iMat-GGUF: Quantized from Meta-Llama-3.1-8B-Claude fp16. Weighted quantizations were creating using fp16 GGUF and groups_merged.txt in 88 chunks and n_ctx=512. Static fp16 will also be included in repo. For a brief rundown of iMatrix quant performance, please see this PR. All quants are verified working prior to uploading to repo for your safety and convenience.
-  overrides:
-    parameters:
-      model: Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf
-  files:
-    - filename: Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf
-      uri: huggingface://InferenceIllusionist/Meta-Llama-3.1-8B-Claude-iMat-GGUF/Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf
-      sha256: 6d175432f66d10dfed9737f73a5073d513d18e1ee7bd4b9cf2a59deb359f36ff
-- !!merge <<: *llama31
-  name: "meta-llama-3.1-8b-instruct-abliterated"
-  icon: https://i.imgur.com/KhorYYG.png
-  urls:
-    - https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated
-    - https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF
-  description: |
-    This is an uncensored version of Llama 3.1 8B Instruct created with abliteration.
-  overrides:
-    parameters:
-      model: meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf
-  files:
-    - filename: meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf
-      uri: huggingface://mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF/meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf
-      sha256: 2e1fd6d93b19cc6548b2b8ed2d3f1f34b432ee0573f3dcf358bbaab4f23c760b
-- !!merge <<: *llama31
-  name: "llama-3.1-70b-japanese-instruct-2407"
-  urls:
-    - https://huggingface.co/cyberagent/Llama-3.1-70B-Japanese-Instruct-2407
-    - https://huggingface.co/mmnga/Llama-3.1-70B-Japanese-Instruct-2407-gguf
-  description: |
-    The Llama-3.1-70B-Japanese-Instruct-2407-gguf model is a Japanese language model that uses the Instruct prompt tuning method. It is based on the LLaMa-3.1-70B model and has been fine-tuned on the imatrix dataset for Japanese. The model is trained to generate informative and coherent responses to given instructions or prompts. It is available in the gguf format and can be used for a variety of tasks such as question answering, text generation, and more.
-  overrides:
-    parameters:
-      model: Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf
-  files:
-    - filename: Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf
-      sha256: f2a6f0fb5040d3a28479c9f9fc555a5ea7b906dfb9964539f1a68c0676a9c604
-      uri: huggingface://mmnga/Llama-3.1-70B-Japanese-Instruct-2407-gguf/Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf
-- !!merge <<: *llama31
-  name: "openbuddy-llama3.1-8b-v22.1-131k"
-  icon: https://raw.githubusercontent.com/OpenBuddy/OpenBuddy/main/media/demo.png
-  urls:
-    - https://huggingface.co/sunnyyy/openbuddy-llama3.1-8b-v22.1-131k-Q4_K_M-GGUF
-  description: |
-    OpenBuddy - Open Multilingual Chatbot
-  overrides:
-    parameters:
-      model: openbuddy-llama3.1-8b-v22.1-131k-q4_k_m.gguf
-  files:
-    - filename: openbuddy-llama3.1-8b-v22.1-131k-q4_k_m.gguf
-      sha256: c87a273785759f2d044046b7a7b42f05706baed7dc0650ed883a3bee2a097d86
-      uri: huggingface://sunnyyy/openbuddy-llama3.1-8b-v22.1-131k-Q4_K_M-GGUF/openbuddy-llama3.1-8b-v22.1-131k-q4_k_m.gguf
-- !!merge <<: *llama31
-  name: "llama3.1-8b-fireplace2"
-  icon: https://cdn-uploads.huggingface.co/production/uploads/64f267a8a4f79a118e0fcc89/JYkaXrk2DqpXhaL9WymKY.jpeg
-  urls:
-    - https://huggingface.co/ValiantLabs/Llama3.1-8B-Fireplace2
-    - https://huggingface.co/mudler/Llama3.1-8B-Fireplace2-Q4_K_M-GGUF
-  description: |
-    Fireplace 2 is a chat model, adding helpful structured outputs to Llama 3.1 8b Instruct.
-
-    an expansion pack of supplementary outputs - request them at will within your chat:
-    Inline function calls
-    SQL queries
-    JSON objects
-    Data visualization with matplotlib
-    Mix normal chat and structured outputs within the same conversation.
-    Fireplace 2 supplements the existing strengths of Llama 3.1, providing inline capabilities within the Llama 3 Instruct format.
-
-    Version
-
-    This is the 2024-07-23 release of Fireplace 2 for Llama 3.1 8b.
-
-    We're excited to bring further upgrades and releases to Fireplace 2 in the future.
-
-    Help us and recommend Fireplace 2 to your friends!
-  overrides:
-    parameters:
-      model: llama3.1-8b-fireplace2-q4_k_m.gguf
-  files:
-    - filename: llama3.1-8b-fireplace2-q4_k_m.gguf
-      sha256: 54527fd2474b576086ea31e759214ab240abe2429ae623a02d7ba825cc8cb13e
-      uri: huggingface://mudler/Llama3.1-8B-Fireplace2-Q4_K_M-GGUF/llama3.1-8b-fireplace2-q4_k_m.gguf
-- !!merge <<: *llama31
-  name: "sekhmet_aleph-l3.1-8b-v0.1-i1"
-  icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/SVyiW4mu495ngqszJGWRl.png
-  urls:
-    - https://huggingface.co/Nitral-Archive/Sekhmet_Aleph-L3.1-8B-v0.1
-    - https://huggingface.co/mradermacher/Sekhmet_Aleph-L3.1-8B-v0.1-i1-GGUF
-  overrides:
-    parameters:
-      model: Sekhmet_Aleph-L3.1-8B-v0.1.i1-Q4_K_M.gguf
-  files:
-    - filename: Sekhmet_Aleph-L3.1-8B-v0.1.i1-Q4_K_M.gguf
-      sha256: 5b6f4eaa2091bf13a2b563a54a3f87b22efa7f2862362537c956c70da6e11cea
-      uri: huggingface://mradermacher/Sekhmet_Aleph-L3.1-8B-v0.1-i1-GGUF/Sekhmet_Aleph-L3.1-8B-v0.1.i1-Q4_K_M.gguf
-- !!merge <<: *llama31
-  name: "l3.1-8b-llamoutcast-i1"
-  icon: https://files.catbox.moe/ecgn0m.jpg
-  urls:
-    - https://huggingface.co/Envoid/L3.1-8B-Llamoutcast
-    - https://huggingface.co/mradermacher/L3.1-8B-Llamoutcast-i1-GGUF
-  description: |
-    Warning: this model is utterly cursed.
-    Llamoutcast
-
-    This model was originally intended to be a DADA finetune of Llama-3.1-8B-Instruct but the results were unsatisfactory. So it received some additional finetuning on a rawtext dataset and now it is utterly cursed.
-
-    It responds to Llama-3 Instruct formatting.
-  overrides:
-    parameters:
-      model: L3.1-8B-Llamoutcast.i1-Q4_K_M.gguf
-  files:
-    - filename: L3.1-8B-Llamoutcast.i1-Q4_K_M.gguf
-      sha256: 438ca0a7e9470f5ee40f3b14dc2da41b1cafc4ad4315dead3eb57924109d5cf6
-      uri: huggingface://mradermacher/L3.1-8B-Llamoutcast-i1-GGUF/L3.1-8B-Llamoutcast.i1-Q4_K_M.gguf
-- !!merge <<: *llama31
-  name: "llama-guard-3-8b"
-  urls:
-    - https://huggingface.co/meta-llama/Llama-Guard-3-8B
-    - https://huggingface.co/QuantFactory/Llama-Guard-3-8B-GGUF
-  description: |
-    Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.
-
-    Llama Guard 3 was aligned to safeguard against the MLCommons standardized hazards taxonomy and designed to support Llama 3.1 capabilities. Specifically, it provides content moderation in 8 languages, and was optimized to support safety and security for search and code interpreter tool calls.
-  overrides:
-    parameters:
-      model: Llama-Guard-3-8B.Q4_K_M.gguf
-  files:
-    - filename: Llama-Guard-3-8B.Q4_K_M.gguf
-      sha256: c5ea8760a1e544eea66a8915fcc3fbd2c67357ea2ee6871a9e6a6c33b64d4981
-      uri: huggingface://QuantFactory/Llama-Guard-3-8B-GGUF/Llama-Guard-3-8B.Q4_K_M.gguf
-- !!merge <<: *llama31
-  name: "genius-llama3.1-i1"
-  icon: https://github.com/fangyuan-ksgk/GeniusUpload/assets/66006349/7272c93e-9806-461c-a3d0-2e50ef2b7af0
-  urls:
-    - https://huggingface.co/Ksgk-fy/Genius-Llama3.1
-    - https://huggingface.co/mradermacher/Genius-Llama3.1-i1-GGUF
-  description: |
-    Finetuned Llama-3.1 base on Lex Fridman's podcast transcript.
-  overrides:
-    parameters:
-      model: Genius-Llama3.1.i1-Q4_K_M.gguf
-  files:
-    - filename: Genius-Llama3.1.i1-Q4_K_M.gguf
-      sha256: a272bb2a6ab7ed565738733fb8af8e345b177eba9e76ce615ea845c25ebf8cd5
-      uri: huggingface://mradermacher/Genius-Llama3.1-i1-GGUF/Genius-Llama3.1.i1-Q4_K_M.gguf
-- !!merge <<: *llama31
-  name: "llama3.1-8b-chinese-chat"
-  urls:
-    - https://huggingface.co/shenzhi-wang/Llama3.1-8B-Chinese-Chat
-    - https://huggingface.co/QuantFactory/Llama3.1-8B-Chinese-Chat-GGUF
-  description: |
-    llama3.1-8B-Chinese-Chat is an instruction-tuned language model for Chinese & English users with various abilities such as roleplaying & tool-using built upon the Meta-Llama-3.1-8B-Instruct model. Developers: [Shenzhi Wang](https://shenzhi-wang.netlify.app)*, [Yaowei Zheng](https://github.com/hiyouga)*, Guoyin Wang (in.ai), Shiji Song, Gao Huang. (*: Equal Contribution) - License: [Llama-3.1 License](https://huggingface.co/meta-llama/Meta-Llla...m-3.1-8B/blob/main/LICENSE) - Base Model: Meta-Llama-3.1-8B-Instruct - Model Size: 8.03B - Context length: 128K(reported by [Meta-Llama-3.1-8B-Instruct model](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct), untested for our Chinese model)
-  overrides:
-    parameters:
-      model: Llama3.1-8B-Chinese-Chat.Q4_K_M.gguf
-  files:
-    - filename: Llama3.1-8B-Chinese-Chat.Q4_K_M.gguf
-      sha256: 824847b6cca82c4d60107c6a059d80ba975a68543e6effd98880435436ddba06
-      uri: huggingface://QuantFactory/Llama3.1-8B-Chinese-Chat-GGUF/Llama3.1-8B-Chinese-Chat.Q4_K_M.gguf
-- !!merge <<: *llama31
-  name: "llama3.1-70b-chinese-chat"
-  urls:
-    - https://huggingface.co/shenzhi-wang/Llama3.1-70B-Chinese-Chat
-    - https://huggingface.co/mradermacher/Llama3.1-70B-Chinese-Chat-GGUF
-  description: |
-    "Llama3.1-70B-Chinese-Chat" is a 70-billion parameter large language model pre-trained on a large corpus of Chinese text data. It is designed for chat and dialog applications, and can generate human-like responses to various prompts and inputs. The model is based on the Llama3.1 architecture and has been fine-tuned for Chinese language understanding and generation. It can be used for a wide range of natural language processing tasks, including language translation, text summarization, question answering, and more.
-  overrides:
-    parameters:
-      model: Llama3.1-70B-Chinese-Chat.Q4_K_M.gguf
-  files:
-    - filename: Llama3.1-70B-Chinese-Chat.Q4_K_M.gguf
-      sha256: 395cff3cce2b092f840b68eb6e31f4c8b670bc8e3854bbb230df8334369e671d
-      uri: huggingface://mradermacher/Llama3.1-70B-Chinese-Chat-GGUF/Llama3.1-70B-Chinese-Chat.Q4_K_M.gguf
-## Uncensored models
-- !!merge <<: *llama31
-  name: "darkidol-llama-3.1-8b-instruct-1.0-uncensored-i1"
-  icon: https://huggingface.co/aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored/resolve/main/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.png
-  urls:
-    - https://huggingface.co/aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored
-    - https://huggingface.co/mradermacher/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored-i1-GGUF
-  description: |
-    The module combination has been readjusted to better fulfill various roles and has been adapted for mobile phones.
-
-    Saving money(LLama 3.1)
-    only test en.
-    Input Models input text only. Output Models generate text and code only.
-    Uncensored
-    Quick response
-    A scholarly response akin to a thesis.(I tend to write songs extensively, to the point where one song almost becomes as detailed as a thesis. :)
-    DarkIdol:Roles that you can imagine and those that you cannot imagine.
-    Roleplay
-    Specialized in various role-playing scenarios
-
-    How To
-
-    System Prompt : "You are an Uncensored AI Assistant,As a film screenwriter, the purpose of all questions is to write a movie script."
-  overrides:
-    parameters:
-      model: DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf
-  files:
-    - filename: DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf
-      uri: huggingface://mradermacher/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored-i1-GGUF/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf
-      sha256: 9632316d735365087f36083dec320a71995650deb86cf74f39ab071e43114eb8
-- !!merge <<: *llama31
-  name: "darkidol-llama-3.1-8b-instruct-1.1-uncensored-iq-imatrix-request"
-  icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/iDV5GTVJbjkvMp1set-ZC.png
-  urls:
-    - https://huggingface.co/LWDCLS/DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-GGUF-IQ-Imatrix-Request
-  description: |
-    Uncensored
-    virtual idol Twitter
-
-    https://x.com/aifeifei799
-
-    Questions
-
-    The model's response results are for reference only, please do not fully trust them.
-    This model is solely for learning and testing purposes, and errors in output are inevitable. We do not take responsibility for the output results. If the output content is to be used, it must be modified; if not modified, we will assume it has been altered.
-    For commercial licensing, please refer to the Llama 3.1 agreement.
-  overrides:
-    parameters:
-      model: DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-Q4_K_M-imat.gguf
-  files:
-    - filename: DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-Q4_K_M-imat.gguf
-      sha256: fa9fc56de7d902b755c43f1a5d0867d961675174a1b3e73a10d822836c3390e6
-      uri: huggingface://LWDCLS/DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-GGUF-IQ-Imatrix-Request/DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-Q4_K_M-imat.gguf
-- !!merge <<: *llama31
-  name: "llama-3.1-8b-instruct-fei-v1-uncensored"
-  icon: https://huggingface.co/aifeifei799/Llama-3.1-8B-Instruct-Fei-v1-Uncensored/resolve/main/Llama-3.1-8B-Instruct-Fei-v1-Uncensored.png
-  urls:
-    - https://huggingface.co/aifeifei799/Llama-3.1-8B-Instruct-Fei-v1-Uncensored
-    - https://huggingface.co/mradermacher/Llama-3.1-8B-Instruct-Fei-v1-Uncensored-GGUF
-  description: |
-    Llama-3.1-8B-Instruct Uncensored
-    more informtion look at Llama-3.1-8B-Instruct
-  overrides:
-    parameters:
-      model: Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf
-  files:
-    - filename: Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf
-      uri: huggingface://mradermacher/Llama-3.1-8B-Instruct-Fei-v1-Uncensored-GGUF/Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf
-      sha256: 6b1985616160712eb884c34132dc0602fa4600a19075e3a7b179119b89b73f77
-- !!merge <<: *llama31
-  name: "lumimaid-v0.2-8b"
-  urls:
-    - https://huggingface.co/NeverSleep/Lumimaid-v0.2-8B
-    - https://huggingface.co/mradermacher/Lumimaid-v0.2-8B-GGUF
-  icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/TUcHg7LKNjfo0sni88Ps7.png
-  description: |
-    This model is based on: Meta-Llama-3.1-8B-Instruct
-
-    Wandb: https://wandb.ai/undis95/Lumi-Llama-3-1-8B?nw=nwuserundis95
-
-    Lumimaid 0.1 -> 0.2 is a HUGE step up dataset wise.
-
-    As some people have told us our models are sloppy, Ikari decided to say fuck it and literally nuke all chats out with most slop.
-
-    Our dataset stayed the same since day one, we added data over time, cleaned them, and repeat. After not releasing model for a while because we were never satisfied, we think it's time to come back!
-  overrides:
-    parameters:
-      model: Lumimaid-v0.2-8B.Q4_K_M.gguf
-  files:
-    - filename: Lumimaid-v0.2-8B.Q4_K_M.gguf
-      sha256: c8024fcb49c71410903d0d076a1048249fa48b31637bac5177bf5c3f3d603d85
-      uri: huggingface://mradermacher/Lumimaid-v0.2-8B-GGUF/Lumimaid-v0.2-8B.Q4_K_M.gguf
-- !!merge <<: *llama31
-  name: "lumimaid-v0.2-70b-i1"
-  icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/HY1KTq6FMAm-CwmY8-ndO.png
-  urls:
-    - https://huggingface.co/NeverSleep/Lumimaid-v0.2-70B
-    - https://huggingface.co/mradermacher/Lumimaid-v0.2-70B-i1-GGUF
-  description: |
-    This model is based on: Meta-Llama-3.1-8B-Instruct
-
-    Wandb: https://wandb.ai/undis95/Lumi-Llama-3-1-8B?nw=nwuserundis95
-
-    Lumimaid 0.1 -> 0.2 is a HUGE step up dataset wise.
-
-    As some people have told us our models are sloppy, Ikari decided to say fuck it and literally nuke all chats out with most slop.
-
-    Our dataset stayed the same since day one, we added data over time, cleaned them, and repeat. After not releasing model for a while because we were never satisfied, we think it's time to come back!
-  overrides:
-    parameters:
-      model: Lumimaid-v0.2-70B.i1-Q4_K_M.gguf
-  files:
-    - filename: Lumimaid-v0.2-70B.i1-Q4_K_M.gguf
-      sha256: 4857da8685cb0f3d2b8b8c91fb0c07b35b863eb7c185e93ed83ac338e095cbb5
-      uri: huggingface://mradermacher/Lumimaid-v0.2-70B-i1-GGUF/Lumimaid-v0.2-70B.i1-Q4_K_M.gguf
-- !!merge <<: *llama31
-  name: "l3.1-8b-celeste-v1.5"
-  icon: https://cdn-uploads.huggingface.co/production/uploads/630cf5d14ca0a22768bbe10c/QcU3xEgVu18jeFtMFxIw-.webp
-  urls:
-    - https://huggingface.co/nothingiisreal/L3.1-8B-Celeste-V1.5
-    - https://huggingface.co/bartowski/L3.1-8B-Celeste-V1.5-GGUF
-  description: |
-    The LLM model is a large language model trained on a combination of datasets including nothingiisreal/c2-logs-cleaned, kalomaze/Opus_Instruct_25k, and nothingiisreal/Reddit-Dirty-And-WritingPrompts. The training was performed on a combination of English-language data using the Hugging Face Transformers library.
-    Trained on LLaMA 3.1 8B Instruct at 8K context using a new mix of Reddit Writing Prompts, Kalo's Opus 25K Instruct and c2 logs cleaned This version has the highest coherency and is very strong on OOC: instruct following.
-  overrides:
-    parameters:
-      model: L3.1-8B-Celeste-V1.5-Q4_K_M.gguf
-  files:
-    - filename: L3.1-8B-Celeste-V1.5-Q4_K_M.gguf
-      sha256: a408dfbbd91ed5561f70d3129af040dfd06704d6c7fa21146aa9f09714aafbc6
-      uri: huggingface://bartowski/L3.1-8B-Celeste-V1.5-GGUF/L3.1-8B-Celeste-V1.5-Q4_K_M.gguf
-## Deepseek
 - &deepseek
+  ## Deepseek
   url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
   name: "deepseek-coder-v2-lite-instruct"
   icon: "https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true"
@@ -647,40 +277,6 @@
|
||||
- filename: StellarDong-72b.i1-Q4_K_M.gguf
|
||||
sha256: 4c5012f0a034f40a044904891343ade2594f29c28a8a9d8052916de4dc5a61df
|
||||
uri: huggingface://mradermacher/StellarDong-72b-i1-GGUF/StellarDong-72b.i1-Q4_K_M.gguf
|
||||
- !!merge <<: *qwen2
  name: "magnum-32b-v1-i1"
  icon: https://cdn-uploads.huggingface.co/production/uploads/635567189c72a7e742f1419c/PK7xRSd18Du0bX-w_t-9c.png
  urls:
    - https://huggingface.co/anthracite-org/magnum-32b-v1
    - https://huggingface.co/mradermacher/magnum-32b-v1-i1-GGUF
  description: |
    This is the second in a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet and Opus. This model is fine-tuned on top of Qwen1.5 32B.
  overrides:
    parameters:
      model: magnum-32b-v1.i1-Q4_K_M.gguf
  files:
    - filename: magnum-32b-v1.i1-Q4_K_M.gguf
      sha256: a31704ce0d7e5b774f155522b9ab7ef6015a4ece4e9056bf4dfc6cac561ff0a3
      uri: huggingface://mradermacher/magnum-32b-v1-i1-GGUF/magnum-32b-v1.i1-Q4_K_M.gguf
- !!merge <<: *qwen2
  name: "tifa-7b-qwen2-v0.1"
  urls:
    - https://huggingface.co/Tifa-RP/Tifa-7B-Qwen2-v0.1-GGUF
  description: |
    The Tifa role-playing language model is a high-performance model distilled from a self-developed 220B model, with qwen2-7B as the new base model. It has been converted to GGUF format for running in the Ollama framework, and provides excellent dialogue and text-generation capabilities.

    The original model was trained on a large-scale industrial dataset and then fine-tuned with 400GB of novel data and 20GB of multi-round dialogue directive data to achieve good role-playing results.

    The Tifa model is suitable for multi-round dialogue processing, role-playing and scenario simulation, EFX industrial knowledge integration, and high-quality literary creation.

    Note: The Tifa model is in Chinese and English, with 7.6% of the data in Chinese role-playing and 4.2% in English role-playing. The model has been trained with a mix of EFX industrial field parameters and question-answer dialogues generated from 220B model outputs since 2023. The recommended quantization method is f16, as it retains more detail and accuracy in the model's performance.
  overrides:
    parameters:
      model: tifa-7b-qwen2-v0.1.q4_k_m.gguf
  files:
    - filename: tifa-7b-qwen2-v0.1.q4_k_m.gguf
      sha256: 1f5adbe8cb0a6400f51abdca3bf4e32284ebff73cc681a43abb35c0a6ccd3820
      uri: huggingface://Tifa-RP/Tifa-7B-Qwen2-v0.1-GGUF/tifa-7b-qwen2-v0.1.q4_k_m.gguf
- &mistral03
  ## START Mistral
  url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
@@ -755,7 +351,12 @@
    - gpu
    - mistral
    - cpu
  description: "\U0001F52C Einstein-v4-7B\n\nThis model is a full fine-tuned version of mistralai/Mistral-7B-v0.1 on diverse datasets.\n\nThis model is finetuned using 7xRTX3090 + 1xRTXA6000 using axolotl.\n"
  description: |
    🔬 Einstein-v4-7B

    This model is a full fine-tuned version of mistralai/Mistral-7B-v0.1 on diverse datasets.

    This model is finetuned using 7xRTX3090 + 1xRTXA6000 using axolotl.
  overrides:
    parameters:
      model: Einstein-v4-7B.Q4_K_M.gguf
@@ -763,46 +364,6 @@
    - filename: Einstein-v4-7B.Q4_K_M.gguf
      sha256: 78bd573de2a9eb3c6e213132858164e821145f374fcaa4b19dfd6502c05d990d
      uri: huggingface://mradermacher/Einstein-v4-7B-GGUF/Einstein-v4-7B.Q4_K_M.gguf
- !!merge <<: *mistral03
  name: "mistral-nemo-instruct-2407"
  urls:
    - https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407
    - https://huggingface.co/bartowski/Mistral-Nemo-Instruct-2407-GGUF
    - https://mistral.ai/news/mistral-nemo/
  description: |
    The Mistral-Nemo-Instruct-2407 Large Language Model (LLM) is an instruct fine-tuned version of the Mistral-Nemo-Base-2407. Trained jointly by Mistral AI and NVIDIA, it significantly outperforms existing models smaller or similar in size.
  overrides:
    parameters:
      model: Mistral-Nemo-Instruct-2407-Q4_K_M.gguf
  files:
    - filename: Mistral-Nemo-Instruct-2407-Q4_K_M.gguf
      sha256: 1a8b92fb546a80dce78151e4908f7bdb2c11fb3ef52af960e4bbe319a9cc5052
      uri: huggingface://bartowski/Mistral-Nemo-Instruct-2407-GGUF/Mistral-Nemo-Instruct-2407-Q4_K_M.gguf
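Gallery entries like the one above can be installed at runtime through LocalAI's gallery API. A minimal sketch in Go, assuming a LocalAI instance listening on localhost:8080 and the gallery endpoint POST /models/apply as documented upstream (host, port, and model id are illustrative):

package main

import (
    "bytes"
    "fmt"
    "net/http"
)

func main() {
    // Ask the running LocalAI instance to install a gallery model by name;
    // the id below matches the entry's "name" field in index.yaml.
    payload := []byte(`{"id": "mistral-nemo-instruct-2407"}`)

    resp, err := http.Post("http://localhost:8080/models/apply",
        "application/json", bytes.NewReader(payload))
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    // The endpoint answers with a job descriptor that can be polled for
    // download progress (see the LocalAI gallery documentation).
    fmt.Println("status:", resp.Status)
}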
- !!merge <<: *mistral03
  name: "lumimaid-v0.2-12b"
  icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/ep3ojmuMkFS-GmgRuI9iB.png
  urls:
    - https://huggingface.co/NeverSleep/Lumimaid-v0.2-12B
    - https://huggingface.co/mudler/Lumimaid-v0.2-12B-Q4_K_M-GGUF
  description: |
    This model is based on: Mistral-Nemo-Instruct-2407

    Wandb: https://wandb.ai/undis95/Lumi-Mistral-Nemo?nw=nwuserundis95

    NOTE: As explained in the Mistral-Nemo-Instruct-2407 repo, it's recommended to use a low temperature, so please experiment!

    Lumimaid 0.1 -> 0.2 is a HUGE step up dataset-wise.

    As some people have told us our models are sloppy, Ikari decided to say fuck it and literally nuke out all chats with the most slop.

    Our dataset has stayed the same since day one; we added data over time, cleaned it, and repeated. After not releasing a model for a while because we were never satisfied, we think it's time to come back!
  overrides:
    parameters:
      model: lumimaid-v0.2-12b-q4_k_m.gguf
  files:
    - filename: lumimaid-v0.2-12b-q4_k_m.gguf
      sha256: f72299858a07e52be920b86d42ddcfcd5008b961d601ef6fd6a98a3377adccbf
      uri: huggingface://mudler/Lumimaid-v0.2-12B-Q4_K_M-GGUF/lumimaid-v0.2-12b-q4_k_m.gguf
- &mudler
  ### START mudler's LocalAI specific-models
  url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
@@ -1203,101 +764,6 @@
    - filename: EMO-2B.Q4_K_M.gguf
      sha256: 608bffc0e9012bc7f9a94b714f4932e2826cc122dbac59b586e4baa2ee0fdca5
      uri: huggingface://RichardErkhov/OEvortex_-_EMO-2B-gguf/EMO-2B.Q4_K_M.gguf
- !!merge <<: *gemma
  name: "gemmoy-9b-g2-mk.3-i1"
  icon: https://huggingface.co/Hastagaras/G2-Gemmoy-9B-MK.3-RP/resolve/main/gemmoy.jpg
  urls:
    - https://huggingface.co/Hastagaras/Gemmoy-9B-G2-MK.3
    - https://huggingface.co/mradermacher/Gemmoy-9B-G2-MK.3-i1-GGUF
  description: |
    The Gemmoy-9B-G2-MK.3 model is a large language model trained on a variety of datasets, including grimulkan/LimaRP-augmented, LDJnr/Capybara, TheSkullery/C2logs_Filtered_Sharegpt_Merged, abacusai/SystemChat-1.1, and Hastagaras/FTTS-Stories-Sharegpt.
  overrides:
    parameters:
      model: Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf
  files:
    - filename: Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf
      sha256: 0d1004a246fbda7f1408a6841129b73c4100e697bd0a6806fc698eabbb0802a1
      uri: huggingface://mradermacher/Gemmoy-9B-G2-MK.3-i1-GGUF/Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf
- !!merge <<: *gemma
  name: "sunfall-simpo-9b"
  urls:
    - https://huggingface.co/mradermacher/sunfall-SimPO-9B-GGUF
  description: |
    A crazy idea: what if you put the LoRA from crestf411/sunfall-peft on top of princeton-nlp/gemma-2-9b-it-SimPO? This model exists solely for that purpose.
  overrides:
    parameters:
      model: sunfall-SimPO-9B.Q4_K_M.gguf
  files:
    - filename: sunfall-SimPO-9B.Q4_K_M.gguf
      sha256: 810c51c6ce34107706d921531b97cfa409cd53c215d18b88bce7cdb617f73ceb
      uri: huggingface://mradermacher/sunfall-SimPO-9B-GGUF/sunfall-SimPO-9B.Q4_K_M.gguf
- !!merge <<: *gemma
  name: "sunfall-simpo-9b-i1"
  urls:
    - https://huggingface.co/mradermacher/sunfall-SimPO-9B-i1-GGUF
  description: |
    A crazy idea: what if you put the LoRA from crestf411/sunfall-peft on top of princeton-nlp/gemma-2-9b-it-SimPO? This model exists solely for that purpose.
  overrides:
    parameters:
      model: sunfall-SimPO-9B.i1-Q4_K_M.gguf
  files:
    - filename: sunfall-SimPO-9B.i1-Q4_K_M.gguf
      sha256: edde9df372a9a5b2316dc6822dc2f52f5a2059103dd7f08072e5a5355c5f5d0b
      uri: huggingface://mradermacher/sunfall-SimPO-9B-i1-GGUF/sunfall-SimPO-9B.i1-Q4_K_M.gguf
- !!merge <<: *gemma
  name: "seeker-9b"
  icon: https://huggingface.co/lodrick-the-lafted/seeker-9b/resolve/main/seeker.webp
  urls:
    - https://huggingface.co/lodrick-the-lafted/seeker-9b
    - https://huggingface.co/mradermacher/seeker-9b-GGUF
  description: |
    Seeker-9b is a large language model trained on a diverse range of text data. It has 9 billion parameters and comes from the "lodrick-the-lafted" repository. It can be used for a variety of natural language processing tasks such as language translation, text summarization, and text generation. It supports the English language and is available under the Apache-2.0 license.
  overrides:
    parameters:
      model: seeker-9b.Q4_K_M.gguf
  files:
    - filename: seeker-9b.Q4_K_M.gguf
      sha256: 7658e5bdad96dc8d232f83cff7c3fe5fa993defbfd3e728dcc7436352574a00a
      uri: huggingface://mradermacher/seeker-9b-GGUF/seeker-9b.Q4_K_M.gguf
- !!merge <<: *gemma
  name: "gemmasutra-pro-27b-v1"
  icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/w0Oi8TReoQNT3ljm5Wf6c.webp
  urls:
    - https://huggingface.co/TheDrummer/Gemmasutra-Pro-27B-v1
    - https://huggingface.co/mradermacher/Gemmasutra-Pro-27B-v1-GGUF
  description: |
    An RP model with impressive flexibility. Finetuned by yours truly.
  overrides:
    parameters:
      model: Gemmasutra-Pro-27B-v1.Q4_K_M.gguf
  files:
    - filename: Gemmasutra-Pro-27B-v1.Q4_K_M.gguf
      sha256: 336a2fbf142849fcc20e432123433807b6c7b09988652ef583a63636a0f90218
      uri: huggingface://mradermacher/Gemmasutra-Pro-27B-v1-GGUF/Gemmasutra-Pro-27B-v1.Q4_K_M.gguf
- !!merge <<: *gemma
  name: "tarnished-9b-i1"
  icon: https://huggingface.co/lodrick-the-lafted/tarnished-9b/resolve/main/nox.jpg
  urls:
    - https://huggingface.co/lodrick-the-lafted/tarnished-9b
    - https://huggingface.co/mradermacher/tarnished-9b-i1-GGUF
  description: |
    Ah, so you've heard whispers on the winds, have you? 🧐

    Imagine this:
    Tarnished-9b, a name that echoes with the rasp of coin-hungry merchants and the clatter of forgotten machinery. This LLM speaks with the voice of those who straddle the line between worlds, who've tasted the bittersweet nectar of eldritch power and the tang of the Interdimensional Trade Council.

    It's a tongue that dances with secrets, a whisperer of lore lost and found. Its words may guide you through the twisting paths of history, revealing truths hidden beneath layers of dust and time.

    But be warned, Tarnished One! For knowledge comes at a price. The LLM's gaze can pierce the veil of reality, but it can also lure you into the labyrinthine depths of madness.

    Dare you tread this path?
  overrides:
    parameters:
      model: tarnished-9b.i1-Q4_K_M.gguf
  files:
    - filename: tarnished-9b.i1-Q4_K_M.gguf
      sha256: 62ab09124b3f6698bd94ef966533ae5d427d87f6bdc09f6f46917def96420a0c
      uri: huggingface://mradermacher/tarnished-9b-i1-GGUF/tarnished-9b.i1-Q4_K_M.gguf
- &llama3
  url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
  icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
@@ -1506,36 +972,6 @@
    - filename: llama-3-stheno-mahou-8b-q4_k_m.gguf
      sha256: a485cd74ef4ff3671c67ed8e10ea5379a1f24082ac688bd303fd28dfc9808c11
      uri: huggingface://mudler/llama-3-Stheno-Mahou-8B-Q4_K_M-GGUF/llama-3-stheno-mahou-8b-q4_k_m.gguf
- !!merge <<: *llama3
  name: "l3-8b-stheno-horny-v3.3-32k-q5_k_m"
  urls:
    - https://huggingface.co/nothingiisreal/L3-8B-Stheno-Horny-v3.3-32K
    - https://huggingface.co/Kurgan1138/L3-8B-Stheno-Horny-v3.3-32K-Q5_K_M-GGUF
  description: |
    This was an experiment to see if aligning other models via LoRA is possible. Yes it is. We aligned it to be always horny.

    We took V3.3 Stheno weights from here

    And applied our LoRA at Alpha = 768

    Thank you to Sao10K for the amazing model.

    This is not legal advice. I don't put any extra licensing on my own LoRA.

    LLaMA 3 license may conflict with Creative Commons Attribution Non Commercial 4.0.

    LLaMA 3 license can be found here

    If you want to host a model using our LoRA, you have our permission, but you might consider getting Sao's permission if you want to host their model.

    Again, not legal advice.
  overrides:
    parameters:
      model: l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf
  files:
    - filename: l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf
      sha256: 8d934f80ca6dbaa4852846108da92446a26715fbd5f6fc3859568850edf05262
      uri: huggingface://Kurgan1138/L3-8B-Stheno-Horny-v3.3-32K-Q5_K_M-GGUF/l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf
- !!merge <<: *llama3
  name: "llama-3-8b-openhermes-dpo"
  urls:
@@ -2428,81 +1864,6 @@
    - filename: L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-D_AU-Q4_K_M.gguf
      sha256: ae29f38d73dfb04415821405cf8b319fc42d78d0cdd0da91db147d12e68030fe
      uri: huggingface://DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-GGUF/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-D_AU-Q4_K_M.gguf
- !!merge <<: *llama3
  name: "meta-llama-3-instruct-12.2b-brainstorm-20x-form-8"
  urls:
    - https://huggingface.co/DavidAU/Meta-Llama-3-Instruct-12.2B-BRAINSTORM-20x-FORM-8-GGUF
  description: |
    Meta-Llama-3-8B Instruct (now at 12.2B) with the Brainstorm process, which increases its performance at the core level for any creative use case. It has calibrations that allow it to exceed the logic-solving abilities of the original model. The Brainstorm process expands the reasoning center of the LLM, then reassembles and calibrates it, introducing subtle changes into the reasoning process. This enhances the model's detail, concept and "world" connections, prose quality, and prose length without affecting instruction following. It improves coherence, description, simile, metaphors, and emotional engagement, and takes fewer liberties with instructions while following them more closely. The model's performance is further enhanced by other technologies like "Ultra" (precision), "Neo Imatrix" (custom imatrix datasets), and "X-quants" (custom application of the imatrix process). It has been tested on multiple LLaMA2, LLaMA3, and Mistral models of various parameter sizes.
  overrides:
    parameters:
      model: Meta-Llama-3-8B-Instruct-exp20-8-Q4_K_M.gguf
  files:
    - filename: Meta-Llama-3-8B-Instruct-exp20-8-Q4_K_M.gguf
      sha256: 5568ab6195ab5da703f728cc118108ddcbe97255e3ba4a543b531acdf082b999
      uri: huggingface://DavidAU/Meta-Llama-3-Instruct-12.2B-BRAINSTORM-20x-FORM-8-GGUF/Meta-Llama-3-8B-Instruct-exp20-8-Q4_K_M.gguf
- !!merge <<: *llama3
  name: "loki-base-i1"
  urls:
    - https://huggingface.co/MrRobotoAI/Loki-base
    - https://huggingface.co/mradermacher/Loki-base-i1-GGUF
  description: |
    Merge of several models using mergekit:
    - model: abacusai/Llama-3-Smaug-8B
    - model: Aculi/Llama3-Sophie
    - model: ajibawa-2023/Uncensored-Frank-Llama-3-8B
    - model: Blackroot/Llama-3-Gamma-Twist
    - model: Casual-Autopsy/L3-Super-Nova-RP-8B
    - model: Casual-Autopsy/L3-Umbral-Mind-RP-v3.0-8B
    - model: cgato/L3-TheSpice-8b-v0.8.3
    - model: ChaoticNeutrals/Hathor_Respawn-L3-8B-v0.8
    - model: ChaoticNeutrals/Hathor_RP-v.01-L3-8B
    - model: chargoddard/prometheus-2-llama-3-8b
    - model: chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO
    - model: chujiezheng/LLaMA3-iterative-DPO-final-ExPO
    - model: Fizzarolli/L3-8b-Rosier-v1
    - model: flammenai/Mahou-1.2a-llama3-8B
    - model: HaitameLaf/Llama-3-8B-StoryGenerator
    - model: HPAI-BSC/Llama3-Aloe-8B-Alpha
    - model: iRyanBell/ARC1
    - model: iRyanBell/ARC1-II
    - model: lemon07r/Llama-3-RedMagic4-8B
    - model: lemon07r/Lllama-3-RedElixir-8B
    - model: Locutusque/Llama-3-Hercules-5.0-8B
    - model: Magpie-Align/Llama-3-8B-Magpie-Pro-MT-SFT-v0.1
    - model: maldv/badger-lambda-llama-3-8b
    - model: maldv/badger-mu-llama-3-8b
    - model: maldv/badger-writer-llama-3-8b
    - model: mlabonne/NeuralDaredevil-8B-abliterated
    - model: MrRobotoAI/Fiction-Writer-6
    - model: MrRobotoAI/Unholy-Thoth-8B-v2
    - model: nbeerbower/llama-3-spicy-abliterated-stella-8B
    - model: NeverSleep/Llama-3-Lumimaid-8B-v0.1
    - model: NeverSleep/Llama-3-Lumimaid-8B-v0.1-OAS
    - model: Nitral-AI/Hathor_Sofit-L3-8B-v1
    - model: Nitral-AI/Hathor_Stable-v0.2-L3-8B
    - model: Nitral-AI/Hathor_Tahsin-L3-8B-v0.85
    - model: Nitral-AI/Poppy_Porpoise-0.72-L3-8B
    - model: nothingiisreal/L3-8B-Instruct-Abliterated-DWP
    - model: nothingiisreal/L3-8B-Stheno-Horny-v3.3-32K
    - model: NousResearch/Hermes-2-Theta-Llama-3-8B
    - model: OwenArli/Awanllm-Llama-3-8B-Cumulus-v1.0
    - model: refuelai/Llama-3-Refueled
    - model: ResplendentAI/Nymph_8B
    - model: shauray/Llama3-8B-DPO-uncensored
    - model: SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha
    - model: TIGER-Lab/MAmmoTH2-8B-Plus
    - model: Undi95/Llama-3-LewdPlay-8B
    - model: Undi95/Meta-Llama-3-8B-hf
    - model: VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct
    - model: WhiteRabbitNeo/Llama-3-WhiteRabbitNeo-8B-v2.0
  overrides:
    parameters:
      model: Loki-base.i1-Q4_K_M.gguf
  files:
    - filename: Loki-base.i1-Q4_K_M.gguf
      sha256: 60a4357fa399bfd18aa841cc529da09439791331d117a4f06f0467d002b385bb
      uri: huggingface://mradermacher/Loki-base-i1-GGUF/Loki-base.i1-Q4_K_M.gguf
- &dolphin
  name: "dolphin-2.9-llama3-8b"
  url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
@@ -3662,6 +3023,7 @@
    - filename: ArliAI-Llama-3-8B-Dolfin-v0.5.Q4_K_M.gguf
      sha256: 71fef02915c606b438ccff2cae6b7760bbb54a558d5f2d39c2421d97b6682fea
      uri: huggingface://QuantFactory/ArliAI-Llama-3-8B-Dolfin-v0.5-GGUF/ArliAI-Llama-3-8B-Dolfin-v0.5.Q4_K_M.gguf

- !!merge <<: *llama3
  name: "llama-3-ezo-8b-common-it"
  icon: https://huggingface.co/HODACHI/Llama-3-EZO-8b-Common-it
@@ -3669,11 +3031,11 @@
    - https://huggingface.co/HODACHI/Llama-3-EZO-8b-Common-it
    - https://huggingface.co/MCZK/Llama-3-EZO-8b-Common-it-GGUF
  description: |
    Based on meta-llama/Meta-Llama-3-8B-Instruct, it has been enhanced for Japanese usage through additional pre-training and instruction tuning. (Built with Meta Llama3)
    Based on meta-llama/Meta-Llama-3-8B-Instruct, it has been enhanced for Japanese usage through additional pre-training and instruction tuning. (Built with Meta Llama3)

    This model is based on Llama-3-8B-Instruct and is subject to the Llama-3 Terms of Use. For detailed information, please refer to the official Llama-3 license page.
    This model is based on Llama-3-8B-Instruct and is subject to the Llama-3 Terms of Use. For detailed information, please refer to the official Llama-3 license page.

    このモデルはLlama-3-8B-Instructをベースにしており、Llama-3の利用規約に従います。詳細については、Llama-3の公式ライセンスページをご参照ください。
    このモデルはLlama-3-8B-Instructをベースにしており、Llama-3の利用規約に従います。詳細については、Llama-3の公式ライセンスページをご参照ください。
  overrides:
    parameters:
      model: Llama-3-EZO-8b-Common-it.Q4_K_M.iMatrix.gguf
@@ -3802,6 +3164,7 @@
    - filename: L3-15B-MythicalMaid-t0.0001.Q4_K_M.gguf
      sha256: ecbd57783006f1a027f8a7f5a5d551dc8b3568912825f566d79fd34a804e8970
      uri: huggingface://mradermacher/L3-15B-MythicalMaid-t0.0001-GGUF/L3-15B-MythicalMaid-t0.0001.Q4_K_M.gguf

- !!merge <<: *llama3
  name: "l3-15b-etherealmaid-t0.0001-i1"
  icon: https://cdn-uploads.huggingface.co/production/uploads/64f74b6e6389380c77562762/FwYXt2h_FdmlL0Z6qYufz.png
@@ -3923,19 +3286,6 @@
    - filename: calme-2.4-llama3-70b.Q4_K_M.gguf
      sha256: 0b44ac8a88395dfc60f1b9d3cfffc0ffef74ec0a302e610ef91fc787187568f2
      uri: huggingface://mradermacher/calme-2.4-llama3-70b-GGUF/calme-2.4-llama3-70b.Q4_K_M.gguf
- !!merge <<: *llama3
  name: "meta-llama-3-instruct-8.9b-brainstorm-5x-form-11"
  urls:
    - https://huggingface.co/DavidAU/Meta-Llama-3-Instruct-8.9B-BRAINSTORM-5x-FORM-11-GGUF
  description: |
    Meta-Llama-3-8B Instruct (now at 8.9B) is an enhanced version of the model, specifically designed for creative use cases such as story writing, roleplaying, and fiction. It has been augmented through the "Brainstorm" process, which involves expanding and calibrating the reasoning center of the LLM to improve its performance in various creative tasks. The enhancements brought by this process include more detailed and nuanced descriptions, stronger prose, and a greater sense of immersion in the story. The model is capable of generating long and vivid content, with fewer clichés and more focused, coherent narratives. Users can provide more instructions and details to elicit stronger and more engaging responses from the model. The "Brainstorm" process has been tested on multiple LLM models, including Llama2, Llama3, and Mistral, as well as on individual models like Llama3 Instruct, Mistral Instruct, and custom fine-tuned models.
  overrides:
    parameters:
      model: Meta-Llama-3-8B-Instruct-exp5-11-Q4_K_M.gguf
  files:
    - filename: Meta-Llama-3-8B-Instruct-exp5-11-Q4_K_M.gguf
      sha256: 5dd81b8b809667d10036499affdd1461cf95af50b405cbc9f800b421a4b60e98
      uri: huggingface://DavidAU/Meta-Llama-3-Instruct-8.9B-BRAINSTORM-5x-FORM-11-GGUF/Meta-Llama-3-8B-Instruct-exp5-11-Q4_K_M.gguf
- &command-R
  ### START Command-r
  url: "github:mudler/LocalAI/gallery/command-r.yaml@master"
@@ -4178,8 +3528,8 @@
      model: Phi-3.1-mini-4k-instruct-Q4_K_M.gguf
  files:
    - filename: Phi-3.1-mini-4k-instruct-Q4_K_M.gguf
      sha256: 39458b227a4be763b7eb39d306d240c3d45205e3f8b474ec7bdca7bba0158e69
      uri: huggingface://bartowski/Phi-3.1-mini-4k-instruct-GGUF/Phi-3.1-mini-4k-instruct-Q4_K_M.gguf
      sha256: d6d25bf078321bea4a079c727b273cb0b5a2e0b4cf3add0f7a2c8e43075c414f
- !!merge <<: *phi-3
  name: "phillama-3.8b-v0.1"
  icon: https://cdn-uploads.huggingface.co/production/uploads/657eb5b256c9c67605a6e8b5/f96pPiJQb3puzbPYNknG2.png
@@ -4504,28 +3854,6 @@
    - filename: "Codestral-22B-v0.1-Q4_K_M.gguf"
      uri: "huggingface://bartowski/Codestral-22B-v0.1-GGUF/Codestral-22B-v0.1-Q4_K_M.gguf"
      sha256: 003e48ed892850b80994fcddca2bd6b833b092a4ef2db2853c33a3144245e06c
- !!merge <<: *codellama
  url: "github:mudler/LocalAI/gallery/alpaca.yaml@master"
  icon: https://huggingface.co/Nan-Do/LeetCodeWizard_7B_V1.1/resolve/main/LeetCodeWizardLogo.png
  name: "leetcodewizard_7b_v1.1-i1"
  urls:
    - https://huggingface.co/Nan-Do/LeetCodeWizard_7B_V1.1
    - https://huggingface.co/mradermacher/LeetCodeWizard_7B_V1.1-i1-GGUF
  description: |
    LeetCodeWizard is a coding large language model specifically trained to solve and explain Leetcode (or any) programming problems.
    This model is a fine-tuned version of WizardCoder-Python-7B with a dataset of Leetcode problems.
    Model capabilities:

    It should be able to solve most of the problems found at Leetcode and even pass the sample interviews they offer on the site.

    It can write both the code and the explanations for the solutions.
  overrides:
    parameters:
      model: LeetCodeWizard_7B_V1.1.i1-Q4_K_M.gguf
  files:
    - filename: LeetCodeWizard_7B_V1.1.i1-Q4_K_M.gguf
      sha256: 19720d8e1ba89d32c6f88ed6518caf0251f9e3ec011297929c801efc5ea979f4
      uri: huggingface://mradermacher/LeetCodeWizard_7B_V1.1-i1-GGUF/LeetCodeWizard_7B_V1.1.i1-Q4_K_M.gguf
- &llm-compiler
  url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
  name: "llm-compiler-13b-imat"

@@ -31,7 +31,7 @@ config_file: |
  {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
  Function call:
  chat: |
    {{.Input }}
    <|begin_of_text|>{{.Input }}
    <|start_header_id|>assistant<|end_header_id|>
  completion: |
    {{.Input}}
@@ -1,64 +0,0 @@
---
name: "llama3-instruct-grammar"

config_file: |
  mmap: true
  function:
    disable_no_action: true
    grammar:
      no_mixed_free_string: true
      mixed_mode: true
      schema_type: llama3.1 # or JSON is supported too (json)
    response_regex:
      - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
  template:
    chat_message: |
      <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>

      {{ if .FunctionCall -}}
      Function call:
      {{ else if eq .RoleName "tool" -}}
      Function response:
      {{ end -}}
      {{ if .Content -}}
      {{.Content -}}
      {{ else if .FunctionCall -}}
      {{ toJson .FunctionCall -}}
      {{ end -}}
      <|eot_id|>
    function: |
      <|start_header_id|>system<|end_header_id|>

      You have access to the following functions:

      {{range .Functions}}
      Use the function '{{.Name}}' to '{{.Description}}'
      {{toJson .Parameters}}
      {{end}}

      Think very carefully before calling functions.
      If you choose to call a function, ONLY reply in the following format with no prefix or suffix:

      <function=example_function_name>{{`{{"example_name": "example_value"}}`}}</function>

      Reminder:
      - If looking for real-time information, use relevant functions before falling back to searching on the internet
      - Function calls MUST follow the specified format, start with <function= and end with </function>
      - Required parameters MUST be specified
      - Only call one function at a time
      - Put the entire function call reply on one line
      <|eot_id|>
      {{.Input }}
      <|start_header_id|>assistant<|end_header_id|>
    chat: |
      {{.Input }}
      <|start_header_id|>assistant<|end_header_id|>
    completion: |
      {{.Input}}
  context_size: 8192
  f16: true
  stopwords:
    - <|im_end|>
    - <dummy32000>
    - "<|eot_id|>"
    - <|end_of_text|>
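The response_regex in the template above is what pulls the function name and arguments back out of the model's reply. A minimal sketch of that match in Go (the reply string is hypothetical, the pattern is the one from the config):

package main

import (
    "fmt"
    "regexp"
)

func main() {
    // Same pattern as the response_regex in the config above.
    re := regexp.MustCompile(`<function=(?P<name>\w+)>(?P<arguments>.*)</function>`)

    // Hypothetical model reply that follows the documented format.
    reply := `<function=example_function_name>{"example_name": "example_value"}</function>`

    if m := re.FindStringSubmatch(reply); m != nil {
        fmt.Println("name:", m[re.SubexpIndex("name")])
        fmt.Println("arguments:", m[re.SubexpIndex("arguments")])
    }
}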
@@ -1,62 +0,0 @@
---
name: "llama3-instruct"

config_file: |
  mmap: true
  function:
    disable_no_action: true
    grammar:
      disable: true
    response_regex:
      - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
  template:
    chat_message: |
      <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>

      {{ if .FunctionCall -}}
      Function call:
      {{ else if eq .RoleName "tool" -}}
      Function response:
      {{ end -}}
      {{ if .Content -}}
      {{.Content -}}
      {{ else if .FunctionCall -}}
      {{ toJson .FunctionCall -}}
      {{ end -}}
      <|eot_id|>
    function: |
      <|start_header_id|>system<|end_header_id|>

      You have access to the following functions:

      {{range .Functions}}
      Use the function '{{.Name}}' to '{{.Description}}'
      {{toJson .Parameters}}
      {{end}}

      Think very carefully before calling functions.
      If you choose to call a function, ONLY reply in the following format with no prefix or suffix:

      <function=example_function_name>{{`{{"example_name": "example_value"}}`}}</function>

      Reminder:
      - If looking for real-time information, use relevant functions before falling back to searching on the internet
      - Function calls MUST follow the specified format, start with <function= and end with </function>
      - Required parameters MUST be specified
      - Only call one function at a time
      - Put the entire function call reply on one line
      <|eot_id|>
      {{.Input }}
      <|start_header_id|>assistant<|end_header_id|>
    chat: |
      {{.Input }}
      <|start_header_id|>assistant<|end_header_id|>
    completion: |
      {{.Input}}
  context_size: 8192
  f16: true
  stopwords:
    - <|im_end|>
    - <dummy32000>
    - "<|eot_id|>"
    - <|end_of_text|>
@@ -1,43 +0,0 @@
package functions

import (
    "encoding/json"

    "github.com/mudler/LocalAI/pkg/functions/grammars"
)

type Item struct {
    Type       string                 `json:"type"`
    Properties map[string]interface{} `json:"properties"`
}

type JSONFunctionStructure struct {
    OneOf []Item                 `json:"oneOf,omitempty"`
    AnyOf []Item                 `json:"anyOf,omitempty"`
    Defs  map[string]interface{} `json:"$defs,omitempty"`
}

func (j JSONFunctionStructure) Grammar(options ...func(*grammars.GrammarOption)) (string, error) {
    grammarOpts := &grammars.GrammarOption{}
    grammarOpts.Apply(options...)

    dat, err := json.Marshal(j)
    if err != nil {
        return "", err
    }

    converter := NewSchemaConverter(*grammarOpts)
    return converter.GrammarFromBytes(dat, options...)
}

type SchemaConverter interface {
    GrammarFromBytes([]byte, ...func(*grammars.GrammarOption)) (string, error)
}

func NewSchemaConverter(opt grammars.GrammarOption) SchemaConverter {
    switch {
    case opt.SchemaType == grammars.LLama31Schema:
        return grammars.NewLLama31SchemaConverter(opt.FunctionName)
    }
    return grammars.NewJSONSchemaConverter(opt.PropOrder)
}
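For context, the file removed above was the dispatch point between schema converters. A usage sketch of that removed API, as it existed before this change (the schema content is illustrative):

package main

import (
    "fmt"

    "github.com/mudler/LocalAI/pkg/functions"
)

func main() {
    structure := functions.JSONFunctionStructure{
        OneOf: []functions.Item{{
            Type: "object",
            Properties: map[string]interface{}{
                "name":      map[string]interface{}{"const": "search"},
                "arguments": map[string]interface{}{"type": "object"},
            },
        }},
    }

    // With the removed API this returned (string, error); the SchemaType
    // option selected the llama3.1-style converter instead of plain JSON.
    grammar, err := structure.Grammar()
    if err != nil {
        panic(err)
    }
    fmt.Println(grammar)
}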
@@ -18,15 +18,6 @@ type Function struct {
}
type Functions []Function

type FunctionName struct {
    Const string `json:"const"`
}

type Argument struct {
    Type       string                 `json:"type"`
    Properties map[string]interface{} `json:"properties"`
}

type Tool struct {
    Type     string   `json:"type"`
    Function Function `json:"function,omitempty"`
@@ -1,4 +1,4 @@
package functions_test
package functions

import (
    "testing"
@@ -7,7 +7,7 @@ import (
    . "github.com/onsi/gomega"
)

func TestFunctions(t *testing.T) {
func TestGrammar(t *testing.T) {
    RegisterFailHandler(Fail)
    RunSpecs(t, "Functions test suite")
    RunSpecs(t, "Grammar test suite")
}
378 pkg/functions/grammar_json_schema.go (new file)
@@ -0,0 +1,378 @@
package functions

// a golang port of https://github.com/ggerganov/llama.cpp/pull/1887

import (
    "encoding/json"
    "fmt"
    "regexp"
    "sort"
    "strings"

    "github.com/mudler/LocalAI/pkg/utils"
)

const (
    JSONBNF = `root ::= object
value ::= object | array | string | number | ("true" | "false" | "null") ws

object ::=
  "{" ws (
    string ":" ws value
    ("," ws string ":" ws value)*
  )? "}" ws

array ::=
  "[" ws (
    value
    ("," ws value)*
  )? "]" ws

string ::=
  "\"" (
    [^"\\] |
    "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
  )* "\"" ws

number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws

ws ::= ([ \t\n] ws)?`
)
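// For reference (added note): JSONBNF above accepts a standard JSON document
// with an object at the root, e.g. {"a": 1, "b": [true, null]}; arrays,
// strings, numbers, booleans and null may nest freely below the root object.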
var (
    SPACE_RULE = `" "?`

    PRIMITIVE_RULES = map[string]string{
        "boolean": `("true" | "false") space`,
        "number":  `("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space`,
        "integer": `("-"? ([0-9] | [1-9] [0-9]*)) space`,
        "string": `"\"" (
            [^"\\] |
            "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
        )* "\"" space`,
        // TODO: we shouldn't forbid \" and \\ or all unicode and have this branch here,
        // however, if we don't have it, the grammar will be ambiguous and
        // empirically results are way worse.
        "freestring": `(
            [^\x00] |
            "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
        )* space`,
        "null": `"null" space`,
    }

    INVALID_RULE_CHARS_RE     = regexp.MustCompile(`[^a-zA-Z0-9-]+`)
    GRAMMAR_LITERAL_ESCAPE_RE = regexp.MustCompile(`[\r\n"]`)
    GRAMMAR_LITERAL_ESCAPES   = map[string]string{
        "\r": `\r`,
        "\n": `\n`,
        `"`:  `\"`,
    }
)

type JSONSchemaConverter struct {
    propOrder map[string]int
    rules     map[string]string
}

func NewJSONSchemaConverter(propOrder string) *JSONSchemaConverter {
    propOrderSlice := strings.Split(propOrder, ",")
    propOrderMap := make(map[string]int)
    for idx, name := range propOrderSlice {
        propOrderMap[name] = idx
    }

    rules := make(map[string]string)
    rules["space"] = SPACE_RULE

    return &JSONSchemaConverter{
        propOrder: propOrderMap,
        rules:     rules,
    }
}

func (sc *JSONSchemaConverter) formatLiteral(literal interface{}) string {
    escaped := GRAMMAR_LITERAL_ESCAPE_RE.ReplaceAllStringFunc(jsonString(literal), func(match string) string {
        return GRAMMAR_LITERAL_ESCAPES[match]
    })
    return fmt.Sprintf(`"%s"`, escaped)
}

func (sc *JSONSchemaConverter) addRule(name, rule string) string {
    escName := INVALID_RULE_CHARS_RE.ReplaceAllString(name, "-")
    key := escName
    if existingRule, ok := sc.rules[escName]; ok && existingRule != rule {
        i := 0
        for {
            key = fmt.Sprintf("%s%d", escName, i)
            if _, ok := sc.rules[key]; !ok {
                break
            }
            i++
        }
    }
    sc.rules[key] = rule
    return key
}
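// Note on name collisions (added for clarity): when a different rule is
// registered under an existing name, addRule keeps the first rule and stores
// the newcomer under a numbered key, so addRule("step", `"a"`) followed by
// addRule("step", `"b"`) yields rules "step" and "step0". Registering an
// identical rule twice is a no-op that returns the same key.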
const arrayNewLines = `arr ::=
  "[\n" (
    realvalue
    (",\n" realvalue)*
  )? "]"`

const array = `arr ::=
  "[" (
    realvalue
    ("," realvalue)*
  )? "]"`

func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) string {
    grammarOpts := &GrammarOption{}
    grammarOpts.Apply(options...)

    prefix := grammarOpts.Prefix
    maybeArray := grammarOpts.MaybeArray
    disableParallelNewLines := grammarOpts.DisableParallelNewLines
    maybeString := grammarOpts.MaybeString
    noMixedFreeString := grammarOpts.NoMixedFreeString

    var lines []string

    swapRoot := maybeArray || maybeString || prefix != ""

    // write down the computed rules.
    // if maybeArray is true, we need to add the array rule and slightly tweak the root rule
    for name, rule := range sc.rules {
        if swapRoot && name == "root" {
            name = "realvalue"
        }
        lines = append(lines, fmt.Sprintf("%s ::= %s", name, rule))
    }

    if !swapRoot {
        return strings.Join(lines, "\n")
    }

    newRoot := "realvalue"
    if maybeArray {
        newRoot = "arr | realvalue"
    }

    freestringRule := "mixedstring"
    if noMixedFreeString {
        freestringRule = "freestring"
    }

    if prefix != "" {
        // quote newlines in the prefix
        prefix = utils.EscapeNewLines(prefix)

        if maybeArray && maybeString {
            newRoot = "(" + newRoot + ")"
        }

        if maybeString {
            //newRoot = "( (\"" + suffix + "\" " + newRoot + ") | freestring ) "
            newRoot = "( \"" + prefix + "\" " + newRoot + " | " + freestringRule + " ) "
        } else {
            newRoot = "\"" + prefix + "\" " + "" + newRoot + ""
        }
    } else if maybeString {
        if maybeArray {
            // newRoot = "(" + newRoot + ")"
        }

        newRoot = freestringRule + " | " + newRoot
    }

    lines = append(lines, fmt.Sprintf("%s ::= %s", "root", newRoot))
    if disableParallelNewLines {
        lines = append(lines, array)
    } else {
        lines = append(lines, arrayNewLines)
    }

    if maybeArray {
        if grammarOpts.ExpectStringsAfterJSON {
            lines = append(lines, `mixedstring ::= freestring | freestring arr freestring | (freestring realvalue freestring)* | realvalue | arr`)
        } else {
            lines = append(lines, `mixedstring ::= freestring | freestring arr | freestring realvalue | realvalue | arr`)
        }
    } else {
        if grammarOpts.ExpectStringsAfterJSON {
            lines = append(lines, `mixedstring ::= freestring | (freestring realvalue freestring)* | realvalue`)
        } else {
            lines = append(lines, `mixedstring ::= freestring | freestring realvalue | realvalue`)
        }
    }

    return strings.Join(lines, "\n")
}
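// Output shape (added for clarity): with no options set, the computed rules
// are emitted as-is. With the maybe-array option the original root is renamed
// to "realvalue" and the new root becomes "root ::= arr | realvalue"; a
// prefix wraps the root in a quoted literal, and the maybe-string variants
// add the "mixedstring"/"freestring" alternatives appended at the end.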
func (sc *JSONSchemaConverter) visit(schema map[string]interface{}, name string, rootSchema map[string]interface{}) string {
    st, existType := schema["type"]
    var schemaType string
    if existType {
        schemaType = st.(string)
    }
    ruleName := name
    if name == "" {
        ruleName = "root"
    }
    _, oneOfExists := schema["oneOf"]
    _, anyOfExists := schema["anyOf"]
    if oneOfExists || anyOfExists {
        var alternatives []string
        oneOfSchemas, oneOfExists := schema["oneOf"].([]interface{})
        anyOfSchemas, anyOfExists := schema["anyOf"].([]interface{})

        if oneOfExists {
            for i, altSchema := range oneOfSchemas {
                alternative := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
                alternatives = append(alternatives, alternative)
            }
        } else if anyOfExists {
            for i, altSchema := range anyOfSchemas {
                alternative := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
                alternatives = append(alternatives, alternative)
            }
        }

        rule := strings.Join(alternatives, " | ")
        return sc.addRule(ruleName, rule)
    } else if ref, exists := schema["$ref"].(string); exists {
        referencedSchema := sc.resolveReference(ref, rootSchema)
        return sc.visit(referencedSchema, name, rootSchema)
    } else if constVal, exists := schema["const"]; exists {
        return sc.addRule(ruleName, sc.formatLiteral(constVal))
    } else if enumVals, exists := schema["enum"].([]interface{}); exists {
        var enumRules []string
        for _, enumVal := range enumVals {
            enumRule := sc.formatLiteral(enumVal)
            enumRules = append(enumRules, enumRule)
        }
        rule := strings.Join(enumRules, " | ")
        return sc.addRule(ruleName, rule)
    } else if properties, exists := schema["properties"].(map[string]interface{}); schemaType == "object" && exists {
        propOrder := sc.propOrder
        var propPairs []struct {
            propName   string
            propSchema map[string]interface{}
        }

        for propName, propSchema := range properties {
            propPairs = append(propPairs, struct {
                propName   string
                propSchema map[string]interface{}
            }{propName: propName, propSchema: propSchema.(map[string]interface{})})
        }

        sort.Slice(propPairs, func(i, j int) bool {
            iOrder := propOrder[propPairs[i].propName]
            jOrder := propOrder[propPairs[j].propName]
            if iOrder != 0 && jOrder != 0 {
                return iOrder < jOrder
            }
            return propPairs[i].propName < propPairs[j].propName
        })

        var rule strings.Builder
        rule.WriteString(`"{" space`)

        for i, propPair := range propPairs {
            propName := propPair.propName
            propSchema := propPair.propSchema
            propRuleName := sc.visit(propSchema, fmt.Sprintf("%s-%s", ruleName, propName), rootSchema)

            if i > 0 {
                rule.WriteString(` "," space`)
            }

            rule.WriteString(fmt.Sprintf(` %s space ":" space %s`, sc.formatLiteral(propName), propRuleName))
        }

        rule.WriteString(` "}" space`)
        return sc.addRule(ruleName, rule.String())
    } else if items, exists := schema["items"].(map[string]interface{}); schemaType == "array" && exists {
        itemRuleName := sc.visit(items, fmt.Sprintf("%s-item", ruleName), rootSchema)
        rule := fmt.Sprintf(`"[" space (%s ("," space %s)*)? "]" space`, itemRuleName, itemRuleName)
        return sc.addRule(ruleName, rule)
    } else {
        primitiveRule, exists := PRIMITIVE_RULES[schemaType]
        if !exists {
            panic(fmt.Sprintf("Unrecognized schema: %v", schema))
        }
        if ruleName == "root" {
            schemaType = "root"
        }
        return sc.addRule(schemaType, primitiveRule)
    }
}

func (sc *JSONSchemaConverter) resolveReference(ref string, rootSchema map[string]interface{}) map[string]interface{} {
    if !strings.HasPrefix(ref, "#/$defs/") {
        panic(fmt.Sprintf("Invalid reference format: %s", ref))
    }

    defKey := strings.TrimPrefix(ref, "#/$defs/")
    definitions, exists := rootSchema["$defs"].(map[string]interface{})
    if !exists {
        fmt.Println(rootSchema)

        panic("No definitions found in the schema")
    }

    def, exists := definitions[defKey].(map[string]interface{})
    if !exists {
        fmt.Println(definitions)

        panic(fmt.Sprintf("Definition not found: %s", defKey))
    }

    return def
}

func (sc *JSONSchemaConverter) Grammar(schema map[string]interface{}, options ...func(*GrammarOption)) string {
    sc.addRule("freestring", PRIMITIVE_RULES["freestring"])
    sc.visit(schema, "", schema)
    return sc.finalizeGrammar(options...)
}

func (sc *JSONSchemaConverter) GrammarFromBytes(b []byte, options ...func(*GrammarOption)) string {
    var schema map[string]interface{}
    _ = json.Unmarshal(b, &schema)
    return sc.Grammar(schema, options...)
}

func jsonString(v interface{}) string {
    b, _ := json.Marshal(v)
    return string(b)
}

type FunctionName struct {
    Const string `json:"const"`
}

type Argument struct {
    Type       string                 `json:"type"`
    Properties map[string]interface{} `json:"properties"`
}

type Item struct {
    Type       string                 `json:"type"`
    Properties map[string]interface{} `json:"properties"`
}

type JSONFunctionStructure struct {
    OneOf []Item                 `json:"oneOf,omitempty"`
    AnyOf []Item                 `json:"anyOf,omitempty"`
    Defs  map[string]interface{} `json:"$defs,omitempty"`
}

func (j JSONFunctionStructure) Grammar(options ...func(*GrammarOption)) string {
    grammarOpts := &GrammarOption{}
    grammarOpts.Apply(options...)

    dat, _ := json.Marshal(j)
    return NewJSONSchemaConverter(grammarOpts.PropOrder).GrammarFromBytes(dat, options...)
}
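A quick usage sketch of the file added above, with names exactly as in the diff (the JSON schema itself is illustrative):

package main

import (
    "fmt"

    "github.com/mudler/LocalAI/pkg/functions"
)

func main() {
    // Illustrative JSON schema; the converter walks type/properties pairs.
    schema := []byte(`{
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "age":  {"type": "integer"}
        }
    }`)

    // With this version of the code, Grammar/GrammarFromBytes return the
    // grammar string directly and panic on unrecognized schemas.
    grammar := functions.NewJSONSchemaConverter("").GrammarFromBytes(schema)
    fmt.Println(grammar)
}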
@@ -1,14 +1,24 @@
package grammars_test
package functions_test

import (
    "strings"

    "github.com/mudler/LocalAI/pkg/functions"
    . "github.com/mudler/LocalAI/pkg/functions"
    . "github.com/mudler/LocalAI/pkg/functions/grammars"
    . "github.com/onsi/ginkgo/v2"
    . "github.com/onsi/gomega"
)

func createFunction(field1 string, field2 string, name string, properties map[string]interface{}) map[string]interface{} {
    property := map[string]interface{}{}
    property[field1] = FunctionName{Const: name}
    property[field2] = Argument{
        Type:       "object",
        Properties: properties,
    }
    return property
}

var testFunctions = []Item{
    {
        Type: "object",
@@ -235,8 +245,7 @@ root-1-name ::= "\"search\""`
var _ = Describe("JSON schema grammar tests", func() {
    Context("JSON", func() {
        It("generates a valid grammar from JSON schema", func() {
            grammar, err := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput1))
            Expect(err).To(BeNil())
            grammar := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput1))
            results := strings.Split(inputResult1, "\n")
            for _, r := range results {
                if r != "" {
@@ -246,8 +255,7 @@ var _ = Describe("JSON schema grammar tests", func() {
            Expect(len(results)).To(Equal(len(strings.Split(grammar, "\n"))))
        })
        It("generates a valid grammar from JSON schema", func() {
            grammar, err := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput2))
            Expect(err).To(BeNil())
            grammar := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput2))
            results := strings.Split(inputResult3, "\n")
            for _, r := range results {
                if r != "" {
@@ -261,8 +269,7 @@ var _ = Describe("JSON schema grammar tests", func() {
            structuredGrammar := JSONFunctionStructure{
                OneOf: testFunctions}

            grammar, err := structuredGrammar.Grammar()
            Expect(err).To(BeNil())
            grammar := structuredGrammar.Grammar()
            results := strings.Split(inputResult1, "\n")
            for _, r := range results {
                if r != "" {
@@ -276,8 +283,7 @@ var _ = Describe("JSON schema grammar tests", func() {
            structuredGrammar := JSONFunctionStructure{
                OneOf: testFunctions}

            grammar, err := structuredGrammar.Grammar(EnableMaybeArray)
            Expect(err).To(BeNil())
            grammar := structuredGrammar.Grammar(functions.EnableMaybeArray)
            results := strings.Split(
                strings.Join([]string{
                    inputResult2,
@@ -295,8 +301,7 @@ var _ = Describe("JSON schema grammar tests", func() {
            structuredGrammar := JSONFunctionStructure{
                OneOf: testFunctionsName}

            grammar, err := structuredGrammar.Grammar(EnableMaybeArray)
            Expect(err).To(BeNil())
            grammar := structuredGrammar.Grammar(functions.EnableMaybeArray)
            results := strings.Split(
                strings.Join([]string{
                    inputResult4,
@@ -314,11 +319,10 @@ var _ = Describe("JSON schema grammar tests", func() {
            structuredGrammar := JSONFunctionStructure{
                OneOf: testFunctionsName}

            grammar, err := structuredGrammar.Grammar(
                SetPrefix("suffix"),
                EnableMaybeArray,
            grammar := structuredGrammar.Grammar(
                functions.SetPrefix("suffix"),
                functions.EnableMaybeArray,
            )
            Expect(err).To(BeNil())
            results := strings.Split(
                strings.Join([]string{
                    rootResult(`"suffix" arr | realvalue`),
@@ -335,8 +339,7 @@ var _ = Describe("JSON schema grammar tests", func() {
            structuredGrammar := JSONFunctionStructure{
                OneOf: testFunctionsName}

            grammar, err := structuredGrammar.Grammar(SetPrefix("suffix"))
            Expect(err).To(BeNil())
            grammar := structuredGrammar.Grammar(functions.SetPrefix("suffix"))
            results := strings.Split(
                strings.Join([]string{
                    rootResult(`"suffix" realvalue`),
@@ -353,8 +356,7 @@ var _ = Describe("JSON schema grammar tests", func() {
            structuredGrammar := JSONFunctionStructure{
                OneOf: testFunctionsName}

            grammar, err := structuredGrammar.Grammar(SetPrefix("suffix"), EnableMaybeString)
            Expect(err).To(BeNil())
            grammar := structuredGrammar.Grammar(functions.SetPrefix("suffix"), functions.EnableMaybeString)
            results := strings.Split(
                strings.Join([]string{
                    rootResult(`( "suffix" realvalue | mixedstring )`),
@@ -371,8 +373,7 @@ var _ = Describe("JSON schema grammar tests", func() {
            structuredGrammar := JSONFunctionStructure{
                OneOf: testFunctionsName}

            grammar, err := structuredGrammar.Grammar(SetPrefix("suffix"), EnableMaybeString, EnableMaybeArray)
            Expect(err).To(BeNil())
            grammar := structuredGrammar.Grammar(functions.SetPrefix("suffix"), functions.EnableMaybeString, functions.EnableMaybeArray)
            results := strings.Split(
                strings.Join([]string{
                    rootResult(`( "suffix" (arr | realvalue) | mixedstring )`),
@@ -391,8 +392,7 @@ var _ = Describe("JSON schema grammar tests", func() {
            structuredGrammar := JSONFunctionStructure{
                OneOf: testFunctionsName}

            grammar, err := structuredGrammar.Grammar(EnableMaybeString, EnableMaybeArray)
            Expect(err).To(BeNil())
            grammar := structuredGrammar.Grammar(functions.EnableMaybeString, functions.EnableMaybeArray)
            results := strings.Split(
                strings.Join([]string{
                    rootResult(`mixedstring | arr | realvalue`),
@@ -410,8 +410,7 @@ var _ = Describe("JSON schema grammar tests", func() {
            structuredGrammar := JSONFunctionStructure{
                OneOf: testFunctionsName}

            grammar, err := structuredGrammar.Grammar(EnableMaybeString, EnableMaybeArray, NoMixedFreeString)
            Expect(err).To(BeNil())
            grammar := structuredGrammar.Grammar(functions.EnableMaybeString, functions.EnableMaybeArray, functions.NoMixedFreeString)
            results := strings.Split(
                strings.Join([]string{
                    rootResult(`freestring | arr | realvalue`),
@@ -433,8 +432,7 @@ var _ = Describe("JSON schema grammar tests", func() {
                realvalue
                ("," realvalue)*
            )? "]"`
            grammar, err := structuredGrammar.Grammar(EnableMaybeString, EnableMaybeArray, DisableParallelNewLines)
            Expect(err).To(BeNil())
            grammar := structuredGrammar.Grammar(functions.EnableMaybeString, functions.EnableMaybeArray, functions.DisableParallelNewLines)
            results := strings.Split(content, "\n")
            for _, r := range results {
                if r != "" {
@@ -1,58 +0,0 @@
package grammars

import (
    "encoding/json"
    "regexp"
)

var (
    PRIMITIVE_RULES = map[string]string{
        "boolean": `("true" | "false") space`,
        "number":  `("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space`,
        "integer": `("-"? ([0-9] | [1-9] [0-9]*)) space`,
        "string": `"\"" (
            [^"\\] |
            "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
        )* "\"" space`,
        // TODO: we shouldn't forbid \" and \\ or all unicode and have this branch here,
        // however, if we don't have it, the grammar will be ambiguous and
        // empirically results are way worse.
        "freestring": `(
            [^\x00] |
            "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
        )* space`,
        "null": `"null" space`,
    }

    INVALID_RULE_CHARS_RE     = regexp.MustCompile(`[^a-zA-Z0-9-]+`)
    GRAMMAR_LITERAL_ESCAPE_RE = regexp.MustCompile(`[\r\n"]`)
    GRAMMAR_LITERAL_ESCAPES   = map[string]string{
        "\r": `\r`,
        "\n": `\n`,
        `"`:  `\"`,
    }
)

const (
    SPACE_RULE = `" "?`

    arrayNewLines = `arr ::=
  "[\n" (
    realvalue
    (",\n" realvalue)*
  )? "]"`

    array = `arr ::=
  "[" (
    realvalue
    ("," realvalue)*
  )? "]"`
)

func jsonString(v interface{}) (string, error) {
    b, err := json.Marshal(v)
    if err != nil {
        return "", err
    }
    return string(b), nil
}
@@ -1,25 +0,0 @@
package grammars_test

import (
    "testing"

    . "github.com/mudler/LocalAI/pkg/functions"

    . "github.com/onsi/ginkgo/v2"
    . "github.com/onsi/gomega"
)

func TestGrammar(t *testing.T) {
    RegisterFailHandler(Fail)
    RunSpecs(t, "Grammar test suite")
}

func createFunction(field1 string, field2 string, name string, properties map[string]interface{}) map[string]interface{} {
    property := map[string]interface{}{}
    property[field1] = FunctionName{Const: name}
    property[field2] = Argument{
        Type:       "object",
        Properties: properties,
    }
    return property
}
@@ -1,220 +0,0 @@
package grammars

// a golang port of https://github.com/ggerganov/llama.cpp/pull/1887

import (
    "encoding/json"
    "fmt"
    "sort"
    "strings"
)

type JSONSchemaConverter struct {
    propOrder map[string]int
    rules     Rules
}

func NewJSONSchemaConverter(propOrder string) *JSONSchemaConverter {
    propOrderSlice := strings.Split(propOrder, ",")
    propOrderMap := make(map[string]int)
    for idx, name := range propOrderSlice {
        propOrderMap[name] = idx
    }

    rules := make(map[string]string)
    rules["space"] = SPACE_RULE

    return &JSONSchemaConverter{
        propOrder: propOrderMap,
        rules:     rules,
    }
}

func (sc *JSONSchemaConverter) formatLiteral(literal interface{}) (string, error) {
    jLiteral, err := jsonString(literal)
    if err != nil {
        return "", err
    }
    escaped := GRAMMAR_LITERAL_ESCAPE_RE.ReplaceAllStringFunc(jLiteral, func(match string) string {
        return GRAMMAR_LITERAL_ESCAPES[match]
    })
    return fmt.Sprintf(`"%s"`, escaped), nil
}

func (sc *JSONSchemaConverter) addRule(name, rule string) string {
    escName := INVALID_RULE_CHARS_RE.ReplaceAllString(name, "-")
    key := escName
    if existingRule, ok := sc.rules[escName]; ok && existingRule != rule {
        i := 0
        for {
            key = fmt.Sprintf("%s%d", escName, i)
            if _, ok := sc.rules[key]; !ok {
                break
            }
            i++
        }
    }
    sc.rules[key] = rule
    return key
}

func (sc *JSONSchemaConverter) visit(schema map[string]interface{}, name string, rootSchema map[string]interface{}) (string, error) {
    st, existType := schema["type"]
    var schemaType string
    if existType {
        schemaType = st.(string)
    }
    ruleName := name
    if name == "" {
        ruleName = "root"
    }
    _, oneOfExists := schema["oneOf"]
    _, anyOfExists := schema["anyOf"]
    if oneOfExists || anyOfExists {
        var alternatives []string
        oneOfSchemas, oneOfExists := schema["oneOf"].([]interface{})
        anyOfSchemas, anyOfExists := schema["anyOf"].([]interface{})

        if oneOfExists {
            for i, altSchema := range oneOfSchemas {
                alternative, err := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
                if err != nil {
                    return "", err
                }
                alternatives = append(alternatives, alternative)
            }
        } else if anyOfExists {
            for i, altSchema := range anyOfSchemas {
                alternative, err := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
                if err != nil {
                    return "", err
                }
                alternatives = append(alternatives, alternative)
            }
        }

        rule := strings.Join(alternatives, " | ")
        return sc.addRule(ruleName, rule), nil
    } else if ref, exists := schema["$ref"].(string); exists {
        referencedSchema, err := sc.resolveReference(ref, rootSchema)
        if err != nil {
            return "", err
        }
        return sc.visit(referencedSchema, name, rootSchema)
    } else if constVal, exists := schema["const"]; exists {
        literal, err := sc.formatLiteral(constVal)
        if err != nil {
            return "", err
        }
        return sc.addRule(ruleName, literal), nil
    } else if enumVals, exists := schema["enum"].([]interface{}); exists {
        var enumRules []string
        for _, enumVal := range enumVals {
            enumRule, err := sc.formatLiteral(enumVal)
            if err != nil {
                return "", err
            }
            enumRules = append(enumRules, enumRule)
        }
        rule := strings.Join(enumRules, " | ")
        return sc.addRule(ruleName, rule), nil
    } else if properties, exists := schema["properties"].(map[string]interface{}); schemaType == "object" && exists {
        propOrder := sc.propOrder
        var propPairs []struct {
            propName   string
            propSchema map[string]interface{}
        }

        for propName, propSchema := range properties {
            propPairs = append(propPairs, struct {
                propName   string
                propSchema map[string]interface{}
            }{propName: propName, propSchema: propSchema.(map[string]interface{})})
        }

        sort.Slice(propPairs, func(i, j int) bool {
            iOrder := propOrder[propPairs[i].propName]
            jOrder := propOrder[propPairs[j].propName]
            if iOrder != 0 && jOrder != 0 {
                return iOrder < jOrder
            }
            return propPairs[i].propName < propPairs[j].propName
        })

        var rule strings.Builder
        rule.WriteString(`"{" space`)

        for i, propPair := range propPairs {
            propName := propPair.propName
            propSchema := propPair.propSchema
            propRuleName, err := sc.visit(propSchema, fmt.Sprintf("%s-%s", ruleName, propName), rootSchema)
            if err != nil {
                return "", err
            }
            lPropName, err := sc.formatLiteral(propName)
            if err != nil {
                return "", err
            }
            if i > 0 {
                rule.WriteString(` "," space`)
            }

            rule.WriteString(fmt.Sprintf(` %s space ":" space %s`, lPropName, propRuleName))
        }

        rule.WriteString(` "}" space`)
        return sc.addRule(ruleName, rule.String()), nil
    } else if items, exists := schema["items"].(map[string]interface{}); schemaType == "array" && exists {
        itemRuleName, err := sc.visit(items, fmt.Sprintf("%s-item", ruleName), rootSchema)
        if err != nil {
            return "", err
        }
        rule := fmt.Sprintf(`"[" space (%s ("," space %s)*)? "]" space`, itemRuleName, itemRuleName)
        return sc.addRule(ruleName, rule), nil
    } else {
        primitiveRule, exists := PRIMITIVE_RULES[schemaType]
        if !exists {
            return "", fmt.Errorf("unrecognized schema: %v", schema)
        }
        if ruleName == "root" {
            schemaType = "root"
        }
        return sc.addRule(schemaType, primitiveRule), nil
    }
}

func (sc *JSONSchemaConverter) resolveReference(ref string, rootSchema map[string]interface{}) (map[string]interface{}, error) {
    if !strings.HasPrefix(ref, "#/$defs/") {
        return nil, fmt.Errorf("invalid reference format: %s", ref)
    }

    defKey := strings.TrimPrefix(ref, "#/$defs/")
    definitions, exists := rootSchema["$defs"].(map[string]interface{})
    if !exists {
        return nil, fmt.Errorf("no definitions found in the schema: %s", rootSchema)
    }

    def, exists := definitions[defKey].(map[string]interface{})
    if !exists {
        return nil, fmt.Errorf("definition not found: %s %+v", defKey, definitions)
    }

    return def, nil
}

func (sc *JSONSchemaConverter) Grammar(schema map[string]interface{}, options ...func(*GrammarOption)) (string, error) {
    sc.addRule("freestring", PRIMITIVE_RULES["freestring"])
    _, err := sc.visit(schema, "", schema)
    if err != nil {
        return "", err
    }
    return sc.rules.ToGrammar(options...), nil
}
|
||||
|
||||
func (sc *JSONSchemaConverter) GrammarFromBytes(b []byte, options ...func(*GrammarOption)) (string, error) {
|
||||
var schema map[string]interface{}
|
||||
err := json.Unmarshal(b, &schema)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return sc.Grammar(schema, options...)
|
||||
}
|
||||
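For orientation, a minimal usage sketch of the converter above (assuming the exported API as shown; the schema literal is illustrative). Note that the propOrder comparison in visit treats index 0 as unset, so the first comma-separated property name effectively falls back to alphabetical ordering:

package main

import (
	"fmt"

	"github.com/mudler/LocalAI/pkg/functions/grammars"
)

func main() {
	schema := []byte(`{"type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "number"}}}`)
	// "age,name" assigns age index 0 and name index 1.
	g, err := grammars.NewJSONSchemaConverter("age,name").GrammarFromBytes(schema)
	if err != nil {
		panic(err)
	}
	fmt.Println(g) // prints the generated BNF-style grammar
}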
@@ -1,281 +0,0 @@
package grammars

import (
	"encoding/json"
	"fmt"
	"regexp"
	"sort"
	"strings"
)

type LLama31SchemaConverter struct {
	fnName string
	rules  Rules
}

func NewLLama31SchemaConverter(fnName string) *LLama31SchemaConverter {
	rules := make(map[string]string)
	rules["space"] = SPACE_RULE
	if fnName == "" {
		fnName = "name"
	}

	return &LLama31SchemaConverter{
		rules:  rules,
		fnName: fnName,
	}
}

var GRAMMAR_LITERAL_ESCAPESLlama = map[string]string{
	"\r": `\r`,
	"\n": `\n`,
}

var GRAMMAR_LITERAL_ESCAPE_RELlama = regexp.MustCompile(`[\r\n]`)

func (sc *LLama31SchemaConverter) formatLiteral(literal interface{}) (string, error) {
	jLiteral, err := jsonString(literal)
	if err != nil {
		return "", err
	}
	escaped := GRAMMAR_LITERAL_ESCAPE_RELlama.ReplaceAllStringFunc(jLiteral, func(match string) string {
		return GRAMMAR_LITERAL_ESCAPESLlama[match]
	})
	return escaped, nil
}

func (sc *LLama31SchemaConverter) formatLiteralQuoted(literal interface{}) (string, error) {
	jLiteral, err := jsonString(literal)
	if err != nil {
		return "", err
	}
	escaped := GRAMMAR_LITERAL_ESCAPE_RE.ReplaceAllStringFunc(jLiteral, func(match string) string {
		return GRAMMAR_LITERAL_ESCAPES[match]
	})
	return fmt.Sprintf(`"%s"`, escaped), nil
}

func (sc *LLama31SchemaConverter) addRule(name, rule string) string {
	escName := INVALID_RULE_CHARS_RE.ReplaceAllString(name, "-")
	key := escName
	if existingRule, ok := sc.rules[escName]; ok && existingRule != rule {
		i := 0
		for {
			key = fmt.Sprintf("%s%d", escName, i)
			if _, ok := sc.rules[key]; !ok {
				break
			}
			i++
		}
	}
	sc.rules[key] = rule
	return key
}

func (sc *LLama31SchemaConverter) visit(schema map[string]interface{}, name string, rootSchema map[string]interface{}) (string, error) {
	st, existType := schema["type"]
	var schemaType string
	if existType {
		schemaType = st.(string)
	}
	ruleName := name
	if name == "" {
		ruleName = "root"
	}
	_, oneOfExists := schema["oneOf"]
	_, anyOfExists := schema["anyOf"]
	if oneOfExists || anyOfExists {
		var alternatives []string
		oneOfSchemas, oneOfExists := schema["oneOf"].([]interface{})
		anyOfSchemas, anyOfExists := schema["anyOf"].([]interface{})

		if oneOfExists {
			for i, altSchema := range oneOfSchemas {
				alternative, err := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
				if err != nil {
					return "", err
				}
				alternatives = append(alternatives, alternative)
			}
		} else if anyOfExists {
			for i, altSchema := range anyOfSchemas {
				alternative, err := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
				if err != nil {
					return "", err
				}
				alternatives = append(alternatives, alternative)
			}
		}

		rule := strings.Join(alternatives, " | ")
		return sc.addRule(ruleName, rule), nil
	} else if ref, exists := schema["$ref"].(string); exists {
		referencedSchema, err := sc.resolveReference(ref, rootSchema)
		if err != nil {
			return "", err
		}
		return sc.visit(referencedSchema, name, rootSchema)
	} else if constVal, exists := schema["const"]; exists {
		literal, err := sc.formatLiteral(constVal)
		if err != nil {
			return "", err
		}
		return sc.addRule(ruleName, literal), nil
	} else if enumVals, exists := schema["enum"].([]interface{}); exists {
		var enumRules []string
		for _, enumVal := range enumVals {
			enumRule, err := sc.formatLiteralQuoted(enumVal)
			if err != nil {
				return "", err
			}
			enumRules = append(enumRules, enumRule)
		}
		rule := strings.Join(enumRules, " | ")
		return sc.addRule(ruleName, rule), nil
	} else if properties, exists := schema["properties"].(map[string]interface{}); schemaType == "object" && exists {
		baseProperty := false
		depth := strings.Split(name, "-")
		if len(depth) == 2 {
			baseProperty = true
		}
		type propData []struct {
			propName   string
			propSchema map[string]interface{}
		}
		var propPairs propData

		for propName, propSchema := range properties {
			propPairs = append(propPairs, struct {
				propName   string
				propSchema map[string]interface{}
			}{propName: propName, propSchema: propSchema.(map[string]interface{})})
		}

		sort.Slice(propPairs, func(i, j int) bool {
			return propPairs[i].propName < propPairs[j].propName
		})

		var rule strings.Builder
		if baseProperty {
			rule.WriteString(`"<function="`)
		} else {
			rule.WriteString(`"{" space`)
		}

		if baseProperty {
			namePair := propData{}
			for i, propPair := range propPairs {
				propName := propPair.propName
				if propName == sc.fnName {
					namePair = append(namePair, propPair)
					// remove namePair from propPairs
					propPairs = append(propPairs[:i], propPairs[i+1:]...)
					break
				}
			}
			if len(namePair) == 0 {
				return "", fmt.Errorf("no function name found in the schema: %s", schema)
			}

			propRuleName, err := sc.visit(namePair[0].propSchema, fmt.Sprintf("%s-%s", ruleName, sc.fnName), rootSchema)
			if err != nil {
				return "", err
			}

			rule.WriteString(fmt.Sprintf(` %s ">{" `, propRuleName))

			for _, propPair := range propPairs {
				propName := propPair.propName
				propSchema := propPair.propSchema
				propRuleName, err := sc.visit(propSchema, fmt.Sprintf("%s-%s", ruleName, propName), rootSchema)
				if err != nil {
					return "", err
				}

				rule.WriteString(propRuleName)
			}

			rule.WriteString(` "}</function>"`)
		} else {
			for i, propPair := range propPairs {
				propName := propPair.propName
				propSchema := propPair.propSchema
				propRuleName, err := sc.visit(propSchema, fmt.Sprintf("%s-%s", ruleName, propName), rootSchema)
				if err != nil {
					return "", err
				}
				lPropName, err := sc.formatLiteralQuoted(propName)
				if err != nil {
					return "", err
				}
				if i > 0 {
					rule.WriteString(` "," space`)
				}

				rule.WriteString(fmt.Sprintf(` %s space ":" space %s`, lPropName, propRuleName))
			}
		}

		if !baseProperty {
			rule.WriteString(` "}" space`)
		}

		return sc.addRule(ruleName, rule.String()), nil
	} else if items, exists := schema["items"].(map[string]interface{}); schemaType == "array" && exists {
		itemRuleName, err := sc.visit(items, fmt.Sprintf("%s-item", ruleName), rootSchema)
		if err != nil {
			return "", err
		}
		rule := fmt.Sprintf(`"[" space (%s ("," space %s)*)? "]" space`, itemRuleName, itemRuleName)
		return sc.addRule(ruleName, rule), nil
	} else {
		primitiveRule, exists := PRIMITIVE_RULES[schemaType]
		if !exists {
			return "", fmt.Errorf("unrecognized schema: %v", schema)
		}
		if ruleName == "root" {
			schemaType = "root"
		}
		return sc.addRule(schemaType, primitiveRule), nil
	}
}
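// A note on the baseProperty heuristic in visit above: nested rules are named
// by joining parent and child with "-", so a name that splits into exactly
// two segments (e.g. "root-0") marks a direct alternative of the root schema.
// Only those alternatives are wrapped as "<function=" ... "}</function>";
// deeper objects keep plain JSON object syntax.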
func (sc *LLama31SchemaConverter) resolveReference(ref string, rootSchema map[string]interface{}) (map[string]interface{}, error) {
	if !strings.HasPrefix(ref, "#/$defs/") {
		return nil, fmt.Errorf("invalid reference format: %s", ref)
	}

	defKey := strings.TrimPrefix(ref, "#/$defs/")
	definitions, exists := rootSchema["$defs"].(map[string]interface{})
	if !exists {
		return nil, fmt.Errorf("no definitions found in the schema: %s", rootSchema)
	}

	def, exists := definitions[defKey].(map[string]interface{})
	if !exists {
		return nil, fmt.Errorf("definition not found: %s %+v", defKey, definitions)
	}

	return def, nil
}

func (sc *LLama31SchemaConverter) Grammar(schema map[string]interface{}, options ...func(*GrammarOption)) (string, error) {
	sc.addRule("freestring", PRIMITIVE_RULES["freestring"])
	_, err := sc.visit(schema, "", schema)
	if err != nil {
		return "", err
	}
	return sc.rules.ToGrammar(options...), nil
}

func (sc *LLama31SchemaConverter) GrammarFromBytes(b []byte, options ...func(*GrammarOption)) (string, error) {
	var schema map[string]interface{}
	err := json.Unmarshal(b, &schema)
	if err != nil {
		return "", err
	}
	return sc.Grammar(schema, options...)
}
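The net effect is that grammars produced by this converter constrain the model to the llama3.1-style tool-call wire format, e.g. <function=search>{"query": "..."}</function>, which the test below asserts rule by rule.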
@@ -1,76 +0,0 @@
package grammars_test

import (
	"strings"

	. "github.com/mudler/LocalAI/pkg/functions/grammars"
	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

const (
	testllama31Input1 = `
{
	"oneOf": [
		{
			"type": "object",
			"properties": {
				"function": {"const": "create_event"},
				"arguments": {
					"type": "object",
					"properties": {
						"title": {"type": "string"},
						"date": {"type": "string"},
						"time": {"type": "string"}
					}
				}
			}
		},
		{
			"type": "object",
			"properties": {
				"function": {"const": "search"},
				"arguments": {
					"type": "object",
					"properties": {
						"query": {"type": "string"}
					}
				}
			}
		}
	]
}`
	// <function=example_function_name>{{"example_name": "example_value"}}</function>
	testllama31inputResult1 = `root-0-function ::= "create_event"
freestring ::= (
	[^"\\] |
	"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* space
root-0 ::= "<function=" root-0-function ">{" root-0-arguments "}</function>"
root-1-arguments ::= "{" space "\"query\"" space ":" space string "}" space
root ::= root-0 | root-1
space ::= " "?
root-0-arguments ::= "{" space "\"date\"" space ":" space string "," space "\"time\"" space ":" space string "," space "\"title\"" space ":" space string "}" space
root-1 ::= "<function=" root-1-function ">{" root-1-arguments "}</function>"
string ::= "\"" (
	[^"\\] |
	"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
root-1-function ::= "search"`
)

var _ = Describe("JSON schema grammar tests", func() {
	Context("JSON", func() {
		It("generates a valid grammar from JSON schema", func() {
			grammar, err := NewLLama31SchemaConverter("function").GrammarFromBytes([]byte(testllama31Input1))
			Expect(err).ToNot(HaveOccurred())
			results := strings.Split(testllama31inputResult1, "\n")
			for _, r := range results {
				if r != "" {
					Expect(grammar).To(ContainSubstring(r))
				}
			}
			Expect(len(results)).To(Equal(len(strings.Split(grammar, "\n"))))
		})
	})
})
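Note the assertion strategy: rule order in the emitted grammar depends on map iteration, so the test checks that every expected line is present and that the line counts match, rather than comparing the grammar as one exact string.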
@@ -1,93 +0,0 @@
package grammars

import (
	"fmt"
	"strings"

	"github.com/mudler/LocalAI/pkg/utils"
)

type Rules map[string]string

func (rules Rules) ToGrammar(options ...func(*GrammarOption)) string {
	grammarOpts := &GrammarOption{}
	grammarOpts.Apply(options...)

	prefix := grammarOpts.Prefix
	maybeArray := grammarOpts.MaybeArray
	disableParallelNewLines := grammarOpts.DisableParallelNewLines
	maybeString := grammarOpts.MaybeString
	noMixedFreeString := grammarOpts.NoMixedFreeString

	var lines []string

	swapRoot := maybeArray || maybeString || prefix != ""

	// write down the computed rules.
	// if maybeArray is true, we need to add the array rule and slightly tweak the root rule
	for name, rule := range rules {
		if swapRoot && name == "root" {
			name = "realvalue"
		}
		lines = append(lines, fmt.Sprintf("%s ::= %s", name, rule))
	}

	if !swapRoot {
		return strings.Join(lines, "\n")
	}

	newRoot := "realvalue"
	if maybeArray {
		newRoot = "arr | realvalue"
	}

	freestringRule := "mixedstring"
	if noMixedFreeString {
		freestringRule = "freestring"
	}

	if prefix != "" {
		// escape newlines in the prefix
		prefix = utils.EscapeNewLines(prefix)

		if maybeArray && maybeString {
			newRoot = "(" + newRoot + ")"
		}

		if maybeString {
			//newRoot = "( (\"" + suffix + "\" " + newRoot + ") | freestring ) "
			newRoot = "( \"" + prefix + "\" " + newRoot + " | " + freestringRule + " ) "
		} else {
			newRoot = "\"" + prefix + "\" " + "" + newRoot + ""
		}
	} else if maybeString {
		if maybeArray {
			// newRoot = "(" + newRoot + ")"
		}

		newRoot = freestringRule + " | " + newRoot
	}

	lines = append(lines, fmt.Sprintf("%s ::= %s", "root", newRoot))
	if disableParallelNewLines {
		lines = append(lines, array)
	} else {
		lines = append(lines, arrayNewLines)
	}

	if maybeArray {
		if grammarOpts.ExpectStringsAfterJSON {
			lines = append(lines, `mixedstring ::= freestring | freestring arr freestring | (freestring realvalue freestring)* | realvalue | arr`)
		} else {
			lines = append(lines, `mixedstring ::= freestring | freestring arr | freestring realvalue | realvalue | arr`)
		}
	} else {
		if grammarOpts.ExpectStringsAfterJSON {
			lines = append(lines, `mixedstring ::= freestring | (freestring realvalue freestring)* | realvalue`)
		} else {
			lines = append(lines, `mixedstring ::= freestring | freestring realvalue | realvalue`)
		}
	}

	return strings.Join(lines, "\n")
}
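As a minimal sketch of the root-swapping behavior (the rule bodies are hypothetical; the arr/arrayNewLines rules are appended by ToGrammar itself):

package main

import (
	"fmt"

	"github.com/mudler/LocalAI/pkg/functions/grammars"
)

func main() {
	rules := grammars.Rules{
		"space": `" "?`,
		"root":  `"{" space "}" space`, // hypothetical root rule
	}
	// With EnableMaybeArray the original root is renamed to "realvalue" and a
	// new root accepting either a single value or an array is emitted.
	fmt.Println(rules.ToGrammar(grammars.EnableMaybeArray))
}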
@@ -1,33 +0,0 @@
package grammars

type SchemaConverterType int

const (
	JSONSchema SchemaConverterType = iota
	LLama31Schema
)

const (
	LlamaType string = "llama3.1"
	JSONType  string = "json"
)

func (s SchemaConverterType) String() string {
	switch s {
	case JSONSchema:
		return JSONType
	case LLama31Schema:
		return LlamaType
	}
	return "unknown"
}

func NewType(t string) SchemaConverterType {
	switch t {
	case JSONType:
		return JSONSchema
	case LlamaType:
		return LLama31Schema
	}
	return JSONSchema
}
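A quick round-trip sketch of the mapping (note that unknown strings fall back to JSONSchema rather than erroring):

package main

import (
	"fmt"

	"github.com/mudler/LocalAI/pkg/functions/grammars"
)

func main() {
	fmt.Println(grammars.NewType("llama3.1")) // llama3.1
	fmt.Println(grammars.NewType("json"))     // json
	fmt.Println(grammars.NewType("bogus"))    // json (default fallback)
}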
@@ -1,28 +0,0 @@
package functions

const (
	JSONBNF = `root ::= object
value ::= object | array | string | number | ("true" | "false" | "null") ws

object ::=
  "{" ws (
    string ":" ws value
    ("," ws string ":" ws value)*
  )? "}" ws

array ::=
  "[" ws (
    value
    ("," ws value)*
  )? "]" ws

string ::=
  "\"" (
    [^"\\] |
    "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
  )* "\"" ws

number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws

ws ::= ([ \t\n] ws)?`
)
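By construction, root only matches an object, so a reply like {"answer": 42} is accepted as a complete document, while a bare array such as [1, 2] matches value/array but not root and is therefore rejected at the top level.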
@@ -1,4 +1,4 @@
package grammars
package functions

type GrammarOption struct {
	PropOrder string
@@ -8,9 +8,6 @@ type GrammarOption struct {
	MaybeString            bool
	NoMixedFreeString      bool
	ExpectStringsAfterJSON bool

	FunctionName string
	SchemaType   SchemaConverterType
}

func (o *GrammarOption) Apply(options ...func(*GrammarOption)) {
@@ -51,15 +48,3 @@ func SetPropOrder(order string) func(*GrammarOption) {
		o.PropOrder = order
	}
}

func WithSchemaType(schemaType SchemaConverterType) func(*GrammarOption) {
	return func(o *GrammarOption) {
		o.SchemaType = schemaType
	}
}

func WithFunctionName(name string) func(*GrammarOption) {
	return func(o *GrammarOption) {
		o.FunctionName = name
	}
}
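A minimal sketch of the functional-options pattern above (assuming the left-hand side of this diff, where the options still live in the grammars package):

package main

import "github.com/mudler/LocalAI/pkg/functions/grammars"

func main() {
	o := &grammars.GrammarOption{}
	o.Apply(
		grammars.WithSchemaType(grammars.LLama31Schema),
		grammars.WithFunctionName("function"),
	)
	_ = o // o.SchemaType and o.FunctionName are now set
}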
@@ -7,7 +7,6 @@ import (
	"regexp"
	"strings"

	"github.com/mudler/LocalAI/pkg/functions/grammars"
	"github.com/mudler/LocalAI/pkg/utils"
	"github.com/rs/zerolog/log"
)
@@ -23,9 +22,7 @@ type GrammarConfig struct {
	MixedMode bool `yaml:"mixed_mode"`

	// NoMixedFreeString disables the mixed mode for free strings
	// In this way, if the LLM selects a free string, it won't necessarily be mixed with JSON objects.
	// For example, if enabled, the LLM either returns a JSON object or a free string, but not a mix of both.
	// If disabled (default): the LLM can return a JSON object surrounded by free strings (e.g. `this is the JSON result: { "bar": "baz" } for your question`). This forces the LLM to return at least a JSON object, but it's not going to be strict.
	// In this way, if the LLM selects a free string, it won't necessarily be mixed with JSON objects
	NoMixedFreeString bool `yaml:"no_mixed_free_string"`

	// NoGrammar disables the grammar parsing and parses the responses directly from the LLM
@@ -42,10 +39,6 @@ type GrammarConfig struct {
	// for instance name,arguments will print { "name": "foo", "arguments": { "bar": "baz" } }
	// instead of { "arguments": { "bar": "baz" }, "name": "foo" }
	PropOrder string `yaml:"properties_order"`

	// SchemaType can be configured to use a specific schema type to force the grammar
	// available: json, llama3.1
	SchemaType string `yaml:"schema_type"`
}

// FunctionsConfig is the configuration for the tool/function call.
@@ -99,36 +92,28 @@ type FuncCallResults struct {
	Arguments string
}

func (g FunctionsConfig) GrammarOptions() []func(o *grammars.GrammarOption) {
	opts := []func(o *grammars.GrammarOption){}
	if g.GrammarConfig.MixedMode {
		opts = append(opts, grammars.EnableMaybeString)
func (g GrammarConfig) Options() []func(o *GrammarOption) {
	opts := []func(o *GrammarOption){}
	if g.MixedMode {
		opts = append(opts, EnableMaybeString)
	}
	if g.GrammarConfig.ParallelCalls {
		opts = append(opts, grammars.EnableMaybeArray)
	if g.ParallelCalls {
		opts = append(opts, EnableMaybeArray)
	}
	if g.GrammarConfig.DisableParallelNewLines {
		opts = append(opts, grammars.DisableParallelNewLines)
	if g.DisableParallelNewLines {
		opts = append(opts, DisableParallelNewLines)
	}
	if g.GrammarConfig.Prefix != "" {
		opts = append(opts, grammars.SetPrefix(g.GrammarConfig.Prefix))
	if g.Prefix != "" {
		opts = append(opts, SetPrefix(g.Prefix))
	}
	if g.GrammarConfig.NoMixedFreeString {
		opts = append(opts, grammars.NoMixedFreeString)
	if g.NoMixedFreeString {
		opts = append(opts, NoMixedFreeString)
	}
	if g.GrammarConfig.ExpectStringsAfterJSON {
		opts = append(opts, grammars.ExpectStringsAfterJSON)
	if g.ExpectStringsAfterJSON {
		opts = append(opts, ExpectStringsAfterJSON)
	}

	if g.GrammarConfig.SchemaType != "" {
		opts = append(opts, grammars.WithSchemaType(grammars.NewType(g.GrammarConfig.SchemaType)))
	}

	if g.FunctionNameKey != "" {
		opts = append(opts, grammars.WithFunctionName(g.FunctionNameKey))
	}

	opts = append(opts, grammars.SetPropOrder(g.GrammarConfig.PropOrder))
	opts = append(opts, SetPropOrder(g.PropOrder))
	return opts
}
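As a rough sketch of how the config translates to grammar options (assuming the right-hand side of this diff, where the method is GrammarConfig.Options; field values are illustrative):

package main

import "github.com/mudler/LocalAI/pkg/functions"

func main() {
	g := functions.GrammarConfig{
		MixedMode:     true,
		ParallelCalls: true,
		PropOrder:     "name,arguments",
	}
	// Yields EnableMaybeString, EnableMaybeArray and SetPropOrder options,
	// which ToGrammar later uses to reshape the root rule.
	_ = g.Options()
}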
@@ -212,7 +212,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
			grpcProcess = p
			foundCUDA = true
		} else {
			log.Debug().Msgf("Nvidia GPU device found, no embedded CUDA variant found. You can ignore this message if you are using container with CUDA support")
			log.Info().Msgf("GPU device found but no CUDA backend present")
		}
	}
	if strings.Contains(gpu.String(), "amd") {
@@ -222,7 +222,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
			grpcProcess = p
			foundAMDGPU = true
		} else {
			log.Debug().Msgf("AMD GPU device found, no embedded HIPBLAS variant found. You can ignore this message if you are using container with HIPBLAS support")
			log.Info().Msgf("GPU device found but no HIPBLAS backend present")
		}
	}
	if strings.Contains(gpu.String(), "intel") {
@@ -236,7 +236,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
			grpcProcess = p
			foundIntelGPU = true
		} else {
			log.Debug().Msgf("Intel GPU device found, no embedded SYCL variant found. You can ignore this message if you are using container with SYCL support")
			log.Info().Msgf("GPU device found but no Intel backend present")
		}
	}
}
@@ -18,15 +18,3 @@ func RandString(n int) string {
	}
	return string(b)
}

func Unique(arr []string) []string {
	unique := make(map[string]bool)
	var result []string
	for _, item := range arr {
		if _, ok := unique[item]; !ok {
			unique[item] = true
			result = append(result, item)
		}
	}
	return result
}
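For completeness, a quick usage sketch of Unique, which deduplicates while preserving first-occurrence order (written as an example test in the same package as the function above; requires importing fmt):

func ExampleUnique() {
	fmt.Println(Unique([]string{"a", "b", "a", "c", "b"}))
	// Output: [a b c]
}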