Compare commits

...

12 Commits

Author SHA1 Message Date
Ettore Di Giacinto
93d3e4257a recursive
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-04 20:22:02 +00:00
Ettore Di Giacinto
e0e904ff98 fixups
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-04 20:22:02 +00:00
Ettore Di Giacinto
a95422f4d1 fix(tools): sanitize inputs
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-04 20:22:02 +00:00
Ettore Di Giacinto
560bf50299 chore(Makefile): refactor common make targets (#7858)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-04 21:12:50 +01:00
LocalAI [bot]
a7e155240b chore: ⬆️ Update ggml-org/llama.cpp to e57f52334b2e8436a94f7e332462dfc63a08f995 (#7848)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-01-04 10:27:45 +01:00
LocalAI [bot]
793e4907a2 feat(swagger): update swagger (#7847)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-01-03 22:09:39 +01:00
Ettore Di Giacinto
d38811560c chore(docs): add opencode, GHA, and realtime voice assistant examples
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-03 22:03:43 +01:00
Ettore Di Giacinto
33cc0b8e13 fix(chat/ui): record model name in history for consistency (#7845)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-03 18:05:33 +01:00
lif
4cd95b8a9d fix: Highly inconsistent agent response to cogito agent calling MCP server - Body "Invalid http method" (#7790)
* fix: resolve duplicate MCP route registration causing 50% failure rate

Fixes #7772

The issue was caused by duplicate registration of the MCP endpoint
/mcp/v1/chat/completions in both openai.go and localai.go, leading
to a race condition where requests would randomly hit different
handlers with incompatible behaviors.

Changes:
- Removed duplicate MCP route registration from openai.go
- Kept the localai.MCPStreamEndpoint as the canonical handler
- Added all three MCP route patterns for backward compatibility:
  * /v1/mcp/chat/completions
  * /mcp/v1/chat/completions
  * /mcp/chat/completions
- Added comments to clarify route ownership and prevent future conflicts
- Fixed formatting in ui_api.go

The localai.MCPStreamEndpoint handler is more feature-complete as it
supports both streaming and non-streaming modes, while the removed
openai.MCPCompletionEndpoint only supported synchronous requests.

This eliminates the ~50% failure rate where the cogito library would
receive "Invalid http method" errors when internal HTTP requests were
routed to the wrong handler.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
Signed-off-by: majiayu000 <1835304752@qq.com>

* Address feedback from review

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: majiayu000 <1835304752@qq.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-03 15:43:23 +01:00
LocalAI [bot]
8c504113a2 chore(model gallery): 🤖 add 1 new models via gallery agent (#7840)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-01-03 08:42:05 +01:00
coffeerunhobby
666d110714 fix: Prevent BMI2 instruction crash on AVX-only CPUs (#7817)
* Fix: Prevent BMI2 instruction crash on AVX-only CPUs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: apply no-bmi flags on non-darwin

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: coffeerunhobby <coffeerunhobby@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-03 08:36:55 +01:00
LocalAI [bot]
641606ae93 chore: ⬆️ Update ggml-org/llama.cpp to 706e3f93a60109a40f1224eaf4af0d59caa7c3ae (#7836)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-01-02 21:26:37 +00:00
19 changed files with 817 additions and 412 deletions

Makefile
View File

@@ -6,6 +6,7 @@ LAUNCHER_BINARY_NAME=local-ai-launcher
CUDA_MAJOR_VERSION?=13
CUDA_MINOR_VERSION?=0
UBUNTU_VERSION?=2204
GORELEASER?=
@@ -155,7 +156,16 @@ test: test-models/testmodel.ggml protogen-go
########################################################
docker-build-aio:
docker build --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
docker build \
--build-arg MAKEFLAGS="--jobs=5 --output-sync=target" \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
--build-arg GO_TAGS="$(GO_TAGS)" \
-t local-ai:tests -f Dockerfile .
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test $(MAKE) docker-aio
e2e-aio:
@@ -177,7 +187,16 @@ prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 -t localai-tests .
docker build \
--build-arg IMAGE_TYPE=core \
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
-t localai-tests .
run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
@@ -308,6 +327,9 @@ docker:
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
-t $(DOCKER_IMAGE) .
docker-cuda11:
@@ -319,6 +341,7 @@ docker-cuda11:
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
-t $(DOCKER_IMAGE)-cuda-11 .
docker-aio:
@@ -326,6 +349,9 @@ docker-aio:
docker build \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
-t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio .
docker-aio-all:
@@ -338,62 +364,26 @@ docker-image-intel:
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg BUILD_TYPE=intel -t $(DOCKER_IMAGE) .
--build-arg BUILD_TYPE=intel \
--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
-t $(DOCKER_IMAGE) .
########################################################
## Backends
########################################################
# Pattern rule for standard backends (docker-based)
# This matches all backends that use docker-build-* and docker-save-*
backends/%: docker-build-% docker-save-% build
./local-ai backends install "ocifile://$(abspath ./backend-images/$*.tar)"
backends/diffusers: docker-build-diffusers docker-save-diffusers build
./local-ai backends install "ocifile://$(abspath ./backend-images/diffusers.tar)"
backends/llama-cpp: docker-build-llama-cpp docker-save-llama-cpp build
./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
backends/piper: docker-build-piper docker-save-piper build
./local-ai backends install "ocifile://$(abspath ./backend-images/piper.tar)"
backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build
./local-ai backends install "ocifile://$(abspath ./backend-images/stablediffusion-ggml.tar)"
backends/whisper: docker-build-whisper docker-save-whisper build
./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)"
backends/silero-vad: docker-build-silero-vad docker-save-silero-vad build
./local-ai backends install "ocifile://$(abspath ./backend-images/silero-vad.tar)"
backends/local-store: docker-build-local-store docker-save-local-store build
./local-ai backends install "ocifile://$(abspath ./backend-images/local-store.tar)"
backends/huggingface: docker-build-huggingface docker-save-huggingface build
./local-ai backends install "ocifile://$(abspath ./backend-images/huggingface.tar)"
backends/rfdetr: docker-build-rfdetr docker-save-rfdetr build
./local-ai backends install "ocifile://$(abspath ./backend-images/rfdetr.tar)"
backends/kitten-tts: docker-build-kitten-tts docker-save-kitten-tts build
./local-ai backends install "ocifile://$(abspath ./backend-images/kitten-tts.tar)"
backends/kokoro: docker-build-kokoro docker-save-kokoro build
./local-ai backends install "ocifile://$(abspath ./backend-images/kokoro.tar)"
backends/chatterbox: docker-build-chatterbox docker-save-chatterbox build
./local-ai backends install "ocifile://$(abspath ./backend-images/chatterbox.tar)"
# Darwin-specific backends (keep as explicit targets since they have special build logic)
backends/llama-cpp-darwin: build
bash ./scripts/build/llama-cpp-darwin.sh
./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
backends/neutts: docker-build-neutts docker-save-neutts build
./local-ai backends install "ocifile://$(abspath ./backend-images/neutts.tar)"
backends/vllm: docker-build-vllm docker-save-vllm build
./local-ai backends install "ocifile://$(abspath ./backend-images/vllm.tar)"
backends/vibevoice: docker-build-vibevoice docker-save-vibevoice build
./local-ai backends install "ocifile://$(abspath ./backend-images/vibevoice.tar)"
build-darwin-python-backend: build
bash ./scripts/build/python-darwin.sh
@@ -423,119 +413,83 @@ backends/stablediffusion-ggml-darwin:
backend-images:
mkdir -p backend-images
docker-build-llama-cpp:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:llama-cpp -f backend/Dockerfile.llama-cpp .
# Backend metadata: BACKEND_NAME | DOCKERFILE_TYPE | BUILD_CONTEXT | PROGRESS_FLAG | NEEDS_BACKEND_ARG
# llama-cpp is special - uses llama-cpp Dockerfile and doesn't need BACKEND arg
BACKEND_LLAMA_CPP = llama-cpp|llama-cpp|.|false|false
docker-build-bark-cpp:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark-cpp -f backend/Dockerfile.golang --build-arg BACKEND=bark-cpp .
# Golang backends
BACKEND_BARK_CPP = bark-cpp|golang|.|false|true
BACKEND_PIPER = piper|golang|.|false|true
BACKEND_LOCAL_STORE = local-store|golang|.|false|true
BACKEND_HUGGINGFACE = huggingface|golang|.|false|true
BACKEND_SILERO_VAD = silero-vad|golang|.|false|true
BACKEND_STABLEDIFFUSION_GGML = stablediffusion-ggml|golang|.|--progress=plain|true
BACKEND_WHISPER = whisper|golang|.|false|true
docker-build-piper:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:piper -f backend/Dockerfile.golang --build-arg BACKEND=piper .
# Python backends with root context
BACKEND_RERANKERS = rerankers|python|.|false|true
BACKEND_TRANSFORMERS = transformers|python|.|false|true
BACKEND_FASTER_WHISPER = faster-whisper|python|.|false|true
BACKEND_COQUI = coqui|python|.|false|true
BACKEND_BARK = bark|python|.|false|true
BACKEND_EXLLAMA2 = exllama2|python|.|false|true
docker-build-local-store:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:local-store -f backend/Dockerfile.golang --build-arg BACKEND=local-store .
# Python backends with ./backend context
BACKEND_RFDETR = rfdetr|python|./backend|false|true
BACKEND_KITTEN_TTS = kitten-tts|python|./backend|false|true
BACKEND_NEUTTS = neutts|python|./backend|false|true
BACKEND_KOKORO = kokoro|python|./backend|false|true
BACKEND_VLLM = vllm|python|./backend|false|true
BACKEND_DIFFUSERS = diffusers|python|./backend|--progress=plain|true
BACKEND_CHATTERBOX = chatterbox|python|./backend|false|true
BACKEND_VIBEVOICE = vibevoice|python|./backend|--progress=plain|true
docker-build-huggingface:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:huggingface -f backend/Dockerfile.golang --build-arg BACKEND=huggingface .
# Helper function to build docker image for a backend
# Usage: $(call docker-build-backend,BACKEND_NAME,DOCKERFILE_TYPE,BUILD_CONTEXT,PROGRESS_FLAG,NEEDS_BACKEND_ARG)
define docker-build-backend
docker build $(if $(filter-out false,$(4)),$(4)) \
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
$(if $(filter true,$(5)),--build-arg BACKEND=$(1)) \
-t local-ai-backend:$(1) -f backend/Dockerfile.$(2) $(3)
endef
docker-build-rfdetr:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:rfdetr -f backend/Dockerfile.python --build-arg BACKEND=rfdetr ./backend
# Generate docker-build targets from backend definitions
define generate-docker-build-target
docker-build-$(word 1,$(subst |, ,$(1))):
$$(call docker-build-backend,$(word 1,$(subst |, ,$(1))),$(word 2,$(subst |, ,$(1))),$(word 3,$(subst |, ,$(1))),$(word 4,$(subst |, ,$(1))),$(word 5,$(subst |, ,$(1))))
endef
docker-build-kitten-tts:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kitten-tts -f backend/Dockerfile.python --build-arg BACKEND=kitten-tts ./backend
# Generate all docker-build targets
$(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP)))
$(eval $(call generate-docker-build-target,$(BACKEND_BARK_CPP)))
$(eval $(call generate-docker-build-target,$(BACKEND_PIPER)))
$(eval $(call generate-docker-build-target,$(BACKEND_LOCAL_STORE)))
$(eval $(call generate-docker-build-target,$(BACKEND_HUGGINGFACE)))
$(eval $(call generate-docker-build-target,$(BACKEND_SILERO_VAD)))
$(eval $(call generate-docker-build-target,$(BACKEND_STABLEDIFFUSION_GGML)))
$(eval $(call generate-docker-build-target,$(BACKEND_WHISPER)))
$(eval $(call generate-docker-build-target,$(BACKEND_RERANKERS)))
$(eval $(call generate-docker-build-target,$(BACKEND_TRANSFORMERS)))
$(eval $(call generate-docker-build-target,$(BACKEND_FASTER_WHISPER)))
$(eval $(call generate-docker-build-target,$(BACKEND_COQUI)))
$(eval $(call generate-docker-build-target,$(BACKEND_BARK)))
$(eval $(call generate-docker-build-target,$(BACKEND_EXLLAMA2)))
$(eval $(call generate-docker-build-target,$(BACKEND_RFDETR)))
$(eval $(call generate-docker-build-target,$(BACKEND_KITTEN_TTS)))
$(eval $(call generate-docker-build-target,$(BACKEND_NEUTTS)))
$(eval $(call generate-docker-build-target,$(BACKEND_KOKORO)))
$(eval $(call generate-docker-build-target,$(BACKEND_VLLM)))
$(eval $(call generate-docker-build-target,$(BACKEND_DIFFUSERS)))
$(eval $(call generate-docker-build-target,$(BACKEND_CHATTERBOX)))
$(eval $(call generate-docker-build-target,$(BACKEND_VIBEVOICE)))
docker-save-kitten-tts: backend-images
docker save local-ai-backend:kitten-tts -o backend-images/kitten-tts.tar
docker-save-chatterbox: backend-images
docker save local-ai-backend:chatterbox -o backend-images/chatterbox.tar
docker-save-vibevoice: backend-images
docker save local-ai-backend:vibevoice -o backend-images/vibevoice.tar
docker-build-neutts:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:neutts -f backend/Dockerfile.python --build-arg BACKEND=neutts ./backend
docker-save-neutts: backend-images
docker save local-ai-backend:neutts -o backend-images/neutts.tar
docker-build-kokoro:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro ./backend
docker-build-vllm:
docker build --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:vllm -f backend/Dockerfile.python --build-arg BACKEND=vllm ./backend
docker-save-vllm: backend-images
docker save local-ai-backend:vllm -o backend-images/vllm.tar
docker-save-kokoro: backend-images
docker save local-ai-backend:kokoro -o backend-images/kokoro.tar
docker-save-rfdetr: backend-images
docker save local-ai-backend:rfdetr -o backend-images/rfdetr.tar
docker-save-huggingface: backend-images
docker save local-ai-backend:huggingface -o backend-images/huggingface.tar
docker-save-local-store: backend-images
docker save local-ai-backend:local-store -o backend-images/local-store.tar
docker-build-silero-vad:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:silero-vad -f backend/Dockerfile.golang --build-arg BACKEND=silero-vad .
docker-save-silero-vad: backend-images
docker save local-ai-backend:silero-vad -o backend-images/silero-vad.tar
docker-save-piper: backend-images
docker save local-ai-backend:piper -o backend-images/piper.tar
docker-save-llama-cpp: backend-images
docker save local-ai-backend:llama-cpp -o backend-images/llama-cpp.tar
docker-save-bark-cpp: backend-images
docker save local-ai-backend:bark-cpp -o backend-images/bark-cpp.tar
docker-build-stablediffusion-ggml:
docker build --progress=plain --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) -t local-ai-backend:stablediffusion-ggml -f backend/Dockerfile.golang --build-arg BACKEND=stablediffusion-ggml .
docker-save-stablediffusion-ggml: backend-images
docker save local-ai-backend:stablediffusion-ggml -o backend-images/stablediffusion-ggml.tar
docker-build-rerankers:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:rerankers -f backend/Dockerfile.python --build-arg BACKEND=rerankers .
docker-build-transformers:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:transformers -f backend/Dockerfile.python --build-arg BACKEND=transformers .
docker-build-diffusers:
docker build --progress=plain --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:diffusers -f backend/Dockerfile.python --build-arg BACKEND=diffusers ./backend
docker-save-diffusers: backend-images
docker save local-ai-backend:diffusers -o backend-images/diffusers.tar
docker-build-whisper:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) -t local-ai-backend:whisper -f backend/Dockerfile.golang --build-arg BACKEND=whisper .
docker-save-whisper: backend-images
docker save local-ai-backend:whisper -o backend-images/whisper.tar
docker-build-faster-whisper:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:faster-whisper -f backend/Dockerfile.python --build-arg BACKEND=faster-whisper .
docker-build-coqui:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:coqui -f backend/Dockerfile.python --build-arg BACKEND=coqui .
docker-build-bark:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark -f backend/Dockerfile.python --build-arg BACKEND=bark .
docker-build-chatterbox:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox ./backend
docker-build-vibevoice:
docker build --progress=plain --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:vibevoice -f backend/Dockerfile.python --build-arg BACKEND=vibevoice ./backend
docker-build-exllama2:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:exllama2 -f backend/Dockerfile.python --build-arg BACKEND=exllama2 .
# Pattern rule for docker-save targets
docker-save-%: backend-images
docker save local-ai-backend:$* -o backend-images/$*.tar
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-bark docker-build-chatterbox docker-build-vibevoice docker-build-exllama2

View File

@@ -1,5 +1,5 @@
LLAMA_VERSION?=ced765be44ce173c374f295b3c6f4175f8fd109b
LLAMA_VERSION?=e57f52334b2e8436a94f7e332462dfc63a08f995
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?=
@@ -8,6 +8,7 @@ NATIVE?=false
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
TARGET?=--target grpc-server
JOBS?=$(shell nproc)
ARCH?=$(shell uname -m)
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
@@ -106,21 +107,39 @@ llama-cpp-avx: llama.cpp
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build purge
$(info ${GREEN}I llama-cpp build info:avx${RESET})
ifeq ($(OS),Darwin)
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64))
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
else
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DCMAKE_C_FLAGS=-mno-bmi2 -DCMAKE_CXX_FLAGS=-mno-bmi2" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
endif
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build/grpc-server llama-cpp-avx
llama-cpp-fallback: llama.cpp
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build purge
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
ifeq ($(OS),Darwin)
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64))
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
else
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DCMAKE_C_FLAGS='-mno-bmi -mno-bmi2' -DCMAKE_CXX_FLAGS='-mno-bmi -mno-bmi2'" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
endif
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback
llama-cpp-grpc: llama.cpp
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
ifeq ($(OS),Darwin)
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64))
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
else
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DCMAKE_C_FLAGS='-mno-bmi -mno-bmi2' -DCMAKE_CXX_FLAGS='-mno-bmi -mno-bmi2'" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
endif
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc
llama-cpp-rpc-server: llama-cpp-grpc

View File

@@ -293,6 +293,8 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, const
return data;
}
// Sanitize tools JSON to remove null values from tool.parameters.properties
// This prevents Jinja template errors when processing tools with malformed parameter schemas
const std::vector<ggml_type> kv_cache_types = {
GGML_TYPE_F32,

View File

@@ -53,12 +53,12 @@ type MCPErrorEvent struct {
Message string `json:"message"`
}
// MCPStreamEndpoint is the SSE streaming endpoint for MCP chat completions
// MCPEndpoint is the endpoint for MCP chat completions. Supports SSE mode, but it is not compatible with the OpenAI apis.
// @Summary Stream MCP chat completions with reasoning, tool calls, and results
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/mcp/chat/completions [post]
func MCPStreamEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) echo.HandlerFunc {
func MCPEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) echo.HandlerFunc {
return func(c echo.Context) error {
ctx := c.Request().Context()
created := int(time.Now().Unix())
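For reference, calling this endpoint from a client looks the same as a regular chat completion, just on a different path. The sketch below is a minimal standalone client and is not part of the diff: the route comes from the route registrations further down in this compare, the payload mirrors a standard OpenAI chat request, and the model name is a placeholder for a model configured with MCP servers.

```go
// Minimal sketch: POST an OpenAI-style chat request to the MCP endpoint.
// Assumptions: LocalAI listening on localhost:8080, "my-mcp-model" is a
// placeholder for a model with MCP servers configured, and omitting
// "stream" yields a single JSON response rather than SSE events.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	payload := map[string]any{
		"model": "my-mcp-model",
		"messages": []map[string]string{
			{"role": "user", "content": "What is the weather in Rome?"},
		},
	}
	body, _ := json.Marshal(payload)

	resp, err := http.Post("http://localhost:8080/v1/mcp/chat/completions",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out)) // OpenAI-style response object in non-streaming mode
}
```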

View File

@@ -622,7 +622,9 @@ func handleQuestion(config *config.ModelConfig, cl *config.ModelConfigLoader, in
// Serialize tools and tool_choice to JSON strings
toolsJSON := ""
if len(input.Tools) > 0 {
toolsBytes, err := json.Marshal(input.Tools)
// Sanitize tools to remove null values from parameters.properties
sanitizedTools := functions.SanitizeTools(input.Tools)
toolsBytes, err := json.Marshal(sanitizedTools)
if err == nil {
toolsJSON = string(toolsBytes)
}

View File

@@ -7,6 +7,7 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/functions"
model "github.com/mudler/LocalAI/pkg/model"
)
@@ -42,7 +43,9 @@ func ComputeChoices(
// Serialize tools and tool_choice to JSON strings
toolsJSON := ""
if len(req.Tools) > 0 {
toolsBytes, err := json.Marshal(req.Tools)
// Sanitize tools to remove null values from parameters.properties
sanitizedTools := functions.SanitizeTools(req.Tools)
toolsBytes, err := json.Marshal(sanitizedTools)
if err == nil {
toolsJSON = string(toolsBytes)
}

View File

@@ -1,148 +0,0 @@
package openai
import (
"context"
"encoding/json"
"errors"
"fmt"
"net"
"time"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/config"
mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/templates"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/cogito"
"github.com/mudler/xlog"
)
// MCPCompletionEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/completions
// @Summary Generate completions for a given prompt and model.
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /mcp/v1/completions [post]
func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) echo.HandlerFunc {
// We do not support streaming mode (Yet?)
return func(c echo.Context) error {
created := int(time.Now().Unix())
ctx := c.Request().Context()
// Handle Correlation
id := c.Request().Header.Get("X-Correlation-ID")
if id == "" {
id = uuid.New().String()
}
input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
if !ok || input.Model == "" {
return echo.ErrBadRequest
}
config, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
if !ok || config == nil {
return echo.ErrBadRequest
}
if config.MCP.Servers == "" && config.MCP.Stdio == "" {
return fmt.Errorf("no MCP servers configured")
}
// Get MCP config from model config
remote, stdio, err := config.MCP.MCPConfigFromYAML()
if err != nil {
return fmt.Errorf("failed to get MCP config: %w", err)
}
// Check if we have tools in cache, or we have to have an initial connection
sessions, err := mcpTools.SessionsFromMCPConfig(config.Name, remote, stdio)
if err != nil {
return fmt.Errorf("failed to get MCP sessions: %w", err)
}
if len(sessions) == 0 {
return fmt.Errorf("no working MCP servers found")
}
fragment := cogito.NewEmptyFragment()
for _, message := range input.Messages {
fragment = fragment.AddMessage(message.Role, message.StringContent)
}
_, port, err := net.SplitHostPort(appConfig.APIAddress)
if err != nil {
return err
}
apiKey := ""
if appConfig.ApiKeys != nil {
apiKey = appConfig.ApiKeys[0]
}
ctxWithCancellation, cancel := context.WithCancel(ctx)
defer cancel()
// TODO: instead of connecting to the API, we should just wire this internally
// and act like completion.go.
// We can do this as cogito expects an interface and we can create one that
// we satisfy to just call internally ComputeChoices
defaultLLM := cogito.NewOpenAILLM(config.Name, apiKey, "http://127.0.0.1:"+port)
// Build cogito options using the consolidated method
cogitoOpts := config.BuildCogitoOptions()
cogitoOpts = append(
cogitoOpts,
cogito.WithContext(ctxWithCancellation),
cogito.WithMCPs(sessions...),
cogito.WithStatusCallback(func(s string) {
xlog.Debug("[model agent] Status", "model", config.Name, "status", s)
}),
cogito.WithReasoningCallback(func(s string) {
xlog.Debug("[model agent] Reasoning", "model", config.Name, "reasoning", s)
}),
cogito.WithToolCallBack(func(t *cogito.ToolChoice, state *cogito.SessionState) cogito.ToolCallDecision {
xlog.Debug("[model agent] Tool call", "model", config.Name, "tool", t.Name, "reasoning", t.Reasoning, "arguments", t.Arguments)
return cogito.ToolCallDecision{
Approved: true,
}
}),
cogito.WithToolCallResultCallback(func(t cogito.ToolStatus) {
xlog.Debug("[model agent] Tool call result", "model", config.Name, "tool", t.Name, "result", t.Result, "tool_arguments", t.ToolArguments)
}),
)
f, err := cogito.ExecuteTools(
defaultLLM, fragment,
cogitoOpts...,
)
if err != nil && !errors.Is(err, cogito.ErrNoToolSelected) {
return err
}
f, err = defaultLLM.Ask(ctx, f)
if err != nil {
return err
}
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{{Message: &schema.Message{Role: "assistant", Content: &f.LastMessage().Content}}},
Object: "text_completion",
}
jsonResult, _ := json.Marshal(resp)
xlog.Debug("Response", "response", string(jsonResult))
// Return the prediction in the response body
return c.JSON(200, resp)
}
}

View File

@@ -137,9 +137,10 @@ func RegisterLocalAIRoutes(router *echo.Echo,
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TOKENIZE)),
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TokenizeRequest) }))
// MCP Stream endpoint
// MCP endpoint - supports both streaming and non-streaming modes
// Note: streaming mode is NOT compatible with the OpenAI apis. We have a set which streams more states.
if evaluator != nil {
mcpStreamHandler := localai.MCPStreamEndpoint(cl, ml, evaluator, appConfig)
mcpStreamHandler := localai.MCPEndpoint(cl, ml, evaluator, appConfig)
mcpStreamMiddleware := []echo.MiddlewareFunc{
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
@@ -154,6 +155,7 @@ func RegisterLocalAIRoutes(router *echo.Echo,
}
router.POST("/v1/mcp/chat/completions", mcpStreamHandler, mcpStreamMiddleware...)
router.POST("/mcp/v1/chat/completions", mcpStreamHandler, mcpStreamMiddleware...)
router.POST("/mcp/chat/completions", mcpStreamHandler, mcpStreamMiddleware...)
}
// Agent job routes

View File

@@ -79,24 +79,6 @@ func RegisterOpenAIRoutes(app *echo.Echo,
app.POST("/completions", completionHandler, completionMiddleware...)
app.POST("/v1/engines/:model/completions", completionHandler, completionMiddleware...)
// MCPcompletion
mcpCompletionHandler := openai.MCPCompletionEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig())
mcpCompletionMiddleware := []echo.MiddlewareFunc{
traceMiddleware,
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
func(next echo.HandlerFunc) echo.HandlerFunc {
return func(c echo.Context) error {
if err := re.SetOpenAIRequest(c); err != nil {
return err
}
return next(c)
}
},
}
app.POST("/mcp/v1/chat/completions", mcpCompletionHandler, mcpCompletionMiddleware...)
app.POST("/mcp/chat/completions", mcpCompletionHandler, mcpCompletionMiddleware...)
// embeddings
embeddingHandler := openai.EmbeddingsEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
embeddingMiddleware := []echo.MiddlewareFunc{

View File

@@ -954,7 +954,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
if !appConfig.EnableTracing {
return c.JSON(503, map[string]any{
"error": "Tracing disabled",
})
})
}
traces := middleware.GetTraces()
return c.JSON(200, map[string]interface{}{

View File

@@ -750,6 +750,7 @@ function stopRequest() {
if (!activeChat) return;
const request = activeRequests.get(activeChat.id);
const requestModel = request?.model || null; // Get model before deleting request
if (request) {
if (request.controller) {
request.controller.abort();
@@ -779,7 +780,8 @@ function stopRequest() {
`<span class='error'>Request cancelled by user</span>`,
null,
null,
activeChat.id
activeChat.id,
requestModel
);
}
@@ -1231,7 +1233,8 @@ async function promptGPT(systemPrompt, input) {
startTime: requestStartTime,
tokensReceived: 0,
interval: null,
maxTokensPerSecond: 0
maxTokensPerSecond: 0,
model: model // Store the model used for this request
});
// Update reactive tracking for UI indicators
@@ -1271,21 +1274,27 @@ async function promptGPT(systemPrompt, input) {
return;
} else {
// Timeout error (controller was aborted by timeout, not user)
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
chatStore.add(
"assistant",
`<span class='error'>Request timeout: MCP processing is taking longer than expected. Please try again.</span>`,
null,
null,
chatId
chatId,
requestModel
);
}
} else {
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
chatStore.add(
"assistant",
`<span class='error'>Network Error: ${error.message}</span>`,
null,
null,
chatId
chatId,
requestModel
);
}
toggleLoader(false, chatId);
@@ -1299,12 +1308,15 @@ async function promptGPT(systemPrompt, input) {
}
if (!response.ok) {
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
chatStore.add(
"assistant",
`<span class='error'>Error: POST ${endpoint} ${response.status}</span>`,
null,
null,
chatId
chatId,
requestModel
);
toggleLoader(false, chatId);
activeRequests.delete(chatId);
@@ -1324,12 +1336,15 @@ async function promptGPT(systemPrompt, input) {
.getReader();
if (!reader) {
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
chatStore.add(
"assistant",
`<span class='error'>Error: Failed to decode MCP API response</span>`,
null,
null,
chatId
chatId,
requestModel
);
toggleLoader(false, chatId);
activeRequests.delete(chatId);
@@ -1598,12 +1613,15 @@ async function promptGPT(systemPrompt, input) {
break;
case "error":
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
chatStore.add(
"assistant",
`<span class='error'>MCP Error: ${eventData.message}</span>`,
null,
null,
chatId
chatId,
requestModel
);
break;
}
@@ -1624,9 +1642,11 @@ async function promptGPT(systemPrompt, input) {
// Update or create assistant message with processed regular content
const currentChat = chatStore.getChat(chatId);
if (!currentChat) break; // Chat was deleted
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
if (lastAssistantMessageIndex === -1) {
if (processedRegular && processedRegular.trim()) {
chatStore.add("assistant", processedRegular, null, null, chatId);
chatStore.add("assistant", processedRegular, null, null, chatId, requestModel);
lastAssistantMessageIndex = targetHistory.length - 1;
}
} else {
@@ -1706,7 +1726,9 @@ async function promptGPT(systemPrompt, input) {
lastMessage.html = DOMPurify.sanitize(marked.parse(lastMessage.content));
}
} else if (processedRegular && processedRegular.trim()) {
chatStore.add("assistant", processedRegular, null, null, chatId);
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
chatStore.add("assistant", processedRegular, null, null, chatId, requestModel);
lastAssistantMessageIndex = targetHistory.length - 1;
}
}
@@ -1754,7 +1776,9 @@ async function promptGPT(systemPrompt, input) {
lastMessage.html = DOMPurify.sanitize(marked.parse(lastMessage.content));
}
} else {
chatStore.add("assistant", finalRegular, null, null, chatId);
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
chatStore.add("assistant", finalRegular, null, null, chatId, requestModel);
}
}
@@ -1812,12 +1836,15 @@ async function promptGPT(systemPrompt, input) {
.getReader();
if (!reader) {
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
chatStore.add(
"assistant",
`<span class='error'>Error: Failed to decode API response</span>`,
null,
null,
chatId
chatId,
requestModel
);
toggleLoader(false, chatId);
activeRequests.delete(chatId);
@@ -1848,9 +1875,11 @@ async function promptGPT(systemPrompt, input) {
const addToChat = (token) => {
const currentChat = chatStore.getChat(chatId);
if (!currentChat) return; // Chat was deleted
chatStore.add("assistant", token, null, null, chatId);
// Count tokens for rate calculation (per chat)
// Get model from request for this chat
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
chatStore.add("assistant", token, null, null, chatId, requestModel);
// Count tokens for rate calculation (per chat)
if (request) {
const tokenCount = Math.ceil(token.length / 4);
request.tokensReceived += tokenCount;
@@ -2008,12 +2037,15 @@ async function promptGPT(systemPrompt, input) {
if (error.name !== 'AbortError' || !currentAbortController) {
const currentChat = chatStore.getChat(chatId);
if (currentChat) {
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
chatStore.add(
"assistant",
`<span class='error'>Error: Failed to process stream</span>`,
null,
null,
chatId
chatId,
requestModel
);
}
}

View File

@@ -276,12 +276,31 @@ SOFTWARE.
}
},
add(role, content, image, audio, targetChatId = null) {
add(role, content, image, audio, targetChatId = null, model = null) {
// If targetChatId is provided, add to that chat, otherwise use active chat
// This allows streaming to continue to the correct chat even if user switches
const chat = targetChatId ? this.getChat(targetChatId) : this.activeChat();
if (!chat) return;
// Determine model for this message:
// - If model is explicitly provided, use it (for assistant messages with specific model)
// - For user messages, use the current chat's model
// - For other messages (thinking, tool_call, etc.), inherit from previous message or use chat model
let messageModel = model;
if (!messageModel) {
if (role === "user") {
// User messages always use the current chat's model
messageModel = chat.model || "";
} else if (role === "assistant") {
// Assistant messages use the chat's model (should be set when request is made)
messageModel = chat.model || "";
} else {
// For thinking, tool_call, etc., try to inherit from last assistant message, or use chat model
const lastAssistant = chat.history.slice().reverse().find(m => m.role === "assistant");
messageModel = lastAssistant?.model || chat.model || "";
}
}
const N = chat.history.length - 1;
// For thinking, reasoning, tool_call, and tool_result messages, always create a new message
if (role === "thinking" || role === "reasoning" || role === "tool_call" || role === "tool_result") {
@@ -311,7 +330,7 @@ SOFTWARE.
// Reasoning, tool_call, and tool_result are always collapsed by default
const isMCPMode = chat.mcpMode || false;
const shouldExpand = (role === "thinking" && !isMCPMode) || false;
chat.history.push({ role, content, html: c, image, audio, expanded: shouldExpand });
chat.history.push({ role, content, html: c, image, audio, expanded: shouldExpand, model: messageModel });
// Auto-name chat from first user message
if (role === "user" && chat.name === "New Chat" && content.trim()) {
@@ -332,6 +351,10 @@ SOFTWARE.
if (audio && audio.length > 0) {
chat.history[N].audio = [...(chat.history[N].audio || []), ...audio];
}
// Preserve model if merging (don't overwrite)
if (!chat.history[N].model && messageModel) {
chat.history[N].model = messageModel;
}
} else {
let c = "";
const lines = content.split("\n");
@@ -343,7 +366,8 @@ SOFTWARE.
content,
html: c,
image: image || [],
audio: audio || []
audio: audio || [],
model: messageModel
});
// Auto-name chat from first user message
@@ -1248,11 +1272,20 @@ SOFTWARE.
</template>
<template x-if="message.role != 'user' && message.role != 'thinking' && message.role != 'reasoning' && message.role != 'tool_call' && message.role != 'tool_result'">
<div class="flex items-center space-x-2">
{{ if $galleryConfig }}
{{ if $galleryConfig.Icon }}<img src="{{$galleryConfig.Icon}}" class="rounded-lg mt-2 max-w-8 max-h-8 border border-[var(--color-primary-border)]/20">{{end}}
{{ end }}
<!-- Model icon - from message history, fallback to active chat -->
<template x-if="message.model && window.__galleryConfigs && window.__galleryConfigs[message.model] && window.__galleryConfigs[message.model].Icon">
<img :src="window.__galleryConfigs[message.model].Icon" class="rounded-lg mt-2 max-w-8 max-h-8 border border-[var(--color-primary-border)]/20">
</template>
<!-- Fallback: use active chat model if message doesn't have one -->
<template x-if="!message.model && $store.chat.activeChat() && $store.chat.activeChat().model && window.__galleryConfigs && window.__galleryConfigs[$store.chat.activeChat().model] && window.__galleryConfigs[$store.chat.activeChat().model].Icon">
<img :src="window.__galleryConfigs[$store.chat.activeChat().model].Icon" class="rounded-lg mt-2 max-w-8 max-h-8 border border-[var(--color-primary-border)]/20">
</template>
<!-- Final fallback: initial model from server -->
<template x-if="!message.model && (!$store.chat.activeChat() || !$store.chat.activeChat().model) && window.__galleryConfigs && window.__galleryConfigs['{{$model}}'] && window.__galleryConfigs['{{$model}}'].Icon">
<img :src="window.__galleryConfigs['{{$model}}'].Icon" class="rounded-lg mt-2 max-w-8 max-h-8 border border-[var(--color-primary-border)]/20">
</template>
<div class="flex flex-col flex-1">
<span class="text-xs font-semibold text-[var(--color-text-secondary)] mb-1">{{if .Model}}{{.Model}}{{else}}Assistant{{end}}</span>
<span class="text-xs font-semibold text-[var(--color-text-secondary)] mb-1" x-text="message.model || $store.chat.activeChat()?.model || '{{if .Model}}{{.Model}}{{else}}Assistant{{end}}'"></span>
<div class="flex-1 text-[var(--color-text-primary)] flex items-center space-x-2 min-w-0">
<div class="p-3 rounded-lg bg-[var(--color-bg-secondary)] border border-[var(--color-accent-border)]/20 shadow-lg max-w-full overflow-x-auto overflow-wrap-anywhere" x-html="message.html"></div>
<button @click="copyToClipboard(message.html)" title="Copy to clipboard" class="text-[var(--color-text-secondary)] hover:text-[var(--color-primary)] transition-colors p-1 flex-shrink-0">

View File

@@ -34,3 +34,278 @@ The list below is a list of software that integrates with LocalAI.
- [Langchain](https://docs.langchain.com/oss/python/integrations/providers/localai) integration package [pypi](https://pypi.org/project/langchain-localai/)
Feel free to open a Pull Request (by clicking "Edit page" below) to get a page added for your project, or if you spot an error on one of the pages!
## Configuration Guides
This section provides step-by-step instructions for configuring specific software to work with LocalAI.
### OpenCode
[OpenCode](https://opencode.ai) is an AI-powered code editor that can be configured to use LocalAI as its backend provider.
#### Prerequisites
- LocalAI must be running and accessible (either locally or on a network)
- You need to know your LocalAI server's IP address/hostname and port (default is `8080`)
#### Configuration Steps
1. **Edit the OpenCode configuration file**
Open the OpenCode configuration file located at `~/.config/opencode/opencode.json` in your editor.
2. **Add LocalAI provider configuration**
Add the following configuration to your `opencode.json` file, replacing the values with your own:
```json
{
"$schema": "https://opencode.ai/config.json",
"provider": {
"LocalAI": {
"npm": "@ai-sdk/openai-compatible",
"name": "LocalAI (local)",
"options": {
"baseURL": "http://127.0.0.1:8080/v1"
},
"models": {
"Qwen3-Coder-30B-A3B-Instruct-i1-GGUF": {
"name": "Qwen3-Coder-30B-A3B-Instruct-i1-GGUF",
"limit": {
"context": 38000,
"output": 65536
}
},
"qwen_qwen3-30b-a3b-instruct-2507": {
"name": "qwen_qwen3-30b-a3b-instruct-2507",
"limit": {
"context": 38000,
"output": 65536
}
}
}
}
}
}
```
3. **Customize the configuration**
- **baseURL**: Replace `http://127.0.0.1:8080/v1` with your LocalAI server's address and port.
- **name**: Change "LocalAI (local)" to a descriptive name for your setup.
- **models**: Replace the model names with the actual model names available in your LocalAI instance. You can find available models by checking your LocalAI models directory or using the LocalAI API.
- **limit**: Adjust the `context` and `output` token limits based on your model's capabilities and available resources.
4. **Verify your models**
Make sure the model names in the configuration exactly match the model names configured in your LocalAI instance. You can verify the available models by checking your LocalAI configuration or by querying the `/v1/models` endpoint.
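If you want to script that check, a small Go program along the lines of the sketch below works; it assumes the standard OpenAI-compatible model listing that LocalAI exposes, and the base URL should be adjusted to your instance.

```go
// List the model IDs known to a LocalAI instance via the OpenAI-compatible
// /v1/models endpoint. The base URL is an assumption; change it as needed.
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

type modelList struct {
	Data []struct {
		ID string `json:"id"`
	} `json:"data"`
}

func main() {
	resp, err := http.Get("http://127.0.0.1:8080/v1/models")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var models modelList
	if err := json.NewDecoder(resp.Body).Decode(&models); err != nil {
		panic(err)
	}
	for _, m := range models.Data {
		// These IDs are what goes under "models" in opencode.json.
		fmt.Println(m.ID)
	}
}
```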
5. **Restart OpenCode**
After saving the configuration file, restart OpenCode for the changes to take effect.
### GitHub Actions
You can use LocalAI in GitHub Actions workflows to perform AI-powered tasks like code review, diff summarization, or automated analysis. The [LocalAI GitHub Action](https://github.com/mudler/localai-github-action) makes it easy to spin up a LocalAI instance in your CI/CD pipeline.
#### Prerequisites
- A GitHub repository with Actions enabled
- A model name from [models.localai.io](https://models.localai.io) or a Hugging Face model reference
#### Example Workflow
This example workflow demonstrates how to use LocalAI to summarize pull request diffs and send notifications:
1. **Create a workflow file**
Create a new file in your repository at `.github/workflows/localai.yml`:
```yaml
name: Use LocalAI in GHA
on:
pull_request:
types:
- closed
jobs:
notify-discord:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: qwen_qwen3-4b-instruct-2507
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
# Starts the LocalAI container
- id: foo
uses: mudler/localai-github-action@v1.1
with:
model: 'qwen_qwen3-4b-instruct-2507' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
# Ask to explain the diff to LocalAI
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "Write a message summarizing the change diffs"
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"
# Print the summary
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
# Send the summary somewhere (e.g. Discord)
- name: Discord notification
env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL }}
DISCORD_USERNAME: "discord-bot"
DISCORD_AVATAR: ""
uses: Ilshidur/action-discord@master
with:
args: ${{ steps.summarize.outputs.message }}
```
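If you prefer a typed client to the curl/jq step, the same call can be made from a small Go program. The sketch below mirrors the workflow above (same endpoint path and payload fields); it is an illustration rather than part of the action, and it assumes the diff file path is passed as the first argument and the model name via `MODEL_NAME`.

```go
// Sketch of the "Summarize" step as a Go program instead of curl/jq.
// Endpoint and payload fields mirror the workflow above; the diff is read
// from the path that git-diff-action wrote (passed here as os.Args[1]).
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"os"
)

type chatResponse struct {
	Choices []struct {
		Message struct {
			Content string `json:"content"`
		} `json:"message"`
	} `json:"choices"`
}

func main() {
	diff, err := os.ReadFile(os.Args[1]) // path to diff.txt
	if err != nil {
		panic(err)
	}

	payload := map[string]any{
		"model": os.Getenv("MODEL_NAME"),
		"messages": []map[string]string{
			{"role": "system", "content": "Write a message summarizing the change diffs"},
			{"role": "user", "content": string(diff)},
		},
	}
	body, _ := json.Marshal(payload)

	resp, err := http.Post("http://localhost:8080/chat/completions",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var out chatResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	if len(out.Choices) > 0 {
		fmt.Println(out.Choices[0].Message.Content)
	}
}
```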
#### Configuration Options
- **Model selection**: Replace `qwen_qwen3-4b-instruct-2507` with any model from [models.localai.io](https://models.localai.io). You can also use Hugging Face models with the `huggingface://<repository>/<file>` reference syntax.
- **Trigger conditions**: Customize the `if` condition to control when the workflow runs. The example only runs when a PR is merged and has a specific label.
- **API endpoint**: The LocalAI container runs on `http://localhost:8080` by default. The action exposes the service on the standard port.
- **Custom prompts**: Modify the system message in the JSON payload to change what LocalAI is asked to do with the diff.
#### Use Cases
- **Code review automation**: Automatically review code changes and provide feedback
- **Diff summarization**: Generate human-readable summaries of code changes
- **Documentation generation**: Create documentation from code changes
- **Security scanning**: Analyze code for potential security issues
- **Test generation**: Generate test cases based on code changes
#### Additional Resources
- [LocalAI GitHub Action repository](https://github.com/mudler/localai-github-action)
- [Available models](https://models.localai.io)
- [LocalAI API documentation](/reference/)
### Realtime Voice Assistant
LocalAI supports realtime voice interactions, enabling voice assistant applications with real-time speech-to-speech communication. A complete example implementation is available in the [LocalAI-examples repository](https://github.com/mudler/LocalAI-examples/tree/main/realtime).
#### Overview
The realtime voice assistant example demonstrates how to build a voice assistant that:
- Captures audio input from the user in real-time
- Transcribes speech to text using LocalAI's transcription capabilities
- Processes the text with a language model
- Generates audio responses using text-to-speech
- Streams audio back to the user in real-time (a rough request-flow sketch follows this list)
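The sketch below shows one turn of that flow as a standalone Go program, purely as an illustration under stated assumptions: the endpoint paths (`/v1/audio/transcriptions`, `/v1/chat/completions`, `/tts`) are LocalAI's OpenAI-compatible and TTS routes, the model names are placeholders, and audio capture, VAD, and streaming playback (handled by the Python client in the example) are omitted.

```go
// One turn of the voice-assistant flow: transcribe -> chat -> synthesize.
// Endpoints and model names are assumptions; adjust them to your setup.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"mime/multipart"
	"net/http"
	"os"
)

const base = "http://localhost:8080"

// transcribe uploads a WAV file to the transcription endpoint and returns the text.
func transcribe(wavPath string) (string, error) {
	var buf bytes.Buffer
	w := multipart.NewWriter(&buf)
	part, _ := w.CreateFormFile("file", "turn.wav")
	f, err := os.Open(wavPath)
	if err != nil {
		return "", err
	}
	defer f.Close()
	io.Copy(part, f)
	w.WriteField("model", "whisper-1") // placeholder transcription model name
	w.Close()

	resp, err := http.Post(base+"/v1/audio/transcriptions", w.FormDataContentType(), &buf)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	var out struct {
		Text string `json:"text"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		return "", err
	}
	return out.Text, nil
}

// reply sends the transcribed text to the chat endpoint and returns the answer.
func reply(text string) (string, error) {
	body, _ := json.Marshal(map[string]any{
		"model":    "my-chat-model", // placeholder LLM name
		"messages": []map[string]string{{"role": "user", "content": text}},
	})
	resp, err := http.Post(base+"/v1/chat/completions", "application/json", bytes.NewReader(body))
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	var out struct {
		Choices []struct {
			Message struct {
				Content string `json:"content"`
			} `json:"message"`
		} `json:"choices"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		return "", err
	}
	if len(out.Choices) == 0 {
		return "", fmt.Errorf("empty chat response")
	}
	return out.Choices[0].Message.Content, nil
}

// speak synthesizes the reply with the TTS endpoint and writes the audio to disk.
func speak(text, outPath string) error {
	body, _ := json.Marshal(map[string]string{"model": "my-tts-model", "input": text})
	resp, err := http.Post(base+"/tts", "application/json", bytes.NewReader(body))
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	audio, err := io.ReadAll(resp.Body)
	if err != nil {
		return err
	}
	return os.WriteFile(outPath, audio, 0o644)
}

func main() {
	text, err := transcribe("input.wav")
	if err != nil {
		panic(err)
	}
	answer, err := reply(text)
	if err != nil {
		panic(err)
	}
	fmt.Println("assistant:", answer)
	if err := speak(answer, "reply.wav"); err != nil {
		panic(err)
	}
}
```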
#### Prerequisites
- A transcription model (e.g., Whisper) configured in LocalAI
- A text-to-speech model configured in LocalAI
- A language model for generating responses
#### Getting Started
1. **Clone the example repository**
```bash
git clone https://github.com/mudler/LocalAI-examples.git
cd LocalAI-examples/realtime
```
2. **Start LocalAI with Docker Compose**
```bash
docker compose up -d
```
The first time you start docker compose, it will take a while to download the available models. You can follow the model downloads in real-time:
```bash
docker logs -f realtime-localai-1
```
3. **Install host dependencies**
Install the host dependencies (sudo is required):
```bash
sudo bash setup.sh
```
4. **Run the voice assistant**
Start the voice assistant application:
```bash
bash run.sh
```
#### Configuration Notes
- **CPU vs GPU**: The example is tuned for CPU usage, but you can run LocalAI on a GPU for better performance and to use larger models.
- **Python client**: The Python client downloads the CPU build of PyTorch, which is fine because the heavy computation is offloaded to LocalAI; the client only runs Silero VAD (Voice Activity Detection), which is fast, and handles audio recording.
- **Thin client architecture**: The Python client is designed to run on thin clients such as Raspberry Pis, while LocalAI handles the heavier computational workload on a more powerful machine.
#### Key Features
- **Real-time processing**: Low-latency audio streaming for natural conversations
- **Voice Activity Detection (VAD)**: Automatic detection of when the user is speaking
- **Turn-taking**: Handles conversation flow with proper turn detection
- **OpenAI-compatible API**: Uses LocalAI's OpenAI-compatible realtime API endpoints
#### Use Cases
- **Voice assistants**: Build custom voice assistants for home automation or productivity
- **Accessibility tools**: Create voice interfaces for accessibility applications
- **Interactive applications**: Add voice interaction to games, educational software, or entertainment apps
- **Customer service**: Implement voice-based customer support systems
#### Additional Resources
- [Realtime Voice Assistant Example](https://github.com/mudler/LocalAI-examples/tree/main/realtime)
- [LocalAI Realtime API documentation](/features/)
- [Audio features documentation](/features/text-to-audio/)
- [Transcription features documentation](/features/audio-to-text/)

View File

@@ -1,4 +1,29 @@
---
- name: "rwkv7-g1c-13.3b"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls:
- https://huggingface.co/NaomiBTW/rwkv7-g1c-13.3b-gguf
description: |
The model is **RWKV7 g1c 13B**, a large language model optimized for efficiency. It is quantized using **Bartowski's calibrationv5 for imatrix** to reduce memory usage while maintaining performance. The base model is **BlinkDL/rwkv7-g1**, and this version is tailored for text-generation tasks. It balances accuracy and efficiency, making it suitable for deployment in various applications.
overrides:
parameters:
model: llama-cpp/models/rwkv7-g1c-13.3b-20251231-Q8_0.gguf
name: rwkv7-g1c-13.3b-gguf
backend: llama-cpp
template:
use_tokenizer_template: true
known_usecases:
- chat
function:
grammar:
disable: true
description: Imported from https://huggingface.co/NaomiBTW/rwkv7-g1c-13.3b-gguf
options:
- use_jinja:true
files:
- filename: llama-cpp/models/rwkv7-g1c-13.3b-20251231-Q8_0.gguf
sha256: e06b3b31cee207723be00425cfc25ae09b7fa1abbd7d97eda4e62a7ef254f877
uri: https://huggingface.co/NaomiBTW/rwkv7-g1c-13.3b-gguf/resolve/main/rwkv7-g1c-13.3b-20251231-Q8_0.gguf
- name: "iquest-coder-v1-40b-instruct-i1"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls:

View File

@@ -2,6 +2,7 @@ package functions
import (
"encoding/json"
"fmt"
"github.com/mudler/xlog"
)
@@ -102,3 +103,91 @@ func (f Functions) Select(name string) Functions {
	return funcs
}

// sanitizeValue recursively sanitizes null values in a JSON structure, converting them to empty objects.
// It handles maps, slices, and nested structures.
func sanitizeValue(value interface{}, path string) interface{} {
	if value == nil {
		// Convert null to empty object
		xlog.Debug("SanitizeTools: found null value, converting to empty object", "path", path)
		return map[string]interface{}{}
	}

	switch v := value.(type) {
	case map[string]interface{}:
		// Recursively sanitize map values
		sanitized := make(map[string]interface{})
		for key, val := range v {
			newPath := path
			if newPath != "" {
				newPath += "."
			}
			newPath += key
			sanitized[key] = sanitizeValue(val, newPath)
		}
		return sanitized
	case []interface{}:
		// Recursively sanitize slice elements
		sanitized := make([]interface{}, len(v))
		for i, val := range v {
			newPath := fmt.Sprintf("%s[%d]", path, i)
			sanitized[i] = sanitizeValue(val, newPath)
		}
		return sanitized
	default:
		// For primitive types (string, number, bool), return as-is
		return value
	}
}

// SanitizeTools removes null values from tool.parameters.properties and converts them to empty objects.
// This prevents Jinja template errors when processing tools with malformed parameter schemas.
// It works by marshaling to JSON, recursively sanitizing the JSON structure, and unmarshaling back.
func SanitizeTools(tools Tools) Tools {
	if len(tools) == 0 {
		return tools
	}

	xlog.Debug("SanitizeTools: processing tools", "count", len(tools))

	// Marshal to JSON to work with the actual JSON representation
	toolsJSON, err := json.Marshal(tools)
	if err != nil {
		xlog.Warn("SanitizeTools: failed to marshal tools to JSON", "error", err)
		return tools
	}

	// Parse JSON into a generic structure
	var toolsData []map[string]interface{}
	if err := json.Unmarshal(toolsJSON, &toolsData); err != nil {
		xlog.Warn("SanitizeTools: failed to unmarshal tools JSON", "error", err)
		return tools
	}

	// Recursively sanitize the JSON structure
	for i, tool := range toolsData {
		if function, ok := tool["function"].(map[string]interface{}); ok {
			// Recursively sanitize the entire tool structure
			tool["function"] = sanitizeValue(function, fmt.Sprintf("tools[%d].function", i))
		}
		toolsData[i] = tool
	}

	// Marshal back to JSON
	sanitizedJSON, err := json.Marshal(toolsData)
	if err != nil {
		xlog.Warn("SanitizeTools: failed to marshal sanitized tools", "error", err)
		return tools
	}

	// Unmarshal back into Tools structure
	var sanitized Tools
	if err := json.Unmarshal(sanitizedJSON, &sanitized); err != nil {
		xlog.Warn("SanitizeTools: failed to unmarshal sanitized tools", "error", err)
		return tools
	}

	return sanitized
}
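
For reference, a small self-contained usage sketch of `SanitizeTools` is shown below. The import path `github.com/mudler/LocalAI/pkg/functions` is assumed from the repository layout and is not part of the diff; the tool definition is made up for illustration.

```go
package main

import (
	"encoding/json"
	"fmt"

	// Assumed import path for the package shown in the diff above.
	"github.com/mudler/LocalAI/pkg/functions"
)

func main() {
	// A tool whose "unit" property is null, which would otherwise trip Jinja templating.
	tools := functions.Tools{
		{
			Type: "function",
			Function: functions.Function{
				Name: "get_weather",
				Parameters: map[string]interface{}{
					"type": "object",
					"properties": map[string]interface{}{
						"location": map[string]interface{}{"type": "string"},
						"unit":     nil, // malformed schema entry
					},
				},
			},
		},
	}

	sanitized := functions.SanitizeTools(tools)

	out, err := json.MarshalIndent(sanitized, "", "  ")
	if err != nil {
		panic(err)
	}
	// After sanitization, "unit" is serialized as {} instead of null.
	fmt.Println(string(out))
}
```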

@@ -82,4 +82,202 @@ var _ = Describe("LocalAI grammar functions", func() {
			Expect(functions[0].Name).To(Equal("create_event"))
		})
	})

	Context("SanitizeTools()", func() {
		It("returns empty slice when input is empty", func() {
			tools := Tools{}
			sanitized := SanitizeTools(tools)
			Expect(len(sanitized)).To(Equal(0))
		})

		It("converts null values in parameters.properties to empty objects", func() {
			tools := Tools{
				{
					Type: "function",
					Function: Function{
						Name:        "test_function",
						Description: "A test function",
						Parameters: map[string]interface{}{
							"type": "object",
							"properties": map[string]interface{}{
								"valid_param": map[string]interface{}{
									"type": "string",
								},
								"null_param": nil,
								"another_valid": map[string]interface{}{
									"type": "integer",
								},
							},
						},
					},
				},
			}

			sanitized := SanitizeTools(tools)
			Expect(len(sanitized)).To(Equal(1))
			Expect(sanitized[0].Function.Name).To(Equal("test_function"))

			properties := sanitized[0].Function.Parameters["properties"].(map[string]interface{})
			Expect(properties["valid_param"]).NotTo(BeNil())
			Expect(properties["null_param"]).NotTo(BeNil())
			Expect(properties["null_param"]).To(Equal(map[string]interface{}{}))
			Expect(properties["another_valid"]).NotTo(BeNil())
		})

		It("preserves valid parameter structures unchanged", func() {
			tools := Tools{
				{
					Type: "function",
					Function: Function{
						Name:        "valid_function",
						Description: "A function with valid parameters",
						Parameters: map[string]interface{}{
							"type": "object",
							"properties": map[string]interface{}{
								"param1": map[string]interface{}{
									"type":        "string",
									"description": "First parameter",
								},
								"param2": map[string]interface{}{
									"type": "integer",
								},
							},
						},
					},
				},
			}

			sanitized := SanitizeTools(tools)
			Expect(len(sanitized)).To(Equal(1))
			Expect(sanitized[0].Function.Name).To(Equal("valid_function"))

			properties := sanitized[0].Function.Parameters["properties"].(map[string]interface{})
			Expect(properties["param1"].(map[string]interface{})["type"]).To(Equal("string"))
			Expect(properties["param1"].(map[string]interface{})["description"]).To(Equal("First parameter"))
			Expect(properties["param2"].(map[string]interface{})["type"]).To(Equal("integer"))
		})

		It("handles tools without parameters field", func() {
			tools := Tools{
				{
					Type: "function",
					Function: Function{
						Name:        "no_params_function",
						Description: "A function without parameters",
					},
				},
			}

			sanitized := SanitizeTools(tools)
			Expect(len(sanitized)).To(Equal(1))
			Expect(sanitized[0].Function.Name).To(Equal("no_params_function"))
			Expect(sanitized[0].Function.Parameters).To(BeNil())
		})

		It("handles tools without properties field", func() {
			tools := Tools{
				{
					Type: "function",
					Function: Function{
						Name:        "no_properties_function",
						Description: "A function without properties",
						Parameters: map[string]interface{}{
							"type": "object",
						},
					},
				},
			}

			sanitized := SanitizeTools(tools)
			Expect(len(sanitized)).To(Equal(1))
			Expect(sanitized[0].Function.Name).To(Equal("no_properties_function"))
			Expect(sanitized[0].Function.Parameters["type"]).To(Equal("object"))
		})

		It("handles multiple tools with mixed valid and null values", func() {
			tools := Tools{
				{
					Type: "function",
					Function: Function{
						Name: "function_with_nulls",
						Parameters: map[string]interface{}{
							"properties": map[string]interface{}{
								"valid": map[string]interface{}{
									"type": "string",
								},
								"null1": nil,
								"null2": nil,
							},
						},
					},
				},
				{
					Type: "function",
					Function: Function{
						Name: "function_all_valid",
						Parameters: map[string]interface{}{
							"properties": map[string]interface{}{
								"param1": map[string]interface{}{
									"type": "string",
								},
								"param2": map[string]interface{}{
									"type": "integer",
								},
							},
						},
					},
				},
				{
					Type: "function",
					Function: Function{
						Name: "function_no_params",
					},
				},
			}

			sanitized := SanitizeTools(tools)
			Expect(len(sanitized)).To(Equal(3))

			// First tool should have nulls converted to empty objects
			props1 := sanitized[0].Function.Parameters["properties"].(map[string]interface{})
			Expect(props1["valid"]).NotTo(BeNil())
			Expect(props1["null1"]).To(Equal(map[string]interface{}{}))
			Expect(props1["null2"]).To(Equal(map[string]interface{}{}))

			// Second tool should remain unchanged
			props2 := sanitized[1].Function.Parameters["properties"].(map[string]interface{})
			Expect(props2["param1"].(map[string]interface{})["type"]).To(Equal("string"))
			Expect(props2["param2"].(map[string]interface{})["type"]).To(Equal("integer"))

			// Third tool should remain unchanged
			Expect(sanitized[2].Function.Parameters).To(BeNil())
		})

		It("does not modify the original tools slice", func() {
			tools := Tools{
				{
					Type: "function",
					Function: Function{
						Name: "test_function",
						Parameters: map[string]interface{}{
							"properties": map[string]interface{}{
								"null_param": nil,
							},
						},
					},
				},
			}

			originalProperties := tools[0].Function.Parameters["properties"].(map[string]interface{})
			originalNullValue := originalProperties["null_param"]

			sanitized := SanitizeTools(tools)

			// Original should still have nil
			Expect(originalNullValue).To(BeNil())

			// Sanitized should have empty object
			sanitizedProperties := sanitized[0].Function.Parameters["properties"].(map[string]interface{})
			Expect(sanitizedProperties["null_param"]).To(Equal(map[string]interface{}{}))
		})
	})
})

@@ -702,30 +702,6 @@ const docTemplate = `{
                }
            }
        },
        "/mcp/v1/completions": {
            "post": {
                "summary": "Generate completions for a given prompt and model.",
                "parameters": [
                    {
                        "description": "query params",
                        "name": "request",
                        "in": "body",
                        "required": true,
                        "schema": {
                            "$ref": "#/definitions/schema.OpenAIRequest"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Response",
                        "schema": {
                            "$ref": "#/definitions/schema.OpenAIResponse"
                        }
                    }
                }
            }
        },
        "/metrics": {
            "get": {
                "summary": "Prometheus metrics endpoint",

@@ -695,30 +695,6 @@
                }
            }
        },
        "/mcp/v1/completions": {
            "post": {
                "summary": "Generate completions for a given prompt and model.",
                "parameters": [
                    {
                        "description": "query params",
                        "name": "request",
                        "in": "body",
                        "required": true,
                        "schema": {
                            "$ref": "#/definitions/schema.OpenAIRequest"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Response",
                        "schema": {
                            "$ref": "#/definitions/schema.OpenAIResponse"
                        }
                    }
                }
            }
        },
        "/metrics": {
            "get": {
                "summary": "Prometheus metrics endpoint",

@@ -1495,21 +1495,6 @@ paths:
          schema:
            $ref: '#/definitions/services.GalleryOpStatus'
      summary: Returns the job status
  /mcp/v1/completions:
    post:
      parameters:
      - description: query params
        in: body
        name: request
        required: true
        schema:
          $ref: '#/definitions/schema.OpenAIRequest'
      responses:
        "200":
          description: Response
          schema:
            $ref: '#/definitions/schema.OpenAIResponse'
      summary: Generate completions for a given prompt and model.
  /metrics:
    get:
      parameters: