Compare commits

...

12 Commits

Author SHA1 Message Date
Ettore Di Giacinto
93d3e4257a recursive
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-04 20:22:02 +00:00
Ettore Di Giacinto
e0e904ff98 fixups
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-04 20:22:02 +00:00
Ettore Di Giacinto
a95422f4d1 fix(tools): sanitize inputs
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-04 20:22:02 +00:00
Ettore Di Giacinto
560bf50299 chore(Makefile): refactor common make targets (#7858)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-04 21:12:50 +01:00
LocalAI [bot]
a7e155240b chore: ⬆️ Update ggml-org/llama.cpp to e57f52334b2e8436a94f7e332462dfc63a08f995 (#7848)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-01-04 10:27:45 +01:00
LocalAI [bot]
793e4907a2 feat(swagger): update swagger (#7847)
Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-01-03 22:09:39 +01:00
Ettore Di Giacinto
d38811560c chore(docs): add opencode, GHA, and realtime voice assistant examples
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-03 22:03:43 +01:00
Ettore Di Giacinto
33cc0b8e13 fix(chat/ui): record model name in history for consistency (#7845)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-03 18:05:33 +01:00
lif
4cd95b8a9d fix: Highly inconsistent agent response to cogito agent calling MCP server - Body "Invalid http method" (#7790)
* fix: resolve duplicate MCP route registration causing 50% failure rate

Fixes #7772

The issue was caused by duplicate registration of the MCP endpoint
/mcp/v1/chat/completions in both openai.go and localai.go, leading
to a race condition where requests would randomly hit different
handlers with incompatible behaviors.

Changes:
- Removed duplicate MCP route registration from openai.go
- Kept the localai.MCPStreamEndpoint as the canonical handler
- Added all three MCP route patterns for backward compatibility:
  * /v1/mcp/chat/completions
  * /mcp/v1/chat/completions
  * /mcp/chat/completions
- Added comments to clarify route ownership and prevent future conflicts
- Fixed formatting in ui_api.go

The localai.MCPStreamEndpoint handler is more feature-complete as it
supports both streaming and non-streaming modes, while the removed
openai.MCPCompletionEndpoint only supported synchronous requests.

This eliminates the ~50% failure rate where the cogito library would
receive "Invalid http method" errors when internal HTTP requests were
routed to the wrong handler.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
Signed-off-by: majiayu000 <1835304752@qq.com>

* Address feedback from review

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: majiayu000 <1835304752@qq.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-03 15:43:23 +01:00
LocalAI [bot]
8c504113a2 chore(model gallery): 🤖 add 1 new models via gallery agent (#7840)
chore(model gallery): 🤖 add new models via gallery agent

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-01-03 08:42:05 +01:00
coffeerunhobby
666d110714 fix: Prevent BMI2 instruction crash on AVX-only CPUs (#7817)
* Fix: Prevent BMI2 instruction crash on AVX-only CPUs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: apply no-bmi flags on non-darwin

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: coffeerunhobby <coffeerunhobby@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
2026-01-03 08:36:55 +01:00
LocalAI [bot]
641606ae93 chore: ⬆️ Update ggml-org/llama.cpp to 706e3f93a60109a40f1224eaf4af0d59caa7c3ae (#7836)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2026-01-02 21:26:37 +00:00
19 changed files with 817 additions and 412 deletions

Makefile
View File

@@ -6,6 +6,7 @@ LAUNCHER_BINARY_NAME=local-ai-launcher
CUDA_MAJOR_VERSION?=13
CUDA_MINOR_VERSION?=0
UBUNTU_VERSION?=2204
GORELEASER?=
@@ -155,7 +156,16 @@ test: test-models/testmodel.ggml protogen-go
########################################################
docker-build-aio:
docker build --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
docker build \
--build-arg MAKEFLAGS="--jobs=5 --output-sync=target" \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
--build-arg GO_TAGS="$(GO_TAGS)" \
-t local-ai:tests -f Dockerfile .
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test $(MAKE) docker-aio
e2e-aio:
@@ -177,7 +187,16 @@ prepare-e2e:
mkdir -p $(TEST_DIR)
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 -t localai-tests .
docker build \
--build-arg IMAGE_TYPE=core \
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
-t localai-tests .
run-e2e-image:
ls -liah $(abspath ./tests/e2e-fixtures)
@@ -308,6 +327,9 @@ docker:
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
-t $(DOCKER_IMAGE) .
docker-cuda11:
@@ -319,6 +341,7 @@ docker-cuda11:
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
-t $(DOCKER_IMAGE)-cuda-11 .
docker-aio:
@@ -326,6 +349,9 @@ docker-aio:
docker build \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
-t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio .
docker-aio-all:
@@ -338,62 +364,26 @@ docker-image-intel:
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
--build-arg GO_TAGS="$(GO_TAGS)" \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg BUILD_TYPE=intel -t $(DOCKER_IMAGE) .
--build-arg BUILD_TYPE=intel \
--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
-t $(DOCKER_IMAGE) .
########################################################
## Backends
########################################################
# Pattern rule for standard backends (docker-based)
# This matches all backends that use docker-build-* and docker-save-*
backends/%: docker-build-% docker-save-% build
./local-ai backends install "ocifile://$(abspath ./backend-images/$*.tar)"
backends/diffusers: docker-build-diffusers docker-save-diffusers build
./local-ai backends install "ocifile://$(abspath ./backend-images/diffusers.tar)"
backends/llama-cpp: docker-build-llama-cpp docker-save-llama-cpp build
./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
backends/piper: docker-build-piper docker-save-piper build
./local-ai backends install "ocifile://$(abspath ./backend-images/piper.tar)"
backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build
./local-ai backends install "ocifile://$(abspath ./backend-images/stablediffusion-ggml.tar)"
backends/whisper: docker-build-whisper docker-save-whisper build
./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)"
backends/silero-vad: docker-build-silero-vad docker-save-silero-vad build
./local-ai backends install "ocifile://$(abspath ./backend-images/silero-vad.tar)"
backends/local-store: docker-build-local-store docker-save-local-store build
./local-ai backends install "ocifile://$(abspath ./backend-images/local-store.tar)"
backends/huggingface: docker-build-huggingface docker-save-huggingface build
./local-ai backends install "ocifile://$(abspath ./backend-images/huggingface.tar)"
backends/rfdetr: docker-build-rfdetr docker-save-rfdetr build
./local-ai backends install "ocifile://$(abspath ./backend-images/rfdetr.tar)"
backends/kitten-tts: docker-build-kitten-tts docker-save-kitten-tts build
./local-ai backends install "ocifile://$(abspath ./backend-images/kitten-tts.tar)"
backends/kokoro: docker-build-kokoro docker-save-kokoro build
./local-ai backends install "ocifile://$(abspath ./backend-images/kokoro.tar)"
backends/chatterbox: docker-build-chatterbox docker-save-chatterbox build
./local-ai backends install "ocifile://$(abspath ./backend-images/chatterbox.tar)"
# Darwin-specific backends (keep as explicit targets since they have special build logic)
backends/llama-cpp-darwin: build
bash ./scripts/build/llama-cpp-darwin.sh
./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
backends/neutts: docker-build-neutts docker-save-neutts build
./local-ai backends install "ocifile://$(abspath ./backend-images/neutts.tar)"
backends/vllm: docker-build-vllm docker-save-vllm build
./local-ai backends install "ocifile://$(abspath ./backend-images/vllm.tar)"
backends/vibevoice: docker-build-vibevoice docker-save-vibevoice build
./local-ai backends install "ocifile://$(abspath ./backend-images/vibevoice.tar)"
build-darwin-python-backend: build
bash ./scripts/build/python-darwin.sh
@@ -423,119 +413,83 @@ backends/stablediffusion-ggml-darwin:
backend-images:
mkdir -p backend-images
docker-build-llama-cpp:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:llama-cpp -f backend/Dockerfile.llama-cpp .
# Backend metadata: BACKEND_NAME | DOCKERFILE_TYPE | BUILD_CONTEXT | PROGRESS_FLAG | NEEDS_BACKEND_ARG
# llama-cpp is special - uses llama-cpp Dockerfile and doesn't need BACKEND arg
BACKEND_LLAMA_CPP = llama-cpp|llama-cpp|.|false|false
docker-build-bark-cpp:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark-cpp -f backend/Dockerfile.golang --build-arg BACKEND=bark-cpp .
# Golang backends
BACKEND_BARK_CPP = bark-cpp|golang|.|false|true
BACKEND_PIPER = piper|golang|.|false|true
BACKEND_LOCAL_STORE = local-store|golang|.|false|true
BACKEND_HUGGINGFACE = huggingface|golang|.|false|true
BACKEND_SILERO_VAD = silero-vad|golang|.|false|true
BACKEND_STABLEDIFFUSION_GGML = stablediffusion-ggml|golang|.|--progress=plain|true
BACKEND_WHISPER = whisper|golang|.|false|true
docker-build-piper:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:piper -f backend/Dockerfile.golang --build-arg BACKEND=piper .
# Python backends with root context
BACKEND_RERANKERS = rerankers|python|.|false|true
BACKEND_TRANSFORMERS = transformers|python|.|false|true
BACKEND_FASTER_WHISPER = faster-whisper|python|.|false|true
BACKEND_COQUI = coqui|python|.|false|true
BACKEND_BARK = bark|python|.|false|true
BACKEND_EXLLAMA2 = exllama2|python|.|false|true
docker-build-local-store:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:local-store -f backend/Dockerfile.golang --build-arg BACKEND=local-store .
# Python backends with ./backend context
BACKEND_RFDETR = rfdetr|python|./backend|false|true
BACKEND_KITTEN_TTS = kitten-tts|python|./backend|false|true
BACKEND_NEUTTS = neutts|python|./backend|false|true
BACKEND_KOKORO = kokoro|python|./backend|false|true
BACKEND_VLLM = vllm|python|./backend|false|true
BACKEND_DIFFUSERS = diffusers|python|./backend|--progress=plain|true
BACKEND_CHATTERBOX = chatterbox|python|./backend|false|true
BACKEND_VIBEVOICE = vibevoice|python|./backend|--progress=plain|true
docker-build-huggingface:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:huggingface -f backend/Dockerfile.golang --build-arg BACKEND=huggingface .
# Helper function to build docker image for a backend
# Usage: $(call docker-build-backend,BACKEND_NAME,DOCKERFILE_TYPE,BUILD_CONTEXT,PROGRESS_FLAG,NEEDS_BACKEND_ARG)
define docker-build-backend
docker build $(if $(filter-out false,$(4)),$(4)) \
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
$(if $(filter true,$(5)),--build-arg BACKEND=$(1)) \
-t local-ai-backend:$(1) -f backend/Dockerfile.$(2) $(3)
endef
docker-build-rfdetr:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:rfdetr -f backend/Dockerfile.python --build-arg BACKEND=rfdetr ./backend
# Generate docker-build targets from backend definitions
define generate-docker-build-target
docker-build-$(word 1,$(subst |, ,$(1))):
$$(call docker-build-backend,$(word 1,$(subst |, ,$(1))),$(word 2,$(subst |, ,$(1))),$(word 3,$(subst |, ,$(1))),$(word 4,$(subst |, ,$(1))),$(word 5,$(subst |, ,$(1))))
endef
docker-build-kitten-tts:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kitten-tts -f backend/Dockerfile.python --build-arg BACKEND=kitten-tts ./backend
# Generate all docker-build targets
$(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP)))
$(eval $(call generate-docker-build-target,$(BACKEND_BARK_CPP)))
$(eval $(call generate-docker-build-target,$(BACKEND_PIPER)))
$(eval $(call generate-docker-build-target,$(BACKEND_LOCAL_STORE)))
$(eval $(call generate-docker-build-target,$(BACKEND_HUGGINGFACE)))
$(eval $(call generate-docker-build-target,$(BACKEND_SILERO_VAD)))
$(eval $(call generate-docker-build-target,$(BACKEND_STABLEDIFFUSION_GGML)))
$(eval $(call generate-docker-build-target,$(BACKEND_WHISPER)))
$(eval $(call generate-docker-build-target,$(BACKEND_RERANKERS)))
$(eval $(call generate-docker-build-target,$(BACKEND_TRANSFORMERS)))
$(eval $(call generate-docker-build-target,$(BACKEND_FASTER_WHISPER)))
$(eval $(call generate-docker-build-target,$(BACKEND_COQUI)))
$(eval $(call generate-docker-build-target,$(BACKEND_BARK)))
$(eval $(call generate-docker-build-target,$(BACKEND_EXLLAMA2)))
$(eval $(call generate-docker-build-target,$(BACKEND_RFDETR)))
$(eval $(call generate-docker-build-target,$(BACKEND_KITTEN_TTS)))
$(eval $(call generate-docker-build-target,$(BACKEND_NEUTTS)))
$(eval $(call generate-docker-build-target,$(BACKEND_KOKORO)))
$(eval $(call generate-docker-build-target,$(BACKEND_VLLM)))
$(eval $(call generate-docker-build-target,$(BACKEND_DIFFUSERS)))
$(eval $(call generate-docker-build-target,$(BACKEND_CHATTERBOX)))
$(eval $(call generate-docker-build-target,$(BACKEND_VIBEVOICE)))
docker-save-kitten-tts: backend-images
docker save local-ai-backend:kitten-tts -o backend-images/kitten-tts.tar
docker-save-chatterbox: backend-images
docker save local-ai-backend:chatterbox -o backend-images/chatterbox.tar
docker-save-vibevoice: backend-images
docker save local-ai-backend:vibevoice -o backend-images/vibevoice.tar
docker-build-neutts:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:neutts -f backend/Dockerfile.python --build-arg BACKEND=neutts ./backend
docker-save-neutts: backend-images
docker save local-ai-backend:neutts -o backend-images/neutts.tar
docker-build-kokoro:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro ./backend
docker-build-vllm:
docker build --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:vllm -f backend/Dockerfile.python --build-arg BACKEND=vllm ./backend
docker-save-vllm: backend-images
docker save local-ai-backend:vllm -o backend-images/vllm.tar
docker-save-kokoro: backend-images
docker save local-ai-backend:kokoro -o backend-images/kokoro.tar
docker-save-rfdetr: backend-images
docker save local-ai-backend:rfdetr -o backend-images/rfdetr.tar
docker-save-huggingface: backend-images
docker save local-ai-backend:huggingface -o backend-images/huggingface.tar
docker-save-local-store: backend-images
docker save local-ai-backend:local-store -o backend-images/local-store.tar
docker-build-silero-vad:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:silero-vad -f backend/Dockerfile.golang --build-arg BACKEND=silero-vad .
docker-save-silero-vad: backend-images
docker save local-ai-backend:silero-vad -o backend-images/silero-vad.tar
docker-save-piper: backend-images
docker save local-ai-backend:piper -o backend-images/piper.tar
docker-save-llama-cpp: backend-images
docker save local-ai-backend:llama-cpp -o backend-images/llama-cpp.tar
docker-save-bark-cpp: backend-images
docker save local-ai-backend:bark-cpp -o backend-images/bark-cpp.tar
docker-build-stablediffusion-ggml:
docker build --progress=plain --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) -t local-ai-backend:stablediffusion-ggml -f backend/Dockerfile.golang --build-arg BACKEND=stablediffusion-ggml .
docker-save-stablediffusion-ggml: backend-images
docker save local-ai-backend:stablediffusion-ggml -o backend-images/stablediffusion-ggml.tar
docker-build-rerankers:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:rerankers -f backend/Dockerfile.python --build-arg BACKEND=rerankers .
docker-build-transformers:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:transformers -f backend/Dockerfile.python --build-arg BACKEND=transformers .
docker-build-diffusers:
docker build --progress=plain --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:diffusers -f backend/Dockerfile.python --build-arg BACKEND=diffusers ./backend
docker-save-diffusers: backend-images
docker save local-ai-backend:diffusers -o backend-images/diffusers.tar
docker-build-whisper:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) -t local-ai-backend:whisper -f backend/Dockerfile.golang --build-arg BACKEND=whisper .
docker-save-whisper: backend-images
docker save local-ai-backend:whisper -o backend-images/whisper.tar
docker-build-faster-whisper:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:faster-whisper -f backend/Dockerfile.python --build-arg BACKEND=faster-whisper .
docker-build-coqui:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:coqui -f backend/Dockerfile.python --build-arg BACKEND=coqui .
docker-build-bark:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark -f backend/Dockerfile.python --build-arg BACKEND=bark .
docker-build-chatterbox:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox ./backend
docker-build-vibevoice:
docker build --progress=plain --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:vibevoice -f backend/Dockerfile.python --build-arg BACKEND=vibevoice ./backend
docker-build-exllama2:
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:exllama2 -f backend/Dockerfile.python --build-arg BACKEND=exllama2 .
# Pattern rule for docker-save targets
docker-save-%: backend-images
docker save local-ai-backend:$* -o backend-images/$*.tar
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-bark docker-build-chatterbox docker-build-vibevoice docker-build-exllama2

View File

@@ -1,5 +1,5 @@
LLAMA_VERSION?=ced765be44ce173c374f295b3c6f4175f8fd109b
LLAMA_VERSION?=e57f52334b2e8436a94f7e332462dfc63a08f995
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?=
@@ -8,6 +8,7 @@ NATIVE?=false
ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
TARGET?=--target grpc-server
JOBS?=$(shell nproc)
ARCH?=$(shell uname -m)
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=OFF
@@ -106,21 +107,39 @@ llama-cpp-avx: llama.cpp
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build purge
$(info ${GREEN}I llama-cpp build info:avx${RESET})
ifeq ($(OS),Darwin)
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64))
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
else
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DCMAKE_C_FLAGS=-mno-bmi2 -DCMAKE_CXX_FLAGS=-mno-bmi2" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
endif
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build/grpc-server llama-cpp-avx
llama-cpp-fallback: llama.cpp
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build purge
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
ifeq ($(OS),Darwin)
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64))
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
else
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DCMAKE_C_FLAGS='-mno-bmi -mno-bmi2' -DCMAKE_CXX_FLAGS='-mno-bmi -mno-bmi2'" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
endif
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback
llama-cpp-grpc: llama.cpp
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
ifeq ($(OS),Darwin)
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64))
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
else
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DCMAKE_C_FLAGS='-mno-bmi -mno-bmi2' -DCMAKE_CXX_FLAGS='-mno-bmi -mno-bmi2'" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
endif
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc
llama-cpp-rpc-server: llama-cpp-grpc

View File

@@ -293,6 +293,8 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, const
return data;
}
// Sanitize tools JSON to remove null values from tool.parameters.properties
// This prevents Jinja template errors when processing tools with malformed parameter schemas
const std::vector<ggml_type> kv_cache_types = {
GGML_TYPE_F32,

View File

@@ -53,12 +53,12 @@ type MCPErrorEvent struct {
Message string `json:"message"`
}
// MCPStreamEndpoint is the SSE streaming endpoint for MCP chat completions
// MCPEndpoint is the endpoint for MCP chat completions. Supports SSE mode, but it is not compatible with the OpenAI apis.
// @Summary Stream MCP chat completions with reasoning, tool calls, and results
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/mcp/chat/completions [post]
func MCPStreamEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) echo.HandlerFunc {
func MCPEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) echo.HandlerFunc {
return func(c echo.Context) error {
ctx := c.Request().Context()
created := int(time.Now().Unix())
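For reference, calling this endpoint from a client looks the same as a regular chat completion, just on a different path. The sketch below is a minimal standalone client and is not part of the diff: the route comes from the route registrations further down in this compare, the payload mirrors a standard OpenAI chat request, and the model name is a placeholder for a model configured with MCP servers.

```go
// Minimal sketch: POST an OpenAI-style chat request to the MCP endpoint.
// Assumptions: LocalAI listening on localhost:8080, "my-mcp-model" is a
// placeholder for a model with MCP servers configured, and omitting
// "stream" yields a single JSON response rather than SSE events.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	payload := map[string]any{
		"model": "my-mcp-model",
		"messages": []map[string]string{
			{"role": "user", "content": "What is the weather in Rome?"},
		},
	}
	body, _ := json.Marshal(payload)

	resp, err := http.Post("http://localhost:8080/v1/mcp/chat/completions",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out)) // OpenAI-style response object in non-streaming mode
}
```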

View File

@@ -622,7 +622,9 @@ func handleQuestion(config *config.ModelConfig, cl *config.ModelConfigLoader, in
// Serialize tools and tool_choice to JSON strings
toolsJSON := ""
if len(input.Tools) > 0 {
toolsBytes, err := json.Marshal(input.Tools)
// Sanitize tools to remove null values from parameters.properties
sanitizedTools := functions.SanitizeTools(input.Tools)
toolsBytes, err := json.Marshal(sanitizedTools)
if err == nil {
toolsJSON = string(toolsBytes)
}

View File

@@ -7,6 +7,7 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/functions"
model "github.com/mudler/LocalAI/pkg/model"
)
@@ -42,7 +43,9 @@ func ComputeChoices(
// Serialize tools and tool_choice to JSON strings
toolsJSON := ""
if len(req.Tools) > 0 {
toolsBytes, err := json.Marshal(req.Tools)
// Sanitize tools to remove null values from parameters.properties
sanitizedTools := functions.SanitizeTools(req.Tools)
toolsBytes, err := json.Marshal(sanitizedTools)
if err == nil {
toolsJSON = string(toolsBytes)
}

View File

@@ -1,148 +0,0 @@
package openai
import (
"context"
"encoding/json"
"errors"
"fmt"
"net"
"time"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/config"
mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/templates"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/cogito"
"github.com/mudler/xlog"
)
// MCPCompletionEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/completions
// @Summary Generate completions for a given prompt and model.
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /mcp/v1/completions [post]
func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) echo.HandlerFunc {
// We do not support streaming mode (Yet?)
return func(c echo.Context) error {
created := int(time.Now().Unix())
ctx := c.Request().Context()
// Handle Correlation
id := c.Request().Header.Get("X-Correlation-ID")
if id == "" {
id = uuid.New().String()
}
input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
if !ok || input.Model == "" {
return echo.ErrBadRequest
}
config, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
if !ok || config == nil {
return echo.ErrBadRequest
}
if config.MCP.Servers == "" && config.MCP.Stdio == "" {
return fmt.Errorf("no MCP servers configured")
}
// Get MCP config from model config
remote, stdio, err := config.MCP.MCPConfigFromYAML()
if err != nil {
return fmt.Errorf("failed to get MCP config: %w", err)
}
// Check if we have tools in cache, or we have to have an initial connection
sessions, err := mcpTools.SessionsFromMCPConfig(config.Name, remote, stdio)
if err != nil {
return fmt.Errorf("failed to get MCP sessions: %w", err)
}
if len(sessions) == 0 {
return fmt.Errorf("no working MCP servers found")
}
fragment := cogito.NewEmptyFragment()
for _, message := range input.Messages {
fragment = fragment.AddMessage(message.Role, message.StringContent)
}
_, port, err := net.SplitHostPort(appConfig.APIAddress)
if err != nil {
return err
}
apiKey := ""
if appConfig.ApiKeys != nil {
apiKey = appConfig.ApiKeys[0]
}
ctxWithCancellation, cancel := context.WithCancel(ctx)
defer cancel()
// TODO: instead of connecting to the API, we should just wire this internally
// and act like completion.go.
// We can do this as cogito expects an interface and we can create one that
// we satisfy to just call internally ComputeChoices
defaultLLM := cogito.NewOpenAILLM(config.Name, apiKey, "http://127.0.0.1:"+port)
// Build cogito options using the consolidated method
cogitoOpts := config.BuildCogitoOptions()
cogitoOpts = append(
cogitoOpts,
cogito.WithContext(ctxWithCancellation),
cogito.WithMCPs(sessions...),
cogito.WithStatusCallback(func(s string) {
xlog.Debug("[model agent] Status", "model", config.Name, "status", s)
}),
cogito.WithReasoningCallback(func(s string) {
xlog.Debug("[model agent] Reasoning", "model", config.Name, "reasoning", s)
}),
cogito.WithToolCallBack(func(t *cogito.ToolChoice, state *cogito.SessionState) cogito.ToolCallDecision {
xlog.Debug("[model agent] Tool call", "model", config.Name, "tool", t.Name, "reasoning", t.Reasoning, "arguments", t.Arguments)
return cogito.ToolCallDecision{
Approved: true,
}
}),
cogito.WithToolCallResultCallback(func(t cogito.ToolStatus) {
xlog.Debug("[model agent] Tool call result", "model", config.Name, "tool", t.Name, "result", t.Result, "tool_arguments", t.ToolArguments)
}),
)
f, err := cogito.ExecuteTools(
defaultLLM, fragment,
cogitoOpts...,
)
if err != nil && !errors.Is(err, cogito.ErrNoToolSelected) {
return err
}
f, err = defaultLLM.Ask(ctx, f)
if err != nil {
return err
}
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{{Message: &schema.Message{Role: "assistant", Content: &f.LastMessage().Content}}},
Object: "text_completion",
}
jsonResult, _ := json.Marshal(resp)
xlog.Debug("Response", "response", string(jsonResult))
// Return the prediction in the response body
return c.JSON(200, resp)
}
}

View File

@@ -137,9 +137,10 @@ func RegisterLocalAIRoutes(router *echo.Echo,
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TOKENIZE)),
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TokenizeRequest) }))
// MCP Stream endpoint
// MCP endpoint - supports both streaming and non-streaming modes
// Note: streaming mode is NOT compatible with the OpenAI apis. We have a set which streams more states.
if evaluator != nil {
mcpStreamHandler := localai.MCPStreamEndpoint(cl, ml, evaluator, appConfig)
mcpStreamHandler := localai.MCPEndpoint(cl, ml, evaluator, appConfig)
mcpStreamMiddleware := []echo.MiddlewareFunc{
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
@@ -154,6 +155,7 @@ func RegisterLocalAIRoutes(router *echo.Echo,
}
router.POST("/v1/mcp/chat/completions", mcpStreamHandler, mcpStreamMiddleware...)
router.POST("/mcp/v1/chat/completions", mcpStreamHandler, mcpStreamMiddleware...)
router.POST("/mcp/chat/completions", mcpStreamHandler, mcpStreamMiddleware...)
}
// Agent job routes

View File

@@ -79,24 +79,6 @@ func RegisterOpenAIRoutes(app *echo.Echo,
app.POST("/completions", completionHandler, completionMiddleware...)
app.POST("/v1/engines/:model/completions", completionHandler, completionMiddleware...)
// MCPcompletion
mcpCompletionHandler := openai.MCPCompletionEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig())
mcpCompletionMiddleware := []echo.MiddlewareFunc{
traceMiddleware,
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
func(next echo.HandlerFunc) echo.HandlerFunc {
return func(c echo.Context) error {
if err := re.SetOpenAIRequest(c); err != nil {
return err
}
return next(c)
}
},
}
app.POST("/mcp/v1/chat/completions", mcpCompletionHandler, mcpCompletionMiddleware...)
app.POST("/mcp/chat/completions", mcpCompletionHandler, mcpCompletionMiddleware...)
// embeddings
embeddingHandler := openai.EmbeddingsEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
embeddingMiddleware := []echo.MiddlewareFunc{

View File

@@ -954,7 +954,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
if !appConfig.EnableTracing {
return c.JSON(503, map[string]any{
"error": "Tracing disabled",
})
})
}
traces := middleware.GetTraces()
return c.JSON(200, map[string]interface{}{

View File

@@ -750,6 +750,7 @@ function stopRequest() {
if (!activeChat) return;
const request = activeRequests.get(activeChat.id);
const requestModel = request?.model || null; // Get model before deleting request
if (request) {
if (request.controller) {
request.controller.abort();
@@ -779,7 +780,8 @@ function stopRequest() {
`<span class='error'>Request cancelled by user</span>`,
null,
null,
activeChat.id
activeChat.id,
requestModel
);
}
@@ -1231,7 +1233,8 @@ async function promptGPT(systemPrompt, input) {
startTime: requestStartTime,
tokensReceived: 0,
interval: null,
maxTokensPerSecond: 0
maxTokensPerSecond: 0,
model: model // Store the model used for this request
});
// Update reactive tracking for UI indicators
@@ -1271,21 +1274,27 @@ async function promptGPT(systemPrompt, input) {
return;
} else {
// Timeout error (controller was aborted by timeout, not user)
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
chatStore.add(
"assistant",
`<span class='error'>Request timeout: MCP processing is taking longer than expected. Please try again.</span>`,
null,
null,
chatId
chatId,
requestModel
);
}
} else {
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
chatStore.add(
"assistant",
`<span class='error'>Network Error: ${error.message}</span>`,
null,
null,
chatId
chatId,
requestModel
);
}
toggleLoader(false, chatId);
@@ -1299,12 +1308,15 @@ async function promptGPT(systemPrompt, input) {
}
if (!response.ok) {
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
chatStore.add(
"assistant",
`<span class='error'>Error: POST ${endpoint} ${response.status}</span>`,
null,
null,
chatId
chatId,
requestModel
);
toggleLoader(false, chatId);
activeRequests.delete(chatId);
@@ -1324,12 +1336,15 @@ async function promptGPT(systemPrompt, input) {
.getReader();
if (!reader) {
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
chatStore.add(
"assistant",
`<span class='error'>Error: Failed to decode MCP API response</span>`,
null,
null,
chatId
chatId,
requestModel
);
toggleLoader(false, chatId);
activeRequests.delete(chatId);
@@ -1598,12 +1613,15 @@ async function promptGPT(systemPrompt, input) {
break;
case "error":
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
chatStore.add(
"assistant",
`<span class='error'>MCP Error: ${eventData.message}</span>`,
null,
null,
chatId
chatId,
requestModel
);
break;
}
@@ -1624,9 +1642,11 @@ async function promptGPT(systemPrompt, input) {
// Update or create assistant message with processed regular content
const currentChat = chatStore.getChat(chatId);
if (!currentChat) break; // Chat was deleted
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
if (lastAssistantMessageIndex === -1) {
if (processedRegular && processedRegular.trim()) {
chatStore.add("assistant", processedRegular, null, null, chatId);
chatStore.add("assistant", processedRegular, null, null, chatId, requestModel);
lastAssistantMessageIndex = targetHistory.length - 1;
}
} else {
@@ -1706,7 +1726,9 @@ async function promptGPT(systemPrompt, input) {
lastMessage.html = DOMPurify.sanitize(marked.parse(lastMessage.content));
}
} else if (processedRegular && processedRegular.trim()) {
chatStore.add("assistant", processedRegular, null, null, chatId);
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
chatStore.add("assistant", processedRegular, null, null, chatId, requestModel);
lastAssistantMessageIndex = targetHistory.length - 1;
}
}
@@ -1754,7 +1776,9 @@ async function promptGPT(systemPrompt, input) {
lastMessage.html = DOMPurify.sanitize(marked.parse(lastMessage.content));
}
} else {
chatStore.add("assistant", finalRegular, null, null, chatId);
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
chatStore.add("assistant", finalRegular, null, null, chatId, requestModel);
}
}
@@ -1812,12 +1836,15 @@ async function promptGPT(systemPrompt, input) {
.getReader();
if (!reader) {
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
chatStore.add(
"assistant",
`<span class='error'>Error: Failed to decode API response</span>`,
null,
null,
chatId
chatId,
requestModel
);
toggleLoader(false, chatId);
activeRequests.delete(chatId);
@@ -1848,9 +1875,11 @@ async function promptGPT(systemPrompt, input) {
const addToChat = (token) => {
const currentChat = chatStore.getChat(chatId);
if (!currentChat) return; // Chat was deleted
chatStore.add("assistant", token, null, null, chatId);
// Count tokens for rate calculation (per chat)
// Get model from request for this chat
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
chatStore.add("assistant", token, null, null, chatId, requestModel);
// Count tokens for rate calculation (per chat)
if (request) {
const tokenCount = Math.ceil(token.length / 4);
request.tokensReceived += tokenCount;
@@ -2008,12 +2037,15 @@ async function promptGPT(systemPrompt, input) {
if (error.name !== 'AbortError' || !currentAbortController) {
const currentChat = chatStore.getChat(chatId);
if (currentChat) {
const request = activeRequests.get(chatId);
const requestModel = request?.model || null;
chatStore.add(
"assistant",
`<span class='error'>Error: Failed to process stream</span>`,
null,
null,
chatId
chatId,
requestModel
);
}
}

View File

@@ -276,12 +276,31 @@ SOFTWARE.
}
},
add(role, content, image, audio, targetChatId = null) {
add(role, content, image, audio, targetChatId = null, model = null) {
// If targetChatId is provided, add to that chat, otherwise use active chat
// This allows streaming to continue to the correct chat even if user switches
const chat = targetChatId ? this.getChat(targetChatId) : this.activeChat();
if (!chat) return;
// Determine model for this message:
// - If model is explicitly provided, use it (for assistant messages with specific model)
// - For user messages, use the current chat's model
// - For other messages (thinking, tool_call, etc.), inherit from previous message or use chat model
let messageModel = model;
if (!messageModel) {
if (role === "user") {
// User messages always use the current chat's model
messageModel = chat.model || "";
} else if (role === "assistant") {
// Assistant messages use the chat's model (should be set when request is made)
messageModel = chat.model || "";
} else {
// For thinking, tool_call, etc., try to inherit from last assistant message, or use chat model
const lastAssistant = chat.history.slice().reverse().find(m => m.role === "assistant");
messageModel = lastAssistant?.model || chat.model || "";
}
}
const N = chat.history.length - 1;
// For thinking, reasoning, tool_call, and tool_result messages, always create a new message
if (role === "thinking" || role === "reasoning" || role === "tool_call" || role === "tool_result") {
@@ -311,7 +330,7 @@ SOFTWARE.
// Reasoning, tool_call, and tool_result are always collapsed by default
const isMCPMode = chat.mcpMode || false;
const shouldExpand = (role === "thinking" && !isMCPMode) || false;
chat.history.push({ role, content, html: c, image, audio, expanded: shouldExpand });
chat.history.push({ role, content, html: c, image, audio, expanded: shouldExpand, model: messageModel });
// Auto-name chat from first user message
if (role === "user" && chat.name === "New Chat" && content.trim()) {
@@ -332,6 +351,10 @@ SOFTWARE.
if (audio && audio.length > 0) {
chat.history[N].audio = [...(chat.history[N].audio || []), ...audio];
}
// Preserve model if merging (don't overwrite)
if (!chat.history[N].model && messageModel) {
chat.history[N].model = messageModel;
}
} else {
let c = "";
const lines = content.split("\n");
@@ -343,7 +366,8 @@ SOFTWARE.
content,
html: c,
image: image || [],
audio: audio || []
audio: audio || [],
model: messageModel
});
// Auto-name chat from first user message
@@ -1248,11 +1272,20 @@ SOFTWARE.
</template>
<template x-if="message.role != 'user' && message.role != 'thinking' && message.role != 'reasoning' && message.role != 'tool_call' && message.role != 'tool_result'">
<div class="flex items-center space-x-2">
{{ if $galleryConfig }}
{{ if $galleryConfig.Icon }}<img src="{{$galleryConfig.Icon}}" class="rounded-lg mt-2 max-w-8 max-h-8 border border-[var(--color-primary-border)]/20">{{end}}
{{ end }}
<!-- Model icon - from message history, fallback to active chat -->
<template x-if="message.model && window.__galleryConfigs && window.__galleryConfigs[message.model] && window.__galleryConfigs[message.model].Icon">
<img :src="window.__galleryConfigs[message.model].Icon" class="rounded-lg mt-2 max-w-8 max-h-8 border border-[var(--color-primary-border)]/20">
</template>
<!-- Fallback: use active chat model if message doesn't have one -->
<template x-if="!message.model && $store.chat.activeChat() && $store.chat.activeChat().model && window.__galleryConfigs && window.__galleryConfigs[$store.chat.activeChat().model] && window.__galleryConfigs[$store.chat.activeChat().model].Icon">
<img :src="window.__galleryConfigs[$store.chat.activeChat().model].Icon" class="rounded-lg mt-2 max-w-8 max-h-8 border border-[var(--color-primary-border)]/20">
</template>
<!-- Final fallback: initial model from server -->
<template x-if="!message.model && (!$store.chat.activeChat() || !$store.chat.activeChat().model) && window.__galleryConfigs && window.__galleryConfigs['{{$model}}'] && window.__galleryConfigs['{{$model}}'].Icon">
<img :src="window.__galleryConfigs['{{$model}}'].Icon" class="rounded-lg mt-2 max-w-8 max-h-8 border border-[var(--color-primary-border)]/20">
</template>
<div class="flex flex-col flex-1">
<span class="text-xs font-semibold text-[var(--color-text-secondary)] mb-1">{{if .Model}}{{.Model}}{{else}}Assistant{{end}}</span>
<span class="text-xs font-semibold text-[var(--color-text-secondary)] mb-1" x-text="message.model || $store.chat.activeChat()?.model || '{{if .Model}}{{.Model}}{{else}}Assistant{{end}}'"></span>
<div class="flex-1 text-[var(--color-text-primary)] flex items-center space-x-2 min-w-0">
<div class="p-3 rounded-lg bg-[var(--color-bg-secondary)] border border-[var(--color-accent-border)]/20 shadow-lg max-w-full overflow-x-auto overflow-wrap-anywhere" x-html="message.html"></div>
<button @click="copyToClipboard(message.html)" title="Copy to clipboard" class="text-[var(--color-text-secondary)] hover:text-[var(--color-primary)] transition-colors p-1 flex-shrink-0">

View File

@@ -34,3 +34,278 @@ The list below is a list of software that integrates with LocalAI.
- [Langchain](https://docs.langchain.com/oss/python/integrations/providers/localai) integration package [pypi](https://pypi.org/project/langchain-localai/)
Feel free to open a Pull Request (by clicking "Edit page" below) to get a page added for your project, or if you spot an error on one of the pages!
## Configuration Guides
This section provides step-by-step instructions for configuring specific software to work with LocalAI.
### OpenCode
[OpenCode](https://opencode.ai) is an AI-powered code editor that can be configured to use LocalAI as its backend provider.
#### Prerequisites
- LocalAI must be running and accessible (either locally or on a network)
- You need to know your LocalAI server's IP address/hostname and port (default is `8080`)
#### Configuration Steps
1. **Edit the OpenCode configuration file**
Open the OpenCode configuration file located at `~/.config/opencode/opencode.json` in your editor.
2. **Add LocalAI provider configuration**
Add the following configuration to your `opencode.json` file, replacing the values with your own:
```json
{
"$schema": "https://opencode.ai/config.json",
"provider": {
"LocalAI": {
"npm": "@ai-sdk/openai-compatible",
"name": "LocalAI (local)",
"options": {
"baseURL": "http://127.0.0.1:8080/v1"
},
"models": {
"Qwen3-Coder-30B-A3B-Instruct-i1-GGUF": {
"name": "Qwen3-Coder-30B-A3B-Instruct-i1-GGUF",
"limit": {
"context": 38000,
"output": 65536
}
},
"qwen_qwen3-30b-a3b-instruct-2507": {
"name": "qwen_qwen3-30b-a3b-instruct-2507",
"limit": {
"context": 38000,
"output": 65536
}
}
}
}
}
}
```
3. **Customize the configuration**
- **baseURL**: Replace `http://127.0.0.1:8080/v1` with your LocalAI server's address and port.
- **name**: Change "LocalAI (local)" to a descriptive name for your setup.
- **models**: Replace the model names with the actual model names available in your LocalAI instance. You can find available models by checking your LocalAI models directory or using the LocalAI API.
- **limit**: Adjust the `context` and `output` token limits based on your model's capabilities and available resources.
4. **Verify your models**
Make sure the model names in the configuration exactly match the model names configured in your LocalAI instance. You can verify the available models by checking your LocalAI configuration or by querying the `/v1/models` endpoint.
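If you want to script that check, a small Go program along the lines of the sketch below works; it assumes the standard OpenAI-compatible model listing that LocalAI exposes, and the base URL should be adjusted to your instance.

```go
// List the model IDs known to a LocalAI instance via the OpenAI-compatible
// /v1/models endpoint. The base URL is an assumption; change it as needed.
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

type modelList struct {
	Data []struct {
		ID string `json:"id"`
	} `json:"data"`
}

func main() {
	resp, err := http.Get("http://127.0.0.1:8080/v1/models")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var models modelList
	if err := json.NewDecoder(resp.Body).Decode(&models); err != nil {
		panic(err)
	}
	for _, m := range models.Data {
		// These IDs are what goes under "models" in opencode.json.
		fmt.Println(m.ID)
	}
}
```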
5. **Restart OpenCode**
After saving the configuration file, restart OpenCode for the changes to take effect.
### GitHub Actions
You can use LocalAI in GitHub Actions workflows to perform AI-powered tasks like code review, diff summarization, or automated analysis. The [LocalAI GitHub Action](https://github.com/mudler/localai-github-action) makes it easy to spin up a LocalAI instance in your CI/CD pipeline.
#### Prerequisites
- A GitHub repository with Actions enabled
- A model name from [models.localai.io](https://models.localai.io) or a Hugging Face model reference
#### Example Workflow
This example workflow demonstrates how to use LocalAI to summarize pull request diffs and send notifications:
1. **Create a workflow file**
Create a new file in your repository at `.github/workflows/localai.yml`:
```yaml
name: Use LocalAI in GHA
on:
pull_request:
types:
- closed
jobs:
notify-discord:
if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'area/ai-model')) }}
env:
MODEL_NAME: qwen_qwen3-4b-instruct-2507
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
# Starts the LocalAI container
- id: foo
uses: mudler/localai-github-action@v1.1
with:
model: 'qwen_qwen3-4b-instruct-2507' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
# Ask to explain the diff to LocalAI
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "Write a message summarizing the change diffs"
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"
# Print the summary
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
# Send the summary somewhere (e.g. Discord)
- name: Discord notification
env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL }}
DISCORD_USERNAME: "discord-bot"
DISCORD_AVATAR: ""
uses: Ilshidur/action-discord@master
with:
args: ${{ steps.summarize.outputs.message }}
```
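If you prefer a typed client to the curl/jq step, the same call can be made from a small Go program. The sketch below mirrors the workflow above (same endpoint path and payload fields); it is an illustration rather than part of the action, and it assumes the diff file path is passed as the first argument and the model name via `MODEL_NAME`.

```go
// Sketch of the "Summarize" step as a Go program instead of curl/jq.
// Endpoint and payload fields mirror the workflow above; the diff is read
// from the path that git-diff-action wrote (passed here as os.Args[1]).
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"os"
)

type chatResponse struct {
	Choices []struct {
		Message struct {
			Content string `json:"content"`
		} `json:"message"`
	} `json:"choices"`
}

func main() {
	diff, err := os.ReadFile(os.Args[1]) // path to diff.txt
	if err != nil {
		panic(err)
	}

	payload := map[string]any{
		"model": os.Getenv("MODEL_NAME"),
		"messages": []map[string]string{
			{"role": "system", "content": "Write a message summarizing the change diffs"},
			{"role": "user", "content": string(diff)},
		},
	}
	body, _ := json.Marshal(payload)

	resp, err := http.Post("http://localhost:8080/chat/completions",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var out chatResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	if len(out.Choices) > 0 {
		fmt.Println(out.Choices[0].Message.Content)
	}
}
```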
#### Configuration Options
- **Model selection**: Replace `qwen_qwen3-4b-instruct-2507` with any model from [models.localai.io](https://models.localai.io). You can also use Hugging Face models with the `huggingface://<repository>/<file>` reference syntax.
- **Trigger conditions**: Customize the `if` condition to control when the workflow runs. The example only runs when a PR is merged and has a specific label.
- **API endpoint**: The LocalAI container runs on `http://localhost:8080` by default. The action exposes the service on the standard port.
- **Custom prompts**: Modify the system message in the JSON payload to change what LocalAI is asked to do with the diff.
#### Use Cases
- **Code review automation**: Automatically review code changes and provide feedback
- **Diff summarization**: Generate human-readable summaries of code changes
- **Documentation generation**: Create documentation from code changes
- **Security scanning**: Analyze code for potential security issues
- **Test generation**: Generate test cases based on code changes
#### Additional Resources
- [LocalAI GitHub Action repository](https://github.com/mudler/localai-github-action)
- [Available models](https://models.localai.io)
- [LocalAI API documentation](/reference/)
### Realtime Voice Assistant
LocalAI supports realtime voice interactions, enabling voice assistant applications with real-time speech-to-speech communication. A complete example implementation is available in the [LocalAI-examples repository](https://github.com/mudler/LocalAI-examples/tree/main/realtime).
#### Overview
The realtime voice assistant example demonstrates how to build a voice assistant that:
- Captures audio input from the user in real-time
- Transcribes speech to text using LocalAI's transcription capabilities
- Processes the text with a language model
- Generates audio responses using text-to-speech
- Streams audio back to the user in real-time (a rough request-flow sketch follows this list)
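The sketch below shows one turn of that flow as a standalone Go program, purely as an illustration under stated assumptions: the endpoint paths (`/v1/audio/transcriptions`, `/v1/chat/completions`, `/tts`) are LocalAI's OpenAI-compatible and TTS routes, the model names are placeholders, and audio capture, VAD, and streaming playback (handled by the Python client in the example) are omitted.

```go
// One turn of the voice-assistant flow: transcribe -> chat -> synthesize.
// Endpoints and model names are assumptions; adjust them to your setup.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"mime/multipart"
	"net/http"
	"os"
)

const base = "http://localhost:8080"

// transcribe uploads a WAV file to the transcription endpoint and returns the text.
func transcribe(wavPath string) (string, error) {
	var buf bytes.Buffer
	w := multipart.NewWriter(&buf)
	part, _ := w.CreateFormFile("file", "turn.wav")
	f, err := os.Open(wavPath)
	if err != nil {
		return "", err
	}
	defer f.Close()
	io.Copy(part, f)
	w.WriteField("model", "whisper-1") // placeholder transcription model name
	w.Close()

	resp, err := http.Post(base+"/v1/audio/transcriptions", w.FormDataContentType(), &buf)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	var out struct {
		Text string `json:"text"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		return "", err
	}
	return out.Text, nil
}

// reply sends the transcribed text to the chat endpoint and returns the answer.
func reply(text string) (string, error) {
	body, _ := json.Marshal(map[string]any{
		"model":    "my-chat-model", // placeholder LLM name
		"messages": []map[string]string{{"role": "user", "content": text}},
	})
	resp, err := http.Post(base+"/v1/chat/completions", "application/json", bytes.NewReader(body))
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	var out struct {
		Choices []struct {
			Message struct {
				Content string `json:"content"`
			} `json:"message"`
		} `json:"choices"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		return "", err
	}
	if len(out.Choices) == 0 {
		return "", fmt.Errorf("empty chat response")
	}
	return out.Choices[0].Message.Content, nil
}

// speak synthesizes the reply with the TTS endpoint and writes the audio to disk.
func speak(text, outPath string) error {
	body, _ := json.Marshal(map[string]string{"model": "my-tts-model", "input": text})
	resp, err := http.Post(base+"/tts", "application/json", bytes.NewReader(body))
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	audio, err := io.ReadAll(resp.Body)
	if err != nil {
		return err
	}
	return os.WriteFile(outPath, audio, 0o644)
}

func main() {
	text, err := transcribe("input.wav")
	if err != nil {
		panic(err)
	}
	answer, err := reply(text)
	if err != nil {
		panic(err)
	}
	fmt.Println("assistant:", answer)
	if err := speak(answer, "reply.wav"); err != nil {
		panic(err)
	}
}
```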
#### Prerequisites
- A transcription model (e.g., Whisper) configured in LocalAI
- A text-to-speech model configured in LocalAI
- A language model for generating responses
#### Getting Started
1. **Clone the example repository**
```bash
git clone https://github.com/mudler/LocalAI-examples.git
cd LocalAI-examples/realtime
```
2. **Start LocalAI with Docker Compose**
```bash
docker compose up -d
```
The first time you start docker compose, it will take a while to download the available models. You can follow the model downloads in real-time:
```bash
docker logs -f realtime-localai-1
```
3. **Install host dependencies**
Install the host dependencies (sudo is required):
```bash
sudo bash setup.sh
```
4. **Run the voice assistant**
Start the voice assistant application:
```bash
bash run.sh
```
#### Configuration Notes
- **CPU vs GPU**: The example is tuned for CPU usage, but you can run LocalAI on a GPU for better performance and to use larger models.
- **Python client**: The Python client downloads the CPU build of PyTorch, which is fine because the heavy computation is offloaded to LocalAI; the client only runs Silero VAD (Voice Activity Detection), which is fast, and handles audio recording.
- **Thin client architecture**: The Python client is designed to run on thin clients such as Raspberry Pis, while LocalAI handles the heavier computational workload on a more powerful machine.
#### Key Features
- **Real-time processing**: Low-latency audio streaming for natural conversations
- **Voice Activity Detection (VAD)**: Automatic detection of when the user is speaking
- **Turn-taking**: Handles conversation flow with proper turn detection
- **OpenAI-compatible API**: Uses LocalAI's OpenAI-compatible realtime API endpoints
#### Use Cases
- **Voice assistants**: Build custom voice assistants for home automation or productivity
- **Accessibility tools**: Create voice interfaces for accessibility applications
- **Interactive applications**: Add voice interaction to games, educational software, or entertainment apps
- **Customer service**: Implement voice-based customer support systems
#### Additional Resources
- [Realtime Voice Assistant Example](https://github.com/mudler/LocalAI-examples/tree/main/realtime)
- [LocalAI Realtime API documentation](/features/)
- [Audio features documentation](/features/text-to-audio/)
- [Transcription features documentation](/features/audio-to-text/)

View File

@@ -1,4 +1,29 @@
---
- name: "rwkv7-g1c-13.3b"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls:
- https://huggingface.co/NaomiBTW/rwkv7-g1c-13.3b-gguf
description: |
The model is **RWKV7 g1c 13B**, a large language model optimized for efficiency. It is quantized using **Bartowski's calibrationv5 for imatrix** to reduce memory usage while maintaining performance. The base model is **BlinkDL/rwkv7-g1**, and this version is tailored for text-generation tasks. It balances accuracy and efficiency, making it suitable for deployment in various applications.
overrides:
parameters:
model: llama-cpp/models/rwkv7-g1c-13.3b-20251231-Q8_0.gguf
name: rwkv7-g1c-13.3b-gguf
backend: llama-cpp
template:
use_tokenizer_template: true
known_usecases:
- chat
function:
grammar:
disable: true
description: Imported from https://huggingface.co/NaomiBTW/rwkv7-g1c-13.3b-gguf
options:
- use_jinja:true
files:
- filename: llama-cpp/models/rwkv7-g1c-13.3b-20251231-Q8_0.gguf
sha256: e06b3b31cee207723be00425cfc25ae09b7fa1abbd7d97eda4e62a7ef254f877
uri: https://huggingface.co/NaomiBTW/rwkv7-g1c-13.3b-gguf/resolve/main/rwkv7-g1c-13.3b-20251231-Q8_0.gguf
- name: "iquest-coder-v1-40b-instruct-i1"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
urls:

View File

@@ -2,6 +2,7 @@ package functions
import (
"encoding/json"
"fmt"
"github.com/mudler/xlog"
)
@@ -102,3 +103,91 @@ func (f Functions) Select(name string) Functions {
	return funcs
}

// sanitizeValue recursively sanitizes null values in a JSON structure, converting them to empty objects.
// It handles maps, slices, and nested structures.
func sanitizeValue(value interface{}, path string) interface{} {
	if value == nil {
		// Convert null to empty object
		xlog.Debug("SanitizeTools: found null value, converting to empty object", "path", path)
		return map[string]interface{}{}
	}

	switch v := value.(type) {
	case map[string]interface{}:
		// Recursively sanitize map values
		sanitized := make(map[string]interface{})
		for key, val := range v {
			newPath := path
			if newPath != "" {
				newPath += "."
			}
			newPath += key
			sanitized[key] = sanitizeValue(val, newPath)
		}
		return sanitized
	case []interface{}:
		// Recursively sanitize slice elements
		sanitized := make([]interface{}, len(v))
		for i, val := range v {
			newPath := fmt.Sprintf("%s[%d]", path, i)
			sanitized[i] = sanitizeValue(val, newPath)
		}
		return sanitized
	default:
		// For primitive types (string, number, bool), return as-is
		return value
	}
}

// SanitizeTools removes null values from tool.parameters.properties and converts them to empty objects.
// This prevents Jinja template errors when processing tools with malformed parameter schemas.
// It works by marshaling to JSON, recursively sanitizing the JSON structure, and unmarshaling back.
func SanitizeTools(tools Tools) Tools {
	if len(tools) == 0 {
		return tools
	}

	xlog.Debug("SanitizeTools: processing tools", "count", len(tools))

	// Marshal to JSON to work with the actual JSON representation
	toolsJSON, err := json.Marshal(tools)
	if err != nil {
		xlog.Warn("SanitizeTools: failed to marshal tools to JSON", "error", err)
		return tools
	}

	// Parse JSON into a generic structure
	var toolsData []map[string]interface{}
	if err := json.Unmarshal(toolsJSON, &toolsData); err != nil {
		xlog.Warn("SanitizeTools: failed to unmarshal tools JSON", "error", err)
		return tools
	}

	// Recursively sanitize the JSON structure
	for i, tool := range toolsData {
		if function, ok := tool["function"].(map[string]interface{}); ok {
			// Recursively sanitize the entire tool structure
			tool["function"] = sanitizeValue(function, fmt.Sprintf("tools[%d].function", i))
		}
		toolsData[i] = tool
	}

	// Marshal back to JSON
	sanitizedJSON, err := json.Marshal(toolsData)
	if err != nil {
		xlog.Warn("SanitizeTools: failed to marshal sanitized tools", "error", err)
		return tools
	}

	// Unmarshal back into Tools structure
	var sanitized Tools
	if err := json.Unmarshal(sanitizedJSON, &sanitized); err != nil {
		xlog.Warn("SanitizeTools: failed to unmarshal sanitized tools", "error", err)
		return tools
	}

	return sanitized
}
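
For reference, a small self-contained usage sketch of `SanitizeTools` is shown below. The import path `github.com/mudler/LocalAI/pkg/functions` is assumed from the repository layout and is not part of the diff; the tool definition is made up for illustration.

```go
package main

import (
	"encoding/json"
	"fmt"

	// Assumed import path for the package shown in the diff above.
	"github.com/mudler/LocalAI/pkg/functions"
)

func main() {
	// A tool whose "unit" property is null, which would otherwise trip Jinja templating.
	tools := functions.Tools{
		{
			Type: "function",
			Function: functions.Function{
				Name: "get_weather",
				Parameters: map[string]interface{}{
					"type": "object",
					"properties": map[string]interface{}{
						"location": map[string]interface{}{"type": "string"},
						"unit":     nil, // malformed schema entry
					},
				},
			},
		},
	}

	sanitized := functions.SanitizeTools(tools)

	out, err := json.MarshalIndent(sanitized, "", "  ")
	if err != nil {
		panic(err)
	}
	// After sanitization, "unit" is serialized as {} instead of null.
	fmt.Println(string(out))
}
```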

@@ -82,4 +82,202 @@ var _ = Describe("LocalAI grammar functions", func() {
			Expect(functions[0].Name).To(Equal("create_event"))
		})
	})

	Context("SanitizeTools()", func() {
		It("returns empty slice when input is empty", func() {
			tools := Tools{}
			sanitized := SanitizeTools(tools)
			Expect(len(sanitized)).To(Equal(0))
		})

		It("converts null values in parameters.properties to empty objects", func() {
			tools := Tools{
				{
					Type: "function",
					Function: Function{
						Name:        "test_function",
						Description: "A test function",
						Parameters: map[string]interface{}{
							"type": "object",
							"properties": map[string]interface{}{
								"valid_param": map[string]interface{}{
									"type": "string",
								},
								"null_param": nil,
								"another_valid": map[string]interface{}{
									"type": "integer",
								},
							},
						},
					},
				},
			}

			sanitized := SanitizeTools(tools)
			Expect(len(sanitized)).To(Equal(1))
			Expect(sanitized[0].Function.Name).To(Equal("test_function"))

			properties := sanitized[0].Function.Parameters["properties"].(map[string]interface{})
			Expect(properties["valid_param"]).NotTo(BeNil())
			Expect(properties["null_param"]).NotTo(BeNil())
			Expect(properties["null_param"]).To(Equal(map[string]interface{}{}))
			Expect(properties["another_valid"]).NotTo(BeNil())
		})

		It("preserves valid parameter structures unchanged", func() {
			tools := Tools{
				{
					Type: "function",
					Function: Function{
						Name:        "valid_function",
						Description: "A function with valid parameters",
						Parameters: map[string]interface{}{
							"type": "object",
							"properties": map[string]interface{}{
								"param1": map[string]interface{}{
									"type":        "string",
									"description": "First parameter",
								},
								"param2": map[string]interface{}{
									"type": "integer",
								},
							},
						},
					},
				},
			}

			sanitized := SanitizeTools(tools)
			Expect(len(sanitized)).To(Equal(1))
			Expect(sanitized[0].Function.Name).To(Equal("valid_function"))

			properties := sanitized[0].Function.Parameters["properties"].(map[string]interface{})
			Expect(properties["param1"].(map[string]interface{})["type"]).To(Equal("string"))
			Expect(properties["param1"].(map[string]interface{})["description"]).To(Equal("First parameter"))
			Expect(properties["param2"].(map[string]interface{})["type"]).To(Equal("integer"))
		})

		It("handles tools without parameters field", func() {
			tools := Tools{
				{
					Type: "function",
					Function: Function{
						Name:        "no_params_function",
						Description: "A function without parameters",
					},
				},
			}

			sanitized := SanitizeTools(tools)
			Expect(len(sanitized)).To(Equal(1))
			Expect(sanitized[0].Function.Name).To(Equal("no_params_function"))
			Expect(sanitized[0].Function.Parameters).To(BeNil())
		})

		It("handles tools without properties field", func() {
			tools := Tools{
				{
					Type: "function",
					Function: Function{
						Name:        "no_properties_function",
						Description: "A function without properties",
						Parameters: map[string]interface{}{
							"type": "object",
						},
					},
				},
			}

			sanitized := SanitizeTools(tools)
			Expect(len(sanitized)).To(Equal(1))
			Expect(sanitized[0].Function.Name).To(Equal("no_properties_function"))
			Expect(sanitized[0].Function.Parameters["type"]).To(Equal("object"))
		})

		It("handles multiple tools with mixed valid and null values", func() {
			tools := Tools{
				{
					Type: "function",
					Function: Function{
						Name: "function_with_nulls",
						Parameters: map[string]interface{}{
							"properties": map[string]interface{}{
								"valid": map[string]interface{}{
									"type": "string",
								},
								"null1": nil,
								"null2": nil,
							},
						},
					},
				},
				{
					Type: "function",
					Function: Function{
						Name: "function_all_valid",
						Parameters: map[string]interface{}{
							"properties": map[string]interface{}{
								"param1": map[string]interface{}{
									"type": "string",
								},
								"param2": map[string]interface{}{
									"type": "integer",
								},
							},
						},
					},
				},
				{
					Type: "function",
					Function: Function{
						Name: "function_no_params",
					},
				},
			}

			sanitized := SanitizeTools(tools)
			Expect(len(sanitized)).To(Equal(3))

			// First tool should have nulls converted to empty objects
			props1 := sanitized[0].Function.Parameters["properties"].(map[string]interface{})
			Expect(props1["valid"]).NotTo(BeNil())
			Expect(props1["null1"]).To(Equal(map[string]interface{}{}))
			Expect(props1["null2"]).To(Equal(map[string]interface{}{}))

			// Second tool should remain unchanged
			props2 := sanitized[1].Function.Parameters["properties"].(map[string]interface{})
			Expect(props2["param1"].(map[string]interface{})["type"]).To(Equal("string"))
			Expect(props2["param2"].(map[string]interface{})["type"]).To(Equal("integer"))

			// Third tool should remain unchanged
			Expect(sanitized[2].Function.Parameters).To(BeNil())
		})

		It("does not modify the original tools slice", func() {
			tools := Tools{
				{
					Type: "function",
					Function: Function{
						Name: "test_function",
						Parameters: map[string]interface{}{
							"properties": map[string]interface{}{
								"null_param": nil,
							},
						},
					},
				},
			}

			originalProperties := tools[0].Function.Parameters["properties"].(map[string]interface{})
			originalNullValue := originalProperties["null_param"]

			sanitized := SanitizeTools(tools)

			// Original should still have nil
			Expect(originalNullValue).To(BeNil())

			// Sanitized should have empty object
			sanitizedProperties := sanitized[0].Function.Parameters["properties"].(map[string]interface{})
			Expect(sanitizedProperties["null_param"]).To(Equal(map[string]interface{}{}))
		})
	})
})

@@ -702,30 +702,6 @@ const docTemplate = `{
                }
            }
        },
        "/mcp/v1/completions": {
            "post": {
                "summary": "Generate completions for a given prompt and model.",
                "parameters": [
                    {
                        "description": "query params",
                        "name": "request",
                        "in": "body",
                        "required": true,
                        "schema": {
                            "$ref": "#/definitions/schema.OpenAIRequest"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Response",
                        "schema": {
                            "$ref": "#/definitions/schema.OpenAIResponse"
                        }
                    }
                }
            }
        },
        "/metrics": {
            "get": {
                "summary": "Prometheus metrics endpoint",

@@ -695,30 +695,6 @@
                }
            }
        },
        "/mcp/v1/completions": {
            "post": {
                "summary": "Generate completions for a given prompt and model.",
                "parameters": [
                    {
                        "description": "query params",
                        "name": "request",
                        "in": "body",
                        "required": true,
                        "schema": {
                            "$ref": "#/definitions/schema.OpenAIRequest"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Response",
                        "schema": {
                            "$ref": "#/definitions/schema.OpenAIResponse"
                        }
                    }
                }
            }
        },
        "/metrics": {
            "get": {
                "summary": "Prometheus metrics endpoint",

@@ -1495,21 +1495,6 @@ paths:
          schema:
            $ref: '#/definitions/services.GalleryOpStatus'
      summary: Returns the job status
  /mcp/v1/completions:
    post:
      parameters:
      - description: query params
        in: body
        name: request
        required: true
        schema:
          $ref: '#/definitions/schema.OpenAIRequest'
      responses:
        "200":
          description: Response
          schema:
            $ref: '#/definitions/schema.OpenAIResponse'
      summary: Generate completions for a given prompt and model.
  /metrics:
    get:
      parameters: