feat(turboquant.cpp): add new backend

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Update asset links in README.md
2026-07-05 22:09:02 -04:00 · 2026-04-03 20:57:15 +00:00 · 2026-04-03 10:24:08 +02:00 · 2026-04-03 10:23:03 +02:00 · 2026-04-03 10:14:13 +02:00 · 2026-04-03 09:46:06 +02:00
37 changed files with 373 additions and 92 deletions
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -1828,6 +1828,98 @@ jobs:
            dockerfile: "./backend/Dockerfile.llama-cpp"
            context: "./"
            ubuntu-version: '2404'
+          # llama-cpp-tq (TurboQuant fork)
+          - build-type: ''
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64,linux/arm64'
+            tag-latest: 'auto'
+            tag-suffix: '-cpu-llama-cpp-tq'
+            runs-on: 'bigger-runner'
+            base-image: "ubuntu:24.04"
+            skip-drivers: 'false'
+            backend: "llama-cpp-tq"
+            dockerfile: "./backend/Dockerfile.llama-cpp"
+            context: "./"
+            ubuntu-version: '2404'
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "8"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp-tq'
+            runs-on: 'bigger-runner'
+            base-image: "ubuntu:24.04"
+            skip-drivers: 'false'
+            backend: "llama-cpp-tq"
+            dockerfile: "./backend/Dockerfile.llama-cpp"
+            context: "./"
+            ubuntu-version: '2404'
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-13-llama-cpp-tq'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:24.04"
+            skip-drivers: 'false'
+            backend: "llama-cpp-tq"
+            dockerfile: "./backend/Dockerfile.llama-cpp"
+            context: "./"
+            ubuntu-version: '2404'
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
+            platforms: 'linux/arm64'
+            skip-drivers: 'false'
+            tag-latest: 'auto'
+            tag-suffix: '-nvidia-l4t-cuda-13-arm64-llama-cpp-tq'
+            base-image: "ubuntu:24.04"
+            runs-on: 'ubuntu-24.04-arm'
+            ubuntu-version: '2404'
+            backend: "llama-cpp-tq"
+            dockerfile: "./backend/Dockerfile.llama-cpp"
+            context: "./"
+          - build-type: 'hipblas'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-rocm-hipblas-llama-cpp-tq'
+            runs-on: 'ubuntu-latest'
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
+            skip-drivers: 'false'
+            backend: "llama-cpp-tq"
+            dockerfile: "./backend/Dockerfile.llama-cpp"
+            context: "./"
+            ubuntu-version: '2404'
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "0"
+            platforms: 'linux/arm64'
+            skip-drivers: 'false'
+            tag-latest: 'auto'
+            tag-suffix: '-nvidia-l4t-arm64-llama-cpp-tq'
+            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            runs-on: 'ubuntu-24.04-arm'
+            backend: "llama-cpp-tq"
+            dockerfile: "./backend/Dockerfile.llama-cpp"
+            context: "./"
+            ubuntu-version: '2204'
+          - build-type: 'vulkan'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64,linux/arm64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-vulkan-llama-cpp-tq'
+            runs-on: 'bigger-runner'
+            base-image: "ubuntu:24.04"
+            skip-drivers: 'false'
+            backend: "llama-cpp-tq"
+            dockerfile: "./backend/Dockerfile.llama-cpp"
+            context: "./"
+            ubuntu-version: '2404'
          # Stablediffusion-ggml
          - build-type: ''
            cuda-major-version: ""
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -14,6 +14,11 @@ jobs:
            variable: "LLAMA_VERSION"
            branch: "master"
            file: "backend/cpp/llama-cpp/Makefile"
+          - repository: "TheTom/llama-cpp-turboquant"
+            variable: "LLAMA_VERSION"
+            branch: "master"
+            file: "backend/cpp/llama-cpp-tq/Makefile"
+            branch_suffix: "-tq"
          - repository: "ggml-org/whisper.cpp"
            variable: "WHISPER_CPP_VERSION"
            branch: "master"
@@ -60,7 +65,7 @@ jobs:
          push-to-fork: ci-forks/LocalAI
          commit-message: ':arrow_up: Update ${{ matrix.repository }}'
          title: 'chore: :arrow_up: Update ${{ matrix.repository }} to `${{ steps.bump.outputs.commit }}`'
-          branch: "update/${{ matrix.variable }}"
+          branch: "update/${{ matrix.variable }}${{ matrix.branch_suffix }}"
          body: ${{ steps.bump.outputs.message }}
          signoff: true

--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,7 @@ prepare-sources
 /backend/cpp/llama-cpp/llama.cpp
 /backend/cpp/llama-*
 !backend/cpp/llama-cpp
+!backend/cpp/llama-cpp-tq
 /backends
 /backend-images
 /result.yaml
--- a/Makefile
+++ b/Makefile
@@ -544,8 +544,9 @@ backend-images:
 	mkdir -p backend-images

 # Backend metadata: BACKEND_NAME | DOCKERFILE_TYPE | BUILD_CONTEXT | PROGRESS_FLAG | NEEDS_BACKEND_ARG
-# llama-cpp is special - uses llama-cpp Dockerfile and doesn't need BACKEND arg
+# llama-cpp and forks - use llama-cpp Dockerfile
 BACKEND_LLAMA_CPP = llama-cpp|llama-cpp|.|false|false
+BACKEND_LLAMA_CPP_TQ = llama-cpp-tq|llama-cpp|.|false|true

 # Golang backends
 BACKEND_PIPER = piper|golang|.|false|true
@@ -609,6 +610,7 @@ endef

 # Generate all docker-build targets
 $(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP)))
+$(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP_TQ)))
 $(eval $(call generate-docker-build-target,$(BACKEND_PIPER)))
 $(eval $(call generate-docker-build-target,$(BACKEND_LOCAL_STORE)))
 $(eval $(call generate-docker-build-target,$(BACKEND_HUGGINGFACE)))
--- a/README.md
+++ b/README.md
@@ -42,16 +42,38 @@ Created and maintained by [Ettore Di Giacinto](https://github.com/mudler).

 > [:book: Documentation](https://localai.io/) | [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) | [💻 Quickstart](https://localai.io/basics/getting_started/) | [🖼️ Models](https://models.localai.io/) | [❓FAQ](https://localai.io/faq/)

-## Screenshots
-
-### Chat, Model gallery
+## Guided tour

 https://github.com/user-attachments/assets/08cbb692-57da-48f7-963d-2e7b43883c18

-### Agents
+<details>
+
+<summary>
+Click to see more!
+</summary>
+
+#### User and auth
+
+https://github.com/user-attachments/assets/228fa9ad-81a3-4d43-bfb9-31557e14a36c
+
+#### Agents

 https://github.com/user-attachments/assets/6270b331-e21d-4087-a540-6290006b381a

+#### Usage metrics per user
+
+https://github.com/user-attachments/assets/cbb03379-23b4-4e3d-bd26-d152f057007f
+
+#### Fine-tuning and Quantization
+
+https://github.com/user-attachments/assets/5ba4ace9-d3df-4795-b7d4-b0b404ea71ee
+
+#### WebRTC
+
+https://github.com/user-attachments/assets/ed88e34c-fed3-4b83-8a67-4716a9feeb7b
+
+</details>
+
 ## Quickstart

 ### macOS
--- a/backend/Dockerfile.llama-cpp
+++ b/backend/Dockerfile.llama-cpp
@@ -58,7 +58,9 @@ ARG CUDA_DOCKER_ARCH
 ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
 ARG CMAKE_ARGS
 ENV CMAKE_ARGS=${CMAKE_ARGS}
-ARG BACKEND=rerankers
+ARG BACKEND=llama-cpp
+ARG LLAMA_BACKEND_DIR=${BACKEND}
+ENV LLAMA_BACKEND_DIR=${LLAMA_BACKEND_DIR}
 ARG BUILD_TYPE
 ENV BUILD_TYPE=${BUILD_TYPE}
 ARG CUDA_MAJOR_VERSION
@@ -255,32 +257,27 @@ if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then
  CUDA_ARCH_ESC="${CUDA_DOCKER_ARCH//;/\\;}"
  export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
  echo "CMAKE_ARGS(env) = ${CMAKE_ARGS}"
-  rm -rf /LocalAI/backend/cpp/llama-cpp-*-build
+  rm -rf /LocalAI/backend/cpp/${LLAMA_BACKEND_DIR}-*-build
 fi

+cd /LocalAI/backend/cpp/${LLAMA_BACKEND_DIR}
+
 if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then
-  cd /LocalAI/backend/cpp/llama-cpp
-  make llama-cpp-fallback
-  make llama-cpp-grpc
-  make llama-cpp-rpc-server
+  make ARCH=aarch64 build-variants
 else
-  cd /LocalAI/backend/cpp/llama-cpp
-  make llama-cpp-avx
-  make llama-cpp-avx2
-  make llama-cpp-avx512
-  make llama-cpp-fallback
-  make llama-cpp-grpc
-  make llama-cpp-rpc-server
+  make build-variants
 fi
 EOT


 # Copy libraries using a script to handle architecture differences
-RUN make -BC /LocalAI/backend/cpp/llama-cpp package
+RUN make -BC /LocalAI/backend/cpp/${LLAMA_BACKEND_DIR} package


 FROM scratch

+ARG BACKEND=llama-cpp
+ARG LLAMA_BACKEND_DIR=${BACKEND}

 # Copy all available binaries (the build process only creates the appropriate ones for the target architecture)
-COPY --from=builder /LocalAI/backend/cpp/llama-cpp/package/. ./
+COPY --from=builder /LocalAI/backend/cpp/${LLAMA_BACKEND_DIR}/package/. ./
--- a/backend/cpp/llama-cpp-tq/Makefile
+++ b/backend/cpp/llama-cpp-tq/Makefile
@@ -0,0 +1,6 @@
+LLAMA_VERSION?=master
+LLAMA_REPO?=https://github.com/TheTom/llama-cpp-turboquant
+BACKEND_NAME?=llama-cpp-tq
+SHARED_DIR?=$(CURDIR)/../llama-cpp
+
+include ../llama-cpp/Makefile
--- a/backend/cpp/llama-cpp/CMakeLists.txt
+++ b/backend/cpp/llama-cpp/CMakeLists.txt
@@ -59,6 +59,11 @@ add_library(hw_grpc_proto

 add_executable(${TARGET} grpc-server.cpp json.hpp httplib.h)

+# Enable autoparser support only when the checked-out llama.cpp tree ships the header (not present in all forks). Upstream keeps it under common/, not next to grpc-server.cpp, so probe the source root rather than this directory.
+if(EXISTS "${CMAKE_SOURCE_DIR}/common/chat-auto-parser.h")
+    target_compile_definitions(${TARGET} PRIVATE HAS_AUTOPARSER)
+endif()
+
 target_include_directories(${TARGET} PRIVATE ../llava)
 target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})

--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,6 +1,10 @@

-LLAMA_VERSION?=95a6ebabb277c4cc18247e7bc2a5502133caca63
+LLAMA_VERSION?=a1cfb645307edc61a89e41557f290f441043d3c2
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
+BACKEND_NAME?=llama-cpp
+SHARED_DIR?=$(CURDIR)
+GRPC_SERVER_DIR?=tools/grpc-server
+SERVER_SOURCE_DIR?=tools/server

 CMAKE_ARGS?=
 BUILD_TYPE?=
@@ -67,6 +71,17 @@ ifeq ($(BUILD_TYPE),sycl_f32)
 		-DCMAKE_CXX_FLAGS="-fsycl"
 endif

+# Variants to build for each architecture (can be overridden by forks)
+X86_64_VARIANTS ?= llama-cpp-avx llama-cpp-avx2 llama-cpp-avx512 llama-cpp-fallback llama-cpp-grpc llama-cpp-rpc-server
+ARM64_VARIANTS ?= llama-cpp-fallback llama-cpp-grpc llama-cpp-rpc-server
+
+build-variants:
+ifeq ($(ARCH),aarch64)
+	@for v in $(ARM64_VARIANTS); do $(MAKE) $$v || exit 1; done
+else
+	@for v in $(X86_64_VARIANTS); do $(MAKE) $$v || exit 1; done
+endif
+
 INSTALLED_PACKAGES=$(CURDIR)/../grpc/installed_packages
 INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
 ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
@@ -90,42 +105,42 @@ else
 endif

 llama-cpp-avx2: llama.cpp
-	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build
-	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build purge
+	cp -rf $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME) $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx2-build
+	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx2-build purge
 	$(info ${GREEN}I llama-cpp build info:avx2${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-cpp-avx2-build" build-llama-cpp-grpc-server
-	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build/grpc-server llama-cpp-avx2
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="$(BACKEND_NAME)-avx2-build" build-llama-cpp-grpc-server
+	cp -rfv $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx2-build/grpc-server llama-cpp-avx2

 llama-cpp-avx512: llama.cpp
-	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build
-	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build purge
+	cp -rf $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME) $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx512-build
+	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx512-build purge
 	$(info ${GREEN}I llama-cpp build info:avx512${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-cpp-avx512-build" build-llama-cpp-grpc-server
-	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build/grpc-server llama-cpp-avx512
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="$(BACKEND_NAME)-avx512-build" build-llama-cpp-grpc-server
+	cp -rfv $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx512-build/grpc-server llama-cpp-avx512

 llama-cpp-avx: llama.cpp
-	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build
-	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build purge
+	cp -rf $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME) $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx-build
+	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx-build purge
 	$(info ${GREEN}I llama-cpp build info:avx${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
-	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build/grpc-server llama-cpp-avx
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="$(BACKEND_NAME)-avx-build" build-llama-cpp-grpc-server
+	cp -rfv $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx-build/grpc-server llama-cpp-avx

 llama-cpp-fallback: llama.cpp
-	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build
-	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build purge
+	cp -rf $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME) $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-fallback-build
+	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-fallback-build purge
 	$(info ${GREEN}I llama-cpp build info:fallback${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
-	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="$(BACKEND_NAME)-fallback-build" build-llama-cpp-grpc-server
+	cp -rfv $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-fallback-build/grpc-server llama-cpp-fallback

 llama-cpp-grpc: llama.cpp
-	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
-	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge
+	cp -rf $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME) $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-grpc-build
+	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-grpc-build purge
 	$(info ${GREEN}I llama-cpp build info:grpc${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
-	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="$(BACKEND_NAME)-grpc-build" build-llama-cpp-grpc-server
+	cp -rfv $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-grpc-build/grpc-server llama-cpp-grpc

 llama-cpp-rpc-server: llama-cpp-grpc
-	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/llama.cpp/build/bin/rpc-server llama-cpp-rpc-server
+	cp -rf $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-grpc-build/llama.cpp/build/bin/rpc-server llama-cpp-rpc-server

 llama.cpp:
 	mkdir -p llama.cpp
@@ -133,30 +148,30 @@ llama.cpp:
 	git init && \
 	git remote add origin $(LLAMA_REPO)  && \
 	git fetch origin && \
-	git checkout -b build $(LLAMA_VERSION) && \
+	(git checkout -b build $(LLAMA_VERSION) || git checkout -b build origin/$(LLAMA_VERSION)) && \
 	git submodule update --init --recursive --depth 1 --single-branch

-llama.cpp/tools/grpc-server: llama.cpp
-	mkdir -p llama.cpp/tools/grpc-server
-	bash prepare.sh
+llama.cpp/$(GRPC_SERVER_DIR): llama.cpp
+	mkdir -p llama.cpp/$(GRPC_SERVER_DIR)
+	SHARED_DIR=$(SHARED_DIR) SERVER_SOURCE_DIR=$(SERVER_SOURCE_DIR) GRPC_SERVER_DIR=$(GRPC_SERVER_DIR) bash $(SHARED_DIR)/prepare.sh

 rebuild:
-	bash prepare.sh
+	SHARED_DIR=$(SHARED_DIR) SERVER_SOURCE_DIR=$(SERVER_SOURCE_DIR) GRPC_SERVER_DIR=$(GRPC_SERVER_DIR) bash $(SHARED_DIR)/prepare.sh
 	rm -rf grpc-server
 	$(MAKE) grpc-server

 package:
-	bash package.sh
+	bash $(SHARED_DIR)/package.sh

 purge:
 	rm -rf llama.cpp/build
-	rm -rf llama.cpp/tools/grpc-server
+	rm -rf llama.cpp/$(GRPC_SERVER_DIR)
 	rm -rf grpc-server

 clean: purge
 	rm -rf llama.cpp

-grpc-server: llama.cpp llama.cpp/tools/grpc-server
+grpc-server: llama.cpp llama.cpp/$(GRPC_SERVER_DIR)
 	@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
 ifneq (,$(findstring sycl,$(BUILD_TYPE)))
 	+bash -c "source $(ONEAPI_VARS); \
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
@@ -17,7 +17,9 @@
 #include "backend.pb.h"
 #include "backend.grpc.pb.h"
 #include "common.h"
+#ifdef HAS_AUTOPARSER
 #include "chat-auto-parser.h"
+#endif
 #include <getopt.h>
 #include <grpcpp/ext/proto_server_reflection_plugin.h>
 #include <grpcpp/grpcpp.h>
@@ -2665,6 +2667,7 @@ public:
        
        response->set_rendered_template(rendered_template);

+#ifdef HAS_AUTOPARSER
        // Run differential template analysis to detect tool format markers
        if (params_base.use_jinja) {
            try {
@@ -2770,6 +2773,7 @@ public:
                SRV_WRN("ModelMetadata: failed to run autoparser analysis: %s\n", e.what());
            }
        }
+#endif

        return grpc::Status::OK;
    }
--- a/backend/cpp/llama-cpp/package.sh
+++ b/backend/cpp/llama-cpp/package.sh
@@ -5,14 +5,21 @@

 set -e

-CURDIR=$(dirname "$(realpath $0)")
-REPO_ROOT="${CURDIR}/../../.."
+# Use working directory (not script location) so forks that share this script work correctly
+CURDIR=$(pwd)
+SCRIPT_DIR=$(dirname "$(realpath $0)")
+REPO_ROOT="${SCRIPT_DIR}/../../.."

 # Create lib directory
 mkdir -p $CURDIR/package/lib

 cp -avrf $CURDIR/llama-cpp-* $CURDIR/package/
-cp -rfv $CURDIR/run.sh $CURDIR/package/
+# Copy run.sh — prefer local copy, fall back to shared dir (script location)
+if [ -f "$CURDIR/run.sh" ]; then
+    cp -rfv $CURDIR/run.sh $CURDIR/package/
+else
+    cp -rfv $SCRIPT_DIR/run.sh $CURDIR/package/
+fi

 # Detect architecture and copy appropriate libraries
 if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
--- a/backend/cpp/llama-cpp/prepare.sh
+++ b/backend/cpp/llama-cpp/prepare.sh
@@ -1,31 +1,43 @@
 #!/bin/bash

-## Patches
+SHARED_DIR="${SHARED_DIR:-.}"
+SERVER_SOURCE_DIR="${SERVER_SOURCE_DIR:-tools/server}"
+GRPC_SERVER_DIR="${GRPC_SERVER_DIR:-tools/grpc-server}"

 ## Apply patches from the `patches` directory
 if [ -d "patches" ]; then
    for patch in $(ls patches); do
        echo "Applying patch $patch"
        patch -d llama.cpp/ -p1 < patches/$patch
-    done 
+    done
 fi

 set -e

-for file in $(ls llama.cpp/tools/server/); do
-    cp -rfv llama.cpp/tools/server/$file llama.cpp/tools/grpc-server/
+# Copy server source files into grpc-server build directory
+for file in $(ls llama.cpp/${SERVER_SOURCE_DIR}/); do
+    cp -rfv llama.cpp/${SERVER_SOURCE_DIR}/$file llama.cpp/${GRPC_SERVER_DIR}/
 done

-cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
-cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
-cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/
-cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/
+# Copy build files — prefer local overrides, fall back to SHARED_DIR
+for f in CMakeLists.txt grpc-server.cpp; do
+    if [ -f "$f" ]; then
+        cp -r "$f" llama.cpp/${GRPC_SERVER_DIR}/
+    else
+        cp -r "$SHARED_DIR/$f" llama.cpp/${GRPC_SERVER_DIR}/
+    fi
+done
+
+cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/${GRPC_SERVER_DIR}/
+cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/${GRPC_SERVER_DIR}/
+
+# Register the grpc-server directory (last path component of GRPC_SERVER_DIR) in its parent CMakeLists.txt, once
+PARENT_CMAKELISTS="llama.cpp/$(dirname "${GRPC_SERVER_DIR}")/CMakeLists.txt"

 set +e
-if grep -q "grpc-server" llama.cpp/tools/CMakeLists.txt; then
+if grep -qF -- "$(basename "${GRPC_SERVER_DIR}")" "$PARENT_CMAKELISTS"; then
    echo "grpc-server already added"
 else
-    echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
+    echo "add_subdirectory($(basename "${GRPC_SERVER_DIR}"))" >> "$PARENT_CMAKELISTS"
 fi
 set -e
-
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -29,6 +29,34 @@
    nvidia-cuda-12: "cuda12-llama-cpp"
    nvidia-l4t-cuda-12: "nvidia-l4t-arm64-llama-cpp"
    nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-llama-cpp"
+- &llamacpp_tq
+  name: "llama-cpp-tq"
+  alias: "llama-cpp-tq"
+  license: mit
+  description: |
+    TurboQuant llama.cpp fork - quantization research
+  urls:
+    - https://github.com/TheTom/llama-cpp-turboquant
+  tags:
+    - text-to-text
+    - LLM
+    - CPU
+    - GPU
+    - Metal
+    - CUDA
+    - HIP
+  capabilities:
+    default: "cpu-llama-cpp-tq"
+    nvidia: "cuda12-llama-cpp-tq"
+    intel: "intel-sycl-f16-llama-cpp-tq"
+    amd: "rocm-llama-cpp-tq"
+    metal: "metal-llama-cpp-tq"
+    vulkan: "vulkan-llama-cpp-tq"
+    nvidia-l4t: "nvidia-l4t-arm64-llama-cpp-tq"
+    nvidia-cuda-13: "cuda13-llama-cpp-tq"
+    nvidia-cuda-12: "cuda12-llama-cpp-tq"
+    nvidia-l4t-cuda-12: "nvidia-l4t-arm64-llama-cpp-tq"
+    nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-llama-cpp-tq"
 - &whispercpp
  name: "whisper"
  alias: "whisper"
@@ -1252,6 +1280,57 @@
  uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-llama-cpp"
  mirrors:
    - localai/localai-backends:master-gpu-nvidia-cuda-13-llama-cpp
+# llama-cpp-tq (TurboQuant) concrete backends
+- !!merge <<: *llamacpp_tq
+  name: "cpu-llama-cpp-tq"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-llama-cpp-tq"
+  mirrors:
+    - localai/localai-backends:latest-cpu-llama-cpp-tq
+- !!merge <<: *llamacpp_tq
+  name: "cuda12-llama-cpp-tq"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-llama-cpp-tq"
+  mirrors:
+    - localai/localai-backends:latest-gpu-nvidia-cuda-12-llama-cpp-tq
+- !!merge <<: *llamacpp_tq
+  name: "cuda13-llama-cpp-tq"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-llama-cpp-tq"
+  mirrors:
+    - localai/localai-backends:latest-gpu-nvidia-cuda-13-llama-cpp-tq
+- !!merge <<: *llamacpp_tq
+  name: "rocm-llama-cpp-tq"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-llama-cpp-tq"
+  mirrors:
+    - localai/localai-backends:latest-gpu-rocm-hipblas-llama-cpp-tq
+- !!merge <<: *llamacpp_tq
+  name: "intel-sycl-f16-llama-cpp-tq"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-llama-cpp-tq"
+  mirrors:
+    - localai/localai-backends:latest-gpu-intel-sycl-f16-llama-cpp-tq
+- !!merge <<: *llamacpp_tq
+  name: "intel-sycl-f32-llama-cpp-tq"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-llama-cpp-tq"
+  mirrors:
+    - localai/localai-backends:latest-gpu-intel-sycl-f32-llama-cpp-tq
+- !!merge <<: *llamacpp_tq
+  name: "vulkan-llama-cpp-tq"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-llama-cpp-tq"
+  mirrors:
+    - localai/localai-backends:latest-gpu-vulkan-llama-cpp-tq
+- !!merge <<: *llamacpp_tq
+  name: "metal-llama-cpp-tq"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-llama-cpp-tq"
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-llama-cpp-tq
+- !!merge <<: *llamacpp_tq
+  name: "nvidia-l4t-arm64-llama-cpp-tq"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-llama-cpp-tq"
+  mirrors:
+    - localai/localai-backends:latest-nvidia-l4t-arm64-llama-cpp-tq
+- !!merge <<: *llamacpp_tq
+  name: "cuda13-nvidia-l4t-arm64-llama-cpp-tq"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-llama-cpp-tq"
+  mirrors:
+    - localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-llama-cpp-tq
 ## whisper
 - !!merge <<: *whispercpp
  name: "nvidia-l4t-arm64-whisper"
--- a/docs/content/features/GPU-acceleration.md
+++ b/docs/content/features/GPU-acceleration.md
@@ -1,6 +1,6 @@
 +++
 disableToc = false
-title = "⚡ GPU acceleration"
+title = "GPU Acceleration"
 weight = 9
 url = "/features/gpu-acceleration/"
 +++
--- a/docs/content/features/_index.en.md
+++ b/docs/content/features/_index.en.md
@@ -27,8 +27,7 @@ LocalAI provides a comprehensive set of features for running AI models locally.
 - **[Realtime API](openai-realtime/)** - Low-latency multi-modal conversations (voice+text) over WebSocket
 - **[Constrained Grammars](constrained_grammars/)** - Control model output format with BNF grammars
 - **[GPU Acceleration](GPU-acceleration/)** - Optimize performance with GPU support
-- **[Distributed Inference](distributed_inferencing/)** - Scale inference across multiple nodes
-- **[Distributed Mode](distributed-mode/)** - Horizontal scaling with PostgreSQL, NATS, and remote backend nodes
+- **[Distribution](distribution/)** - Scale inference across multiple nodes (P2P federation or production distributed mode)
 - **[P2P API](p2p/)** - Monitor and manage P2P worker and federated nodes
 - **[Model Context Protocol (MCP)](mcp/)** - Enable agentic capabilities with MCP integration
 - **[Agents](agents/)** - Autonomous AI agents with tools, knowledge base, and skills
--- a/docs/content/features/agents.md
+++ b/docs/content/features/agents.md
@@ -1,6 +1,6 @@
 +++
 disableToc = false
-title = "🤖 Agents"
+title = "Agents"
 weight = 21
 url = '/features/agents'
 +++
--- a/docs/content/features/audio-to-text.md
+++ b/docs/content/features/audio-to-text.md
@@ -1,6 +1,6 @@
 +++
 disableToc = false
-title = "🔈 Audio to text"
+title = "Audio to Text"
 weight = 16
 url = "/features/audio-to-text/"
 +++
--- a/docs/content/features/authentication.md
+++ b/docs/content/features/authentication.md
@@ -1,6 +1,6 @@
 +++
 disableToc = false
-title = "🔐 Authentication & Authorization"
+title = "Authentication & Authorization"
 weight = 26
 url = '/features/authentication'
 +++
--- a/docs/content/features/backends.md
+++ b/docs/content/features/backends.md
@@ -1,5 +1,5 @@
 ---
-title: "⚙️ Backends"
+title: "Backends"
 description: "Learn how to use, manage, and develop backends in LocalAI"
 weight: 4
 url: "/backends/"
--- a/docs/content/features/constrained_grammars.md
+++ b/docs/content/features/constrained_grammars.md
@@ -1,6 +1,6 @@
 +++
 disableToc = false
-title = "✍️ Constrained Grammars"
+title = "Constrained Grammars"
 weight = 15
 url = "/features/constrained_grammars/"
 +++
--- a/docs/content/features/distributed-mode.md
+++ b/docs/content/features/distributed-mode.md
@@ -5,7 +5,7 @@ weight = 14
 url = "/features/distributed-mode/"
 +++

-Distributed mode enables horizontal scaling of LocalAI across multiple machines using **PostgreSQL** for state and node registry, and **NATS** for real-time coordination. Unlike the [P2P/federation approach](/features/distribute/), distributed mode is designed for production deployments and Kubernetes environments where you need centralized management, health monitoring, and deterministic routing.
+Distributed mode enables horizontal scaling of LocalAI across multiple machines using **PostgreSQL** for state and node registry, and **NATS** for real-time coordination. Unlike the [P2P/federation approach]({{% relref "features/distributed_inferencing" %}}), distributed mode is designed for production deployments and Kubernetes environments where you need centralized management, health monitoring, and deterministic routing.

 {{% notice note %}}
 Distributed mode requires authentication enabled with a **PostgreSQL** database — SQLite is not supported. This is because the node registry, job store, and other distributed state are stored in PostgreSQL tables.
--- a/docs/content/features/distributed_inferencing.md
+++ b/docs/content/features/distributed_inferencing.md
@@ -1,12 +1,12 @@
 +++
 disableToc = false
-title = "🆕🖧 Distributed Inference"
+title = "P2P / Federated Inference"
 weight = 15
 url = "/features/distribute/"
 +++

 {{% notice tip %}}
-Looking for production-grade horizontal scaling with PostgreSQL and NATS? See [Distributed Mode](/features/distributed-mode/).
+Looking for production-grade horizontal scaling with PostgreSQL and NATS? See [Distributed Mode]({{% relref "features/distributed-mode" %}}).
 {{% /notice %}}

 This functionality enables LocalAI to distribute inference requests across multiple worker nodes, improving efficiency and performance. Nodes are automatically discovered and connect via p2p by using a shared token which makes sure the communication is secure and private between the nodes of the network.
--- a/docs/content/features/distribution.md
+++ b/docs/content/features/distribution.md
@@ -0,0 +1,34 @@
++++
+disableToc = false
+title = "Distribution"
+weight = 13
+url = "/features/distribution/"
++++
+
+LocalAI supports distributing inference workloads across multiple machines. There are two approaches, each suited to different use cases:
+
+## Distributed Mode (PostgreSQL + NATS)
+
+Production-grade horizontal scaling with centralized management. Frontends are stateless LocalAI instances behind a load balancer; workers self-register and receive backends dynamically via NATS. State lives in PostgreSQL.
+
+**Best for:** production deployments, Kubernetes, managed infrastructure.
+
+[Read more]({{% relref "features/distributed-mode" %}})
+
+## P2P / Federated Inference
+
+Peer-to-peer networking via libp2p. Share a token to form a cluster with automatic discovery — no central server required. Supports federated load balancing and worker-mode weight sharding.
+
+**Best for:** ad-hoc clusters, community sharing, quick experimentation.
+
+[Read more]({{% relref "features/distributed_inferencing" %}})
+
+## Quick Comparison
+
+| | P2P / Federation | Distributed Mode |
+|---|---|---|
+| **Discovery** | Automatic via libp2p token | Self-registration to frontend URL |
+| **State storage** | In-memory / ledger | PostgreSQL |
+| **Coordination** | Gossip protocol | NATS messaging |
+| **Node management** | Automatic | REST API + WebUI |
+| **Setup complexity** | Minimal (share a token) | Requires PostgreSQL + NATS |
--- a/docs/content/features/embeddings.md
+++ b/docs/content/features/embeddings.md
@@ -1,6 +1,6 @@
 +++
 disableToc = false
-title = "🧠 Embeddings"
+title = "Embeddings"
 weight = 13
 url = "/features/embeddings/"
 +++
--- a/docs/content/features/gpt-vision.md
+++ b/docs/content/features/gpt-vision.md
@@ -1,7 +1,7 @@

 +++
 disableToc = false
-title = "🥽 GPT Vision"
+title = "GPT Vision"
 weight = 14
 url = "/features/gpt-vision/"
 +++
--- a/docs/content/features/image-generation.md
+++ b/docs/content/features/image-generation.md
@@ -1,7 +1,7 @@

 +++
 disableToc = false
-title = "🎨 Image generation"
+title = "Image Generation"
 weight = 12
 url = "/features/image-generation/"
 +++
--- a/docs/content/features/mcp.md
+++ b/docs/content/features/mcp.md
@@ -1,5 +1,5 @@
 +++
-title = "🔗 Model Context Protocol (MCP)"
+title = "Model Context Protocol (MCP)"
 weight = 20
 toc = true
 description = "Agentic capabilities with Model Context Protocol integration"
--- a/docs/content/features/model-gallery.md
+++ b/docs/content/features/model-gallery.md
@@ -1,7 +1,7 @@

 +++
 disableToc = false
-title = "🖼️ Model gallery"
+title = "Model Gallery"
 weight = 18
 url = '/models'
 +++
--- a/docs/content/features/object-detection.md
+++ b/docs/content/features/object-detection.md
@@ -1,6 +1,6 @@
 +++
 disableToc = false
-title = "🔍 Object detection"
+title = "Object Detection"
 weight = 13
 url = "/features/object-detection/"
 +++
--- a/docs/content/features/openai-functions.md
+++ b/docs/content/features/openai-functions.md
@@ -1,7 +1,7 @@

 +++
 disableToc = false
-title = "🔥 OpenAI functions and tools"
+title = "OpenAI Functions and Tools"
 weight = 17
 url = "/features/openai-functions/"
 +++
--- a/docs/content/features/reranker.md
+++ b/docs/content/features/reranker.md
@@ -1,7 +1,7 @@

 +++
 disableToc = false
-title = "📈 Reranker"
+title = "Reranker"
 weight = 11
 url = "/features/reranker/"
 +++
--- a/docs/content/features/runtime-settings.md
+++ b/docs/content/features/runtime-settings.md
@@ -1,6 +1,6 @@
 +++
 disableToc = false
-title = "⚙️ Runtime Settings"
+title = "Runtime Settings"
 weight = 25
 url = '/features/runtime-settings'
 +++
--- a/docs/content/features/stores.md
+++ b/docs/content/features/stores.md
@@ -1,7 +1,7 @@

 +++
 disableToc = false
-title = "💾 Stores"
+title = "Stores"
 weight = 18
 url = '/stores'
 +++
--- a/docs/content/features/text-generation.md
+++ b/docs/content/features/text-generation.md
@@ -1,7 +1,7 @@

 +++
 disableToc = false
-title = "📖 Text generation (GPT)"
+title = "Text Generation (GPT)"
 weight = 10
 url = "/features/text-generation/"
 +++
--- a/docs/content/features/text-to-audio.md
+++ b/docs/content/features/text-to-audio.md
@@ -1,7 +1,7 @@

 +++
 disableToc = false
-title = "🗣 Text to audio (TTS)"
+title = "Text to Audio (TTS)"
 weight = 11
 url = "/features/text-to-audio/"
 +++
--- a/docs/content/getting-started/quickstart.md
+++ b/docs/content/getting-started/quickstart.md
@@ -119,7 +119,7 @@ For production deployments or when you need more compute, LocalAI supports distr
 - **P2P federation**: Connect multiple LocalAI instances for load-balanced inference
 - **Model sharding**: Split large models across multiple machines

-See the **Nodes** page in the web interface or the [Distribution docs]({{% relref "features/distribute" %}}) for setup instructions.
+See the **Nodes** page in the web interface or the [Distribution docs]({{% relref "features/distribution" %}}) for setup instructions.

 ## What's Next?

--- a/docs/content/integrations.md
+++ b/docs/content/integrations.md
@@ -72,9 +72,10 @@ Feel free to open up a Pull request (by clicking at the "Edit page" below) to ge

 ### Home Automation

- [hass-openai-custom-conversation](https://github.com/drndos/hass-openai-custom-conversation) — Home Assistant integration
- [ha-llmvision](https://github.com/valentinfrlch/ha-llmvision) — Home Assistant LLM Vision
- [HA-LocalAI-Monitor](https://github.com/loryanstrant/HA-LocalAI-Monitor) — Home Assistant monitoring
+- [Extended OpenAI Conversation](https://github.com/jekalmin/extended_openai_conversation) — Conversation agent for Home Assistant that supports a custom OpenAI endpoint
+- [LLM Vision](https://github.com/valentinfrlch/ha-llmvision) — Image & video feed analysis for Home Assistant
+- [OpenAI TTS Speech Service](https://github.com/sfortis/openai_tts) — OpenAI TTS custom component for Home Assistant
+- [LocalAI Monitor](https://github.com/loryanstrant/HA-LocalAI-Monitor) — Monitor & control of LocalAI from Home Assistant
 - Nextcloud [integration plugin](https://apps.nextcloud.com/apps/integration_openai) and [AI assistant](https://apps.nextcloud.com/apps/assistant)

 ### Automation & DevOps
Author	SHA1	Message	Date
Ettore Di Giacinto	6e11f882f7	feat(turboquant.cpp): add new backend Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2026-04-03 20:57:15 +00:00
Ettore Di Giacinto	8577bdcebc	Update asset links in README.md Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>	2026-04-03 10:24:08 +02:00
Ettore Di Giacinto	0d489c7a0d	Add guided tour and update screenshots section Updated README to include a guided tour section with links to various assets and details about agents and usage metrics. Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>	2026-04-03 10:23:03 +02:00
Ettore Di Giacinto	11dc54bda9	fix(docs): commit distribution.md Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2026-04-03 10:14:13 +02:00
Ettore Di Giacinto	7e0b73deaa	fix(docs): fix broken references to distributed mode Signed-off-by: Ettore Di Giacinto <mudler@localai.io>	2026-04-03 09:46:06 +02:00
LocalAI [bot]	c0a023d13d	chore: ⬆️ Update ggml-org/llama.cpp to `a1cfb645307edc61a89e41557f290f441043d3c2` (#9203 ) ⬆️ Update ggml-org/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>	2026-04-03 08:30:15 +02:00
Loryan Strant	0d3ae1c295	docs: Update Home Assistant integrations list (#9206 ) Update Home Assistant integrations list Signed-off-by: Loryan Strant <51473494+loryanstrant@users.noreply.github.com>	2026-04-03 08:30:00 +02:00