mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-21 07:05:07 -04:00
Compare commits
7 Commits
v4.1.0
...
feat/tq-ik
| Author | SHA1 | Date | |
|---|---|---|---|
| | 6e11f882f7 | | |
| | 8577bdcebc | | |
| | 0d489c7a0d | | |
| | 11dc54bda9 | | |
| | 7e0b73deaa | | |
| | c0a023d13d | | |
| | 0d3ae1c295 | | |
92
.github/workflows/backend.yml
vendored
92
.github/workflows/backend.yml
vendored
@@ -1828,6 +1828,98 @@ jobs:
|
||||
dockerfile: "./backend/Dockerfile.llama-cpp"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
# llama-cpp-tq (TurboQuant fork)
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64,linux/arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-cpu-llama-cpp-tq'
|
||||
runs-on: 'bigger-runner'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "llama-cpp-tq"
|
||||
dockerfile: "./backend/Dockerfile.llama-cpp"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "8"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp-tq'
|
||||
runs-on: 'bigger-runner'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "llama-cpp-tq"
|
||||
dockerfile: "./backend/Dockerfile.llama-cpp"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-nvidia-cuda-13-llama-cpp-tq'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "llama-cpp-tq"
|
||||
dockerfile: "./backend/Dockerfile.llama-cpp"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "13"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
skip-drivers: 'false'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-cuda-13-arm64-llama-cpp-tq'
|
||||
base-image: "ubuntu:24.04"
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
ubuntu-version: '2404'
|
||||
backend: "llama-cpp-tq"
|
||||
dockerfile: "./backend/Dockerfile.llama-cpp"
|
||||
context: "./"
|
||||
- build-type: 'hipblas'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-rocm-hipblas-llama-cpp-tq'
|
||||
runs-on: 'ubuntu-latest'
|
||||
base-image: "rocm/dev-ubuntu-24.04:6.4.4"
|
||||
skip-drivers: 'false'
|
||||
backend: "llama-cpp-tq"
|
||||
dockerfile: "./backend/Dockerfile.llama-cpp"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "0"
|
||||
platforms: 'linux/arm64'
|
||||
skip-drivers: 'false'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-nvidia-l4t-arm64-llama-cpp-tq'
|
||||
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
|
||||
runs-on: 'ubuntu-24.04-arm'
|
||||
backend: "llama-cpp-tq"
|
||||
dockerfile: "./backend/Dockerfile.llama-cpp"
|
||||
context: "./"
|
||||
ubuntu-version: '2204'
|
||||
- build-type: 'vulkan'
|
||||
cuda-major-version: ""
|
||||
cuda-minor-version: ""
|
||||
platforms: 'linux/amd64,linux/arm64'
|
||||
tag-latest: 'auto'
|
||||
tag-suffix: '-gpu-vulkan-llama-cpp-tq'
|
||||
runs-on: 'bigger-runner'
|
||||
base-image: "ubuntu:24.04"
|
||||
skip-drivers: 'false'
|
||||
backend: "llama-cpp-tq"
|
||||
dockerfile: "./backend/Dockerfile.llama-cpp"
|
||||
context: "./"
|
||||
ubuntu-version: '2404'
|
||||
# Stablediffusion-ggml
|
||||
- build-type: ''
|
||||
cuda-major-version: ""
|
||||
|
||||
7
.github/workflows/bump_deps.yaml
vendored
7
.github/workflows/bump_deps.yaml
vendored
@@ -14,6 +14,11 @@ jobs:
|
||||
variable: "LLAMA_VERSION"
|
||||
branch: "master"
|
||||
file: "backend/cpp/llama-cpp/Makefile"
|
||||
- repository: "TheTom/llama-cpp-turboquant"
|
||||
variable: "LLAMA_VERSION"
|
||||
branch: "master"
|
||||
file: "backend/cpp/llama-cpp-tq/Makefile"
|
||||
branch_suffix: "-tq"
|
||||
- repository: "ggml-org/whisper.cpp"
|
||||
variable: "WHISPER_CPP_VERSION"
|
||||
branch: "master"
|
||||
@@ -60,7 +65,7 @@ jobs:
|
||||
push-to-fork: ci-forks/LocalAI
|
||||
commit-message: ':arrow_up: Update ${{ matrix.repository }}'
|
||||
title: 'chore: :arrow_up: Update ${{ matrix.repository }} to `${{ steps.bump.outputs.commit }}`'
|
||||
branch: "update/${{ matrix.variable }}"
|
||||
branch: "update/${{ matrix.variable }}${{ matrix.branch_suffix }}"
|
||||
body: ${{ steps.bump.outputs.message }}
|
||||
signoff: true
|
||||
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -9,6 +9,7 @@ prepare-sources
|
||||
/backend/cpp/llama-cpp/llama.cpp
|
||||
/backend/cpp/llama-*
|
||||
!backend/cpp/llama-cpp
|
||||
!backend/cpp/llama-cpp-tq
|
||||
/backends
|
||||
/backend-images
|
||||
/result.yaml
|
||||
|
||||
4
Makefile
4
Makefile
@@ -544,8 +544,9 @@ backend-images:
|
||||
mkdir -p backend-images
|
||||
|
||||
# Backend metadata: BACKEND_NAME | DOCKERFILE_TYPE | BUILD_CONTEXT | PROGRESS_FLAG | NEEDS_BACKEND_ARG
|
||||
# llama-cpp is special - uses llama-cpp Dockerfile and doesn't need BACKEND arg
|
||||
# llama-cpp and forks - use llama-cpp Dockerfile
|
||||
BACKEND_LLAMA_CPP = llama-cpp|llama-cpp|.|false|false
|
||||
BACKEND_LLAMA_CPP_TQ = llama-cpp-tq|llama-cpp|.|false|true
|
||||
|
||||
# Golang backends
|
||||
BACKEND_PIPER = piper|golang|.|false|true
|
||||
@@ -609,6 +610,7 @@ endef
|
||||
|
||||
# Generate all docker-build targets
|
||||
$(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP)))
|
||||
$(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP_TQ)))
|
||||
$(eval $(call generate-docker-build-target,$(BACKEND_PIPER)))
|
||||
$(eval $(call generate-docker-build-target,$(BACKEND_LOCAL_STORE)))
|
||||
$(eval $(call generate-docker-build-target,$(BACKEND_HUGGINGFACE)))
|
||||
|
||||
30
README.md
30
README.md
@@ -42,16 +42,38 @@ Created and maintained by [Ettore Di Giacinto](https://github.com/mudler).
|
||||
|
||||
> [:book: Documentation](https://localai.io/) | [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) | [💻 Quickstart](https://localai.io/basics/getting_started/) | [🖼️ Models](https://models.localai.io/) | [❓FAQ](https://localai.io/faq/)
|
||||
|
||||
## Screenshots
|
||||
|
||||
### Chat, Model gallery
|
||||
## Guided tour
|
||||
|
||||
https://github.com/user-attachments/assets/08cbb692-57da-48f7-963d-2e7b43883c18
|
||||
|
||||
### Agents
|
||||
<details>
|
||||
|
||||
<summary>
|
||||
Click to see more!
|
||||
</summary>
|
||||
|
||||
#### User and auth
|
||||
|
||||
https://github.com/user-attachments/assets/228fa9ad-81a3-4d43-bfb9-31557e14a36c
|
||||
|
||||
#### Agents
|
||||
|
||||
https://github.com/user-attachments/assets/6270b331-e21d-4087-a540-6290006b381a
|
||||
|
||||
#### Usage metrics per user
|
||||
|
||||
https://github.com/user-attachments/assets/cbb03379-23b4-4e3d-bd26-d152f057007f
|
||||
|
||||
#### Fine-tuning and Quantization
|
||||
|
||||
https://github.com/user-attachments/assets/5ba4ace9-d3df-4795-b7d4-b0b404ea71ee
|
||||
|
||||
#### WebRTC
|
||||
|
||||
https://github.com/user-attachments/assets/ed88e34c-fed3-4b83-8a67-4716a9feeb7b
|
||||
|
||||
</details>
|
||||
|
||||
## Quickstart
|
||||
|
||||
### macOS
|
||||
|
||||
@@ -58,7 +58,9 @@ ARG CUDA_DOCKER_ARCH
|
||||
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
|
||||
ARG CMAKE_ARGS
|
||||
ENV CMAKE_ARGS=${CMAKE_ARGS}
|
||||
ARG BACKEND=rerankers
|
||||
ARG BACKEND=llama-cpp
|
||||
ARG LLAMA_BACKEND_DIR=${BACKEND}
|
||||
ENV LLAMA_BACKEND_DIR=${LLAMA_BACKEND_DIR}
|
||||
ARG BUILD_TYPE
|
||||
ENV BUILD_TYPE=${BUILD_TYPE}
|
||||
ARG CUDA_MAJOR_VERSION
|
||||
@@ -255,32 +257,27 @@ if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then
|
||||
CUDA_ARCH_ESC="${CUDA_DOCKER_ARCH//;/\\;}"
|
||||
export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
|
||||
echo "CMAKE_ARGS(env) = ${CMAKE_ARGS}"
|
||||
rm -rf /LocalAI/backend/cpp/llama-cpp-*-build
|
||||
rm -rf /LocalAI/backend/cpp/${LLAMA_BACKEND_DIR}-*-build
|
||||
fi
|
||||
|
||||
cd /LocalAI/backend/cpp/${LLAMA_BACKEND_DIR}
|
||||
|
||||
if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then
|
||||
cd /LocalAI/backend/cpp/llama-cpp
|
||||
make llama-cpp-fallback
|
||||
make llama-cpp-grpc
|
||||
make llama-cpp-rpc-server
|
||||
make ARCH=aarch64 build-variants
|
||||
else
|
||||
cd /LocalAI/backend/cpp/llama-cpp
|
||||
make llama-cpp-avx
|
||||
make llama-cpp-avx2
|
||||
make llama-cpp-avx512
|
||||
make llama-cpp-fallback
|
||||
make llama-cpp-grpc
|
||||
make llama-cpp-rpc-server
|
||||
make build-variants
|
||||
fi
|
||||
EOT
|
||||
|
||||
|
||||
# Copy libraries using a script to handle architecture differences
|
||||
RUN make -BC /LocalAI/backend/cpp/llama-cpp package
|
||||
RUN make -BC /LocalAI/backend/cpp/${LLAMA_BACKEND_DIR} package
|
||||
|
||||
|
||||
FROM scratch
|
||||
|
||||
ARG BACKEND=llama-cpp
|
||||
ARG LLAMA_BACKEND_DIR=${BACKEND}
|
||||
|
||||
# Copy all available binaries (the build process only creates the appropriate ones for the target architecture)
|
||||
COPY --from=builder /LocalAI/backend/cpp/llama-cpp/package/. ./
|
||||
COPY --from=builder /LocalAI/backend/cpp/${LLAMA_BACKEND_DIR}/package/. ./
|
||||
|
||||
6
backend/cpp/llama-cpp-tq/Makefile
Normal file
6
backend/cpp/llama-cpp-tq/Makefile
Normal file
@@ -0,0 +1,6 @@
|
||||
# Wrapper Makefile for the llama-cpp-tq backend.
# It presets the variables below and then includes the shared llama-cpp Makefile.
# The shared Makefile assigns these same names with `?=`, so the values set here
# are the ones that take effect for this backend.
#
# LLAMA_VERSION: git ref checked out from LLAMA_REPO. `master` is a branch name;
# the shared `llama.cpp` recipe falls back to `origin/$(LLAMA_VERSION)` when the
# plain checkout fails. The bump_deps workflow has an entry that targets
# LLAMA_VERSION in this file.
LLAMA_VERSION?=master
|
||||
# LLAMA_REPO: repository the shared `llama.cpp` recipe adds as `origin` and fetches.
LLAMA_REPO?=https://github.com/TheTom/llama-cpp-turboquant
|
||||
# BACKEND_NAME: used by the shared variant targets to name the source directory
# that is copied and the per-variant `$(BACKEND_NAME)-<variant>-build` directories.
BACKEND_NAME?=llama-cpp-tq
|
||||
# SHARED_DIR: directory that holds the shared prepare.sh and package.sh scripts
# invoked by the shared Makefile.
SHARED_DIR?=$(CURDIR)/../llama-cpp
|
||||
|
||||
# NOTE: a relative include path is looked up from the directory make is running
# in, so this file is expected to be used from its own directory (e.g. `make -C`).
include ../llama-cpp/Makefile
|
||||
@@ -59,6 +59,11 @@ add_library(hw_grpc_proto
|
||||
|
||||
add_executable(${TARGET} grpc-server.cpp json.hpp httplib.h)
|
||||
|
||||
# Enable autoparser support if the header exists (not present in all llama.cpp forks)
# When the header is found, HAS_AUTOPARSER is defined for ${TARGET} only (PRIVATE).
# grpc-server.cpp uses that macro to guard both `#include "chat-auto-parser.h"`
# and the template-analysis code that depends on it.
# NOTE(review): this check only looks in the directory containing this
# CMakeLists.txt. The visible prepare.sh copies the server source files, json.hpp,
# httplib.h, CMakeLists.txt and grpc-server.cpp into that directory. Confirm that
# chat-auto-parser.h is among the copied server sources; if it is only reachable
# through an include path, the macro is silently never defined and autoparser
# support is disabled for every backend, not just forks.
|
||||
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/chat-auto-parser.h")
|
||||
target_compile_definitions(${TARGET} PRIVATE HAS_AUTOPARSER)
|
||||
endif()
|
||||
|
||||
target_include_directories(${TARGET} PRIVATE ../llava)
|
||||
target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})
|
||||
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
|
||||
LLAMA_VERSION?=95a6ebabb277c4cc18247e7bc2a5502133caca63
|
||||
LLAMA_VERSION?=a1cfb645307edc61a89e41557f290f441043d3c2
|
||||
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
|
||||
BACKEND_NAME?=llama-cpp
|
||||
SHARED_DIR?=$(CURDIR)
|
||||
GRPC_SERVER_DIR?=tools/grpc-server
|
||||
SERVER_SOURCE_DIR?=tools/server
|
||||
|
||||
CMAKE_ARGS?=
|
||||
BUILD_TYPE?=
|
||||
@@ -67,6 +71,17 @@ ifeq ($(BUILD_TYPE),sycl_f32)
|
||||
-DCMAKE_CXX_FLAGS="-fsycl"
|
||||
endif
|
||||
|
||||
# Variants to build for each architecture (can be overridden by forks)
# Both lists are assigned with `?=`, so a wrapper Makefile that sets them before
# including this file, or a value from the command line or environment, wins.
# llama-cpp-rpc-server is listed after llama-cpp-grpc, which is its prerequisite.
|
||||
X86_64_VARIANTS ?= llama-cpp-avx llama-cpp-avx2 llama-cpp-avx512 llama-cpp-fallback llama-cpp-grpc llama-cpp-rpc-server
|
||||
ARM64_VARIANTS ?= llama-cpp-fallback llama-cpp-grpc llama-cpp-rpc-server
|
||||
|
||||
# build-variants: build every variant in the list selected by ARCH, one sub-make
# per variant, in list order. `|| exit 1` aborts the loop on the first failure.
# ARCH=aarch64 selects ARM64_VARIANTS; any other value selects X86_64_VARIANTS.
# The Dockerfile build script passes ARCH=aarch64 for BUILD_TYPE=hipblas as well
# as for arm64 targets, so ARM64_VARIANTS is effectively the "no x86 SIMD
# variants" list rather than a strictly ARM one.
# NOTE(review): no .PHONY declaration for build-variants is visible here; confirm.
build-variants:
|
||||
ifeq ($(ARCH),aarch64)
|
||||
@for v in $(ARM64_VARIANTS); do $(MAKE) $$v || exit 1; done
|
||||
else
|
||||
@for v in $(X86_64_VARIANTS); do $(MAKE) $$v || exit 1; done
|
||||
endif
|
||||
|
||||
INSTALLED_PACKAGES=$(CURDIR)/../grpc/installed_packages
|
||||
INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
|
||||
ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
|
||||
@@ -90,42 +105,42 @@ else
|
||||
endif
|
||||
|
||||
llama-cpp-avx2: llama.cpp
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build purge
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME) $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx2-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx2-build purge
|
||||
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-cpp-avx2-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build/grpc-server llama-cpp-avx2
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="$(BACKEND_NAME)-avx2-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx2-build/grpc-server llama-cpp-avx2
|
||||
|
||||
llama-cpp-avx512: llama.cpp
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build purge
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME) $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx512-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx512-build purge
|
||||
$(info ${GREEN}I llama-cpp build info:avx512${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-cpp-avx512-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build/grpc-server llama-cpp-avx512
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="$(BACKEND_NAME)-avx512-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx512-build/grpc-server llama-cpp-avx512
|
||||
|
||||
llama-cpp-avx: llama.cpp
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build purge
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME) $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx-build purge
|
||||
$(info ${GREEN}I llama-cpp build info:avx${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build/grpc-server llama-cpp-avx
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="$(BACKEND_NAME)-avx-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx-build/grpc-server llama-cpp-avx
|
||||
|
||||
llama-cpp-fallback: llama.cpp
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build purge
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME) $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-fallback-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-fallback-build purge
|
||||
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="$(BACKEND_NAME)-fallback-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-fallback-build/grpc-server llama-cpp-fallback
|
||||
|
||||
llama-cpp-grpc: llama.cpp
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME) $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-grpc-build
|
||||
$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-grpc-build purge
|
||||
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="$(BACKEND_NAME)-grpc-build" build-llama-cpp-grpc-server
|
||||
cp -rfv $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-grpc-build/grpc-server llama-cpp-grpc
|
||||
|
||||
llama-cpp-rpc-server: llama-cpp-grpc
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/llama.cpp/build/bin/rpc-server llama-cpp-rpc-server
|
||||
cp -rf $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-grpc-build/llama.cpp/build/bin/rpc-server llama-cpp-rpc-server
|
||||
|
||||
llama.cpp:
|
||||
mkdir -p llama.cpp
|
||||
@@ -133,30 +148,30 @@ llama.cpp:
|
||||
git init && \
|
||||
git remote add origin $(LLAMA_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout -b build $(LLAMA_VERSION) && \
|
||||
(git checkout -b build $(LLAMA_VERSION) || git checkout -b build origin/$(LLAMA_VERSION)) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
llama.cpp/tools/grpc-server: llama.cpp
|
||||
mkdir -p llama.cpp/tools/grpc-server
|
||||
bash prepare.sh
|
||||
llama.cpp/$(GRPC_SERVER_DIR): llama.cpp
|
||||
mkdir -p llama.cpp/$(GRPC_SERVER_DIR)
|
||||
SHARED_DIR=$(SHARED_DIR) SERVER_SOURCE_DIR=$(SERVER_SOURCE_DIR) GRPC_SERVER_DIR=$(GRPC_SERVER_DIR) bash $(SHARED_DIR)/prepare.sh
|
||||
|
||||
rebuild:
|
||||
bash prepare.sh
|
||||
SHARED_DIR=$(SHARED_DIR) SERVER_SOURCE_DIR=$(SERVER_SOURCE_DIR) GRPC_SERVER_DIR=$(GRPC_SERVER_DIR) bash $(SHARED_DIR)/prepare.sh
|
||||
rm -rf grpc-server
|
||||
$(MAKE) grpc-server
|
||||
|
||||
package:
|
||||
bash package.sh
|
||||
bash $(SHARED_DIR)/package.sh
|
||||
|
||||
purge:
|
||||
rm -rf llama.cpp/build
|
||||
rm -rf llama.cpp/tools/grpc-server
|
||||
rm -rf llama.cpp/$(GRPC_SERVER_DIR)
|
||||
rm -rf grpc-server
|
||||
|
||||
clean: purge
|
||||
rm -rf llama.cpp
|
||||
|
||||
grpc-server: llama.cpp llama.cpp/tools/grpc-server
|
||||
grpc-server: llama.cpp llama.cpp/$(GRPC_SERVER_DIR)
|
||||
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
|
||||
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
|
||||
+bash -c "source $(ONEAPI_VARS); \
|
||||
|
||||
@@ -17,7 +17,9 @@
|
||||
#include "backend.pb.h"
|
||||
#include "backend.grpc.pb.h"
|
||||
#include "common.h"
|
||||
#ifdef HAS_AUTOPARSER
|
||||
#include "chat-auto-parser.h"
|
||||
#endif
|
||||
#include <getopt.h>
|
||||
#include <grpcpp/ext/proto_server_reflection_plugin.h>
|
||||
#include <grpcpp/grpcpp.h>
|
||||
@@ -2665,6 +2667,7 @@ public:
|
||||
|
||||
response->set_rendered_template(rendered_template);
|
||||
|
||||
#ifdef HAS_AUTOPARSER
|
||||
// Run differential template analysis to detect tool format markers
|
||||
if (params_base.use_jinja) {
|
||||
try {
|
||||
@@ -2770,6 +2773,7 @@ public:
|
||||
SRV_WRN("ModelMetadata: failed to run autoparser analysis: %s\n", e.what());
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return grpc::Status::OK;
|
||||
}
|
||||
|
||||
@@ -5,14 +5,21 @@
|
||||
|
||||
set -e
|
||||
|
||||
CURDIR=$(dirname "$(realpath $0)")
|
||||
REPO_ROOT="${CURDIR}/../../.."
|
||||
# Use working directory (not script location) so forks that share this script work correctly
|
||||
CURDIR=$(pwd)
|
||||
SCRIPT_DIR=$(dirname "$(realpath $0)")
|
||||
REPO_ROOT="${SCRIPT_DIR}/../../.."
|
||||
|
||||
# Create lib directory
|
||||
mkdir -p $CURDIR/package/lib
|
||||
|
||||
cp -avrf $CURDIR/llama-cpp-* $CURDIR/package/
|
||||
cp -rfv $CURDIR/run.sh $CURDIR/package/
|
||||
# Copy run.sh — prefer local copy, fall back to shared dir (script location)
|
||||
if [ -f "$CURDIR/run.sh" ]; then
|
||||
cp -rfv $CURDIR/run.sh $CURDIR/package/
|
||||
else
|
||||
cp -rfv $SCRIPT_DIR/run.sh $CURDIR/package/
|
||||
fi
|
||||
|
||||
# Detect architecture and copy appropriate libraries
|
||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||
|
||||
@@ -1,31 +1,43 @@
|
||||
#!/bin/bash
|
||||
|
||||
## Patches
|
||||
SHARED_DIR="${SHARED_DIR:-.}"
|
||||
SERVER_SOURCE_DIR="${SERVER_SOURCE_DIR:-tools/server}"
|
||||
GRPC_SERVER_DIR="${GRPC_SERVER_DIR:-tools/grpc-server}"
|
||||
|
||||
## Apply patches from the `patches` directory
|
||||
if [ -d "patches" ]; then
|
||||
for patch in $(ls patches); do
|
||||
echo "Applying patch $patch"
|
||||
patch -d llama.cpp/ -p1 < patches/$patch
|
||||
done
|
||||
done
|
||||
fi
|
||||
|
||||
set -e
|
||||
|
||||
for file in $(ls llama.cpp/tools/server/); do
|
||||
cp -rfv llama.cpp/tools/server/$file llama.cpp/tools/grpc-server/
|
||||
# Copy server source files into grpc-server build directory
|
||||
for file in $(ls llama.cpp/${SERVER_SOURCE_DIR}/); do
|
||||
cp -rfv llama.cpp/${SERVER_SOURCE_DIR}/$file llama.cpp/${GRPC_SERVER_DIR}/
|
||||
done
|
||||
|
||||
cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
|
||||
cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
|
||||
cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/
|
||||
cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/
|
||||
# Copy build files — prefer local overrides, fall back to SHARED_DIR
|
||||
for f in CMakeLists.txt grpc-server.cpp; do
|
||||
if [ -f "$f" ]; then
|
||||
cp -r "$f" llama.cpp/${GRPC_SERVER_DIR}/
|
||||
else
|
||||
cp -r "$SHARED_DIR/$f" llama.cpp/${GRPC_SERVER_DIR}/
|
||||
fi
|
||||
done
|
||||
|
||||
cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/${GRPC_SERVER_DIR}/
|
||||
cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/${GRPC_SERVER_DIR}/
|
||||
|
||||
# Add grpc-server subdirectory to the parent CMakeLists.txt
|
||||
PARENT_CMAKELISTS="llama.cpp/$(dirname ${GRPC_SERVER_DIR})/CMakeLists.txt"
|
||||
|
||||
set +e
|
||||
if grep -q "grpc-server" llama.cpp/tools/CMakeLists.txt; then
|
||||
if grep -q "grpc-server" "$PARENT_CMAKELISTS"; then
|
||||
echo "grpc-server already added"
|
||||
else
|
||||
echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
|
||||
echo "add_subdirectory(grpc-server)" >> "$PARENT_CMAKELISTS"
|
||||
fi
|
||||
set -e
|
||||
|
||||
|
||||
@@ -29,6 +29,34 @@
|
||||
nvidia-cuda-12: "cuda12-llama-cpp"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-llama-cpp"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-llama-cpp"
|
||||
# Meta backend for the TurboQuant llama.cpp fork. The anchor is merged into the
# concrete per-platform "*-llama-cpp-tq" entries further down, which supply the
# image URIs; `capabilities` maps a detected capability to one of those entries.
- &llamacpp_tq
|
||||
name: "llama-cpp-tq"
|
||||
alias: "llama-cpp-tq"
|
||||
license: mit
|
||||
description: |
|
||||
TurboQuant llama.cpp fork - quantization research
|
||||
urls:
|
||||
- https://github.com/TheTom/llama-cpp-turboquant
|
||||
tags:
|
||||
- text-to-text
|
||||
- LLM
|
||||
- CPU
|
||||
- GPU
|
||||
- Metal
|
||||
- CUDA
|
||||
- HIP
|
||||
# NOTE(review): the CI matrix entries added for this backend carry tag suffixes
# for cpu, cuda-12, cuda-13, l4t (cuda 12 and cuda 13), rocm-hipblas and vulkan
# only. No intel-sycl or metal-darwin build for llama-cpp-tq is visible, yet the
# `intel` and `metal` keys below (and the matching concrete entries) point at such
# images. Confirm those images are published, or drop/redirect these keys so
# resolution does not land on an image that cannot be pulled.
|
||||
capabilities:
|
||||
default: "cpu-llama-cpp-tq"
|
||||
nvidia: "cuda12-llama-cpp-tq"
|
||||
intel: "intel-sycl-f16-llama-cpp-tq"
|
||||
amd: "rocm-llama-cpp-tq"
|
||||
metal: "metal-llama-cpp-tq"
|
||||
vulkan: "vulkan-llama-cpp-tq"
|
||||
nvidia-l4t: "nvidia-l4t-arm64-llama-cpp-tq"
|
||||
nvidia-cuda-13: "cuda13-llama-cpp-tq"
|
||||
nvidia-cuda-12: "cuda12-llama-cpp-tq"
|
||||
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-llama-cpp-tq"
|
||||
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-llama-cpp-tq"
|
||||
- &whispercpp
|
||||
name: "whisper"
|
||||
alias: "whisper"
|
||||
@@ -1252,6 +1280,57 @@
|
||||
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-llama-cpp"
|
||||
mirrors:
|
||||
- localai/localai-backends:master-gpu-nvidia-cuda-13-llama-cpp
|
||||
# llama-cpp-tq (TurboQuant) concrete backends
|
||||
- !!merge <<: *llamacpp_tq
|
||||
name: "cpu-llama-cpp-tq"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-llama-cpp-tq"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-cpu-llama-cpp-tq
|
||||
- !!merge <<: *llamacpp_tq
|
||||
name: "cuda12-llama-cpp-tq"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-llama-cpp-tq"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-12-llama-cpp-tq
|
||||
- !!merge <<: *llamacpp_tq
|
||||
name: "cuda13-llama-cpp-tq"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-llama-cpp-tq"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-nvidia-cuda-13-llama-cpp-tq
|
||||
- !!merge <<: *llamacpp_tq
|
||||
name: "rocm-llama-cpp-tq"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-llama-cpp-tq"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-rocm-hipblas-llama-cpp-tq
|
||||
- !!merge <<: *llamacpp_tq
|
||||
name: "intel-sycl-f16-llama-cpp-tq"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-llama-cpp-tq"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-intel-sycl-f16-llama-cpp-tq
|
||||
- !!merge <<: *llamacpp_tq
|
||||
name: "intel-sycl-f32-llama-cpp-tq"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-llama-cpp-tq"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-intel-sycl-f32-llama-cpp-tq
|
||||
- !!merge <<: *llamacpp_tq
|
||||
name: "vulkan-llama-cpp-tq"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-llama-cpp-tq"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-gpu-vulkan-llama-cpp-tq
|
||||
- !!merge <<: *llamacpp_tq
|
||||
name: "metal-llama-cpp-tq"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-llama-cpp-tq"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-metal-darwin-arm64-llama-cpp-tq
|
||||
- !!merge <<: *llamacpp_tq
|
||||
name: "nvidia-l4t-arm64-llama-cpp-tq"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-llama-cpp-tq"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-nvidia-l4t-arm64-llama-cpp-tq
|
||||
- !!merge <<: *llamacpp_tq
|
||||
name: "cuda13-nvidia-l4t-arm64-llama-cpp-tq"
|
||||
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-llama-cpp-tq"
|
||||
mirrors:
|
||||
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-llama-cpp-tq
|
||||
## whisper
|
||||
- !!merge <<: *whispercpp
|
||||
name: "nvidia-l4t-arm64-whisper"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
+++
|
||||
disableToc = false
|
||||
title = "⚡ GPU acceleration"
|
||||
title = "GPU Acceleration"
|
||||
weight = 9
|
||||
url = "/features/gpu-acceleration/"
|
||||
+++
|
||||
|
||||
@@ -27,8 +27,7 @@ LocalAI provides a comprehensive set of features for running AI models locally.
|
||||
- **[Realtime API](openai-realtime/)** - Low-latency multi-modal conversations (voice+text) over WebSocket
|
||||
- **[Constrained Grammars](constrained_grammars/)** - Control model output format with BNF grammars
|
||||
- **[GPU Acceleration](GPU-acceleration/)** - Optimize performance with GPU support
|
||||
- **[Distributed Inference](distributed_inferencing/)** - Scale inference across multiple nodes
|
||||
- **[Distributed Mode](distributed-mode/)** - Horizontal scaling with PostgreSQL, NATS, and remote backend nodes
|
||||
- **[Distribution](distribution/)** - Scale inference across multiple nodes (P2P federation or production distributed mode)
|
||||
- **[P2P API](p2p/)** - Monitor and manage P2P worker and federated nodes
|
||||
- **[Model Context Protocol (MCP)](mcp/)** - Enable agentic capabilities with MCP integration
|
||||
- **[Agents](agents/)** - Autonomous AI agents with tools, knowledge base, and skills
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
+++
|
||||
disableToc = false
|
||||
title = "🤖 Agents"
|
||||
title = "Agents"
|
||||
weight = 21
|
||||
url = '/features/agents'
|
||||
+++
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
+++
|
||||
disableToc = false
|
||||
title = "🔈 Audio to text"
|
||||
title = "Audio to Text"
|
||||
weight = 16
|
||||
url = "/features/audio-to-text/"
|
||||
+++
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
+++
|
||||
disableToc = false
|
||||
title = "🔐 Authentication & Authorization"
|
||||
title = "Authentication & Authorization"
|
||||
weight = 26
|
||||
url = '/features/authentication'
|
||||
+++
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
title: "⚙️ Backends"
|
||||
title: "Backends"
|
||||
description: "Learn how to use, manage, and develop backends in LocalAI"
|
||||
weight: 4
|
||||
url: "/backends/"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
+++
|
||||
disableToc = false
|
||||
title = "✍️ Constrained Grammars"
|
||||
title = "Constrained Grammars"
|
||||
weight = 15
|
||||
url = "/features/constrained_grammars/"
|
||||
+++
|
||||
|
||||
@@ -5,7 +5,7 @@ weight = 14
|
||||
url = "/features/distributed-mode/"
|
||||
+++
|
||||
|
||||
Distributed mode enables horizontal scaling of LocalAI across multiple machines using **PostgreSQL** for state and node registry, and **NATS** for real-time coordination. Unlike the [P2P/federation approach](/features/distribute/), distributed mode is designed for production deployments and Kubernetes environments where you need centralized management, health monitoring, and deterministic routing.
|
||||
Distributed mode enables horizontal scaling of LocalAI across multiple machines using **PostgreSQL** for state and node registry, and **NATS** for real-time coordination. Unlike the [P2P/federation approach]({{% relref "features/distributed_inferencing" %}}), distributed mode is designed for production deployments and Kubernetes environments where you need centralized management, health monitoring, and deterministic routing.
|
||||
|
||||
{{% notice note %}}
|
||||
Distributed mode requires authentication enabled with a **PostgreSQL** database — SQLite is not supported. This is because the node registry, job store, and other distributed state are stored in PostgreSQL tables.
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
+++
|
||||
disableToc = false
|
||||
title = "🆕🖧 Distributed Inference"
|
||||
title = "P2P / Federated Inference"
|
||||
weight = 15
|
||||
url = "/features/distribute/"
|
||||
+++
|
||||
|
||||
{{% notice tip %}}
|
||||
Looking for production-grade horizontal scaling with PostgreSQL and NATS? See [Distributed Mode](/features/distributed-mode/).
|
||||
Looking for production-grade horizontal scaling with PostgreSQL and NATS? See [Distributed Mode]({{% relref "features/distributed-mode" %}}).
|
||||
{{% /notice %}}
|
||||
|
||||
This functionality enables LocalAI to distribute inference requests across multiple worker nodes, improving efficiency and performance. Nodes are discovered automatically and connect over P2P using a shared token, which keeps communication between the nodes of the network secure and private.
|
||||
|
||||
34
docs/content/features/distribution.md
Normal file
34
docs/content/features/distribution.md
Normal file
@@ -0,0 +1,34 @@
|
||||
+++
|
||||
disableToc = false
|
||||
title = "Distribution"
|
||||
weight = 13
|
||||
url = "/features/distribution/"
|
||||
+++
|
||||
|
||||
LocalAI supports distributing inference workloads across multiple machines. There are two approaches, each suited to different use cases:
|
||||
|
||||
## Distributed Mode (PostgreSQL + NATS)
|
||||
|
||||
Production-grade horizontal scaling with centralized management. Frontends are stateless LocalAI instances behind a load balancer; workers self-register and receive backends dynamically via NATS. State lives in PostgreSQL.
|
||||
|
||||
**Best for:** production deployments, Kubernetes, managed infrastructure.
|
||||
|
||||
[Read more]({{% relref "features/distributed-mode" %}})
|
||||
|
||||
## P2P / Federated Inference
|
||||
|
||||
Peer-to-peer networking via libp2p. Share a token to form a cluster with automatic discovery — no central server required. Supports federated load balancing and worker-mode weight sharding.
|
||||
|
||||
**Best for:** ad-hoc clusters, community sharing, quick experimentation.
|
||||
|
||||
[Read more]({{% relref "features/distributed_inferencing" %}})
|
||||
|
||||
## Quick Comparison
|
||||
|
||||
| | P2P / Federation | Distributed Mode |
|
||||
|---|---|---|
|
||||
| **Discovery** | Automatic via libp2p token | Self-registration to frontend URL |
|
||||
| **State storage** | In-memory / ledger | PostgreSQL |
|
||||
| **Coordination** | Gossip protocol | NATS messaging |
|
||||
| **Node management** | Automatic | REST API + WebUI |
|
||||
| **Setup complexity** | Minimal (share a token) | Requires PostgreSQL + NATS |
|
||||
@@ -1,6 +1,6 @@
|
||||
+++
|
||||
disableToc = false
|
||||
title = "🧠 Embeddings"
|
||||
title = "Embeddings"
|
||||
weight = 13
|
||||
url = "/features/embeddings/"
|
||||
+++
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
|
||||
+++
|
||||
disableToc = false
|
||||
title = "🥽 GPT Vision"
|
||||
title = "GPT Vision"
|
||||
weight = 14
|
||||
url = "/features/gpt-vision/"
|
||||
+++
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
|
||||
+++
|
||||
disableToc = false
|
||||
title = "🎨 Image generation"
|
||||
title = "Image Generation"
|
||||
weight = 12
|
||||
url = "/features/image-generation/"
|
||||
+++
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
+++
|
||||
title = "🔗 Model Context Protocol (MCP)"
|
||||
title = "Model Context Protocol (MCP)"
|
||||
weight = 20
|
||||
toc = true
|
||||
description = "Agentic capabilities with Model Context Protocol integration"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
|
||||
+++
|
||||
disableToc = false
|
||||
title = "🖼️ Model gallery"
|
||||
title = "Model Gallery"
|
||||
weight = 18
|
||||
url = '/models'
|
||||
+++
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
+++
|
||||
disableToc = false
|
||||
title = "🔍 Object detection"
|
||||
title = "Object Detection"
|
||||
weight = 13
|
||||
url = "/features/object-detection/"
|
||||
+++
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
|
||||
+++
|
||||
disableToc = false
|
||||
title = "🔥 OpenAI functions and tools"
|
||||
title = "OpenAI Functions and Tools"
|
||||
weight = 17
|
||||
url = "/features/openai-functions/"
|
||||
+++
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
|
||||
+++
|
||||
disableToc = false
|
||||
title = "📈 Reranker"
|
||||
title = "Reranker"
|
||||
weight = 11
|
||||
url = "/features/reranker/"
|
||||
+++
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
+++
|
||||
disableToc = false
|
||||
title = "⚙️ Runtime Settings"
|
||||
title = "Runtime Settings"
|
||||
weight = 25
|
||||
url = '/features/runtime-settings'
|
||||
+++
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
|
||||
+++
|
||||
disableToc = false
|
||||
title = "💾 Stores"
|
||||
title = "Stores"
|
||||
weight = 18
|
||||
url = '/stores'
|
||||
+++
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
|
||||
+++
|
||||
disableToc = false
|
||||
title = "📖 Text generation (GPT)"
|
||||
title = "Text Generation (GPT)"
|
||||
weight = 10
|
||||
url = "/features/text-generation/"
|
||||
+++
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
|
||||
+++
|
||||
disableToc = false
|
||||
title = "🗣 Text to audio (TTS)"
|
||||
title = "Text to Audio (TTS)"
|
||||
weight = 11
|
||||
url = "/features/text-to-audio/"
|
||||
+++
|
||||
|
||||
@@ -119,7 +119,7 @@ For production deployments or when you need more compute, LocalAI supports distr
|
||||
- **P2P federation**: Connect multiple LocalAI instances for load-balanced inference
|
||||
- **Model sharding**: Split large models across multiple machines
|
||||
|
||||
See the **Nodes** page in the web interface or the [Distribution docs]({{% relref "features/distribute" %}}) for setup instructions.
|
||||
See the **Nodes** page in the web interface or the [Distribution docs]({{% relref "features/distribution" %}}) for setup instructions.
|
||||
|
||||
## What's Next?
|
||||
|
||||
|
||||
@@ -72,9 +72,10 @@ Feel free to open up a Pull request (by clicking at the "Edit page" below) to ge
|
||||
|
||||
### Home Automation
|
||||
|
||||
- [hass-openai-custom-conversation](https://github.com/drndos/hass-openai-custom-conversation) — Home Assistant integration
|
||||
- [ha-llmvision](https://github.com/valentinfrlch/ha-llmvision) — Home Assistant LLM Vision
|
||||
- [HA-LocalAI-Monitor](https://github.com/loryanstrant/HA-LocalAI-Monitor) — Home Assistant monitoring
|
||||
- [Extended OpenAI Conversation](https://github.com/jekalmin/extended_openai_conversation) — Conversation agent for Home Assistant that supports a custom OpenAI endpoint
|
||||
- [LLM Vision](https://github.com/valentinfrlch/ha-llmvision) — Image & video feed analysis for Home Assistant
|
||||
- [OpenAI TTS Speech Service](https://github.com/sfortis/openai_tts) — OpenAI TTS custom component for Home Assistant
|
||||
- [LocalAI Monitor](https://github.com/loryanstrant/HA-LocalAI-Monitor) — Monitor & control of LocalAI from Home Assistant
|
||||
- Nextcloud [integration plugin](https://apps.nextcloud.com/apps/integration_openai) and [AI assistant](https://apps.nextcloud.com/apps/assistant)
|
||||
|
||||
### Automation & DevOps
|
||||
|
||||
Reference in New Issue
Block a user