Compare commits

..

7 Commits

Author SHA1 Message Date
Ettore Di Giacinto
ee625fc34e fix(backends gallery): pass-by backend galleries to the model service (#5906)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-07-25 16:38:09 +02:00
Ettore Di Giacinto
693aa0b5de Update README.md
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-07-25 11:51:23 +02:00
Ettore Di Giacinto
3973e6e5da fix(install.sh): update to use the new binary naming (#5903)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-07-25 10:43:22 +02:00
LocalAI [bot]
fb6ec68090 chore: ⬆️ Update ggml-org/whisper.cpp to 7de8dd783f7b2eab56bff6bbc5d3369e34f0e77f (#5902)
⬆️ Update ggml-org/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-07-25 08:40:24 +02:00
LocalAI [bot]
0301fc7c46 chore: ⬆️ Update leejet/stable-diffusion.cpp to eed97a5e1d054f9c1e7ac01982ae480411d4157e (#5901)
⬆️ Update leejet/stable-diffusion.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-07-25 08:40:06 +02:00
LocalAI [bot]
813cb4296d chore: ⬆️ Update ggml-org/llama.cpp to 3f4fc97f1d745f1d5d3c853949503136d419e6de (#5900)
⬆️ Update ggml-org/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
2025-07-25 08:39:44 +02:00
Ettore Di Giacinto
deda3a4972 Update build documentation
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-07-24 22:53:08 +02:00
9 changed files with 56 additions and 166 deletions

View File

@@ -193,6 +193,7 @@ For more information, see [💻 Getting started](https://localai.io/basics/getti
## 📰 Latest project news
- July 2025: All backends migrated outside of the main binary. LocalAI is now more lightweight, small, and automatically downloads the required backend to run the model. [Read the release notes](https://github.com/mudler/LocalAI/releases/tag/v3.2.0)
- June 2025: [Backend management](https://github.com/mudler/LocalAI/pull/5607) has been added. Attention: extras images are going to be deprecated from the next release! Read [the backend management PR](https://github.com/mudler/LocalAI/pull/5607).
- May 2025: [Audio input](https://github.com/mudler/LocalAI/pull/5466) and [Reranking](https://github.com/mudler/LocalAI/pull/5396) in llama.cpp backend, [Realtime API](https://github.com/mudler/LocalAI/pull/5392), Support to Gemma, SmollVLM, and more multimodal models (available in the gallery).
- May 2025: Important: image name changes [See release](https://github.com/mudler/LocalAI/releases/tag/v2.29.0)

View File

@@ -1,5 +1,5 @@
LLAMA_VERSION?=a86f52b2859dae4db5a7a0bbc0f1ad9de6b43ec6
LLAMA_VERSION?=3f4fc97f1d745f1d5d3c853949503136d419e6de
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?=

View File

@@ -19,7 +19,7 @@ LD_FLAGS?=
# stablediffusion.cpp (ggml)
STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
STABLEDIFFUSION_GGML_VERSION?=1896b28ef2fd5b3643120e66979bea487385439f
STABLEDIFFUSION_GGML_VERSION?=eed97a5e1d054f9c1e7ac01982ae480411d4157e
# Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF

View File

@@ -6,7 +6,7 @@ CMAKE_ARGS?=
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
WHISPER_CPP_VERSION?=1f5cf0b2888402d57bb17b2029b2caa97e5f3baf
WHISPER_CPP_VERSION?=7de8dd783f7b2eab56bff6bbc5d3369e34f0e77f
export WHISPER_CMAKE_ARGS?=-DBUILD_SHARED_LIBS=OFF
export WHISPER_DIR=$(abspath ./sources/whisper.cpp)

View File

@@ -15,9 +15,10 @@ import (
)
type ModelGalleryEndpointService struct {
galleries []config.Gallery
modelPath string
galleryApplier *services.GalleryService
galleries []config.Gallery
backendGalleries []config.Gallery
modelPath string
galleryApplier *services.GalleryService
}
type GalleryModel struct {
@@ -25,11 +26,12 @@ type GalleryModel struct {
gallery.GalleryModel
}
func CreateModelGalleryEndpointService(galleries []config.Gallery, modelPath string, galleryApplier *services.GalleryService) ModelGalleryEndpointService {
func CreateModelGalleryEndpointService(galleries []config.Gallery, backendGalleries []config.Gallery, modelPath string, galleryApplier *services.GalleryService) ModelGalleryEndpointService {
return ModelGalleryEndpointService{
galleries: galleries,
modelPath: modelPath,
galleryApplier: galleryApplier,
galleries: galleries,
backendGalleries: backendGalleries,
modelPath: modelPath,
galleryApplier: galleryApplier,
}
}
@@ -79,6 +81,7 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe
ID: uuid.String(),
GalleryElementName: input.ID,
Galleries: mgs.galleries,
BackendGalleries: mgs.backendGalleries,
}
return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: fmt.Sprintf("%smodels/jobs/%s", utils.BaseURL(c), uuid.String())})

View File

@@ -23,7 +23,7 @@ func RegisterLocalAIRoutes(router *fiber.App,
// LocalAI API endpoints
if !appConfig.DisableGalleryEndpoint {
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.BackendGalleries, appConfig.ModelPath, galleryService)
router.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint())
router.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint())

View File

@@ -180,6 +180,7 @@ func registerGalleryRoutes(app *fiber.App, cl *config.BackendConfigLoader, appCo
ID: uid,
GalleryElementName: galleryID,
Galleries: appConfig.Galleries,
BackendGalleries: appConfig.BackendGalleries,
}
go func() {
galleryService.ModelGalleryChannel <- op
@@ -219,6 +220,7 @@ func registerGalleryRoutes(app *fiber.App, cl *config.BackendConfigLoader, appCo
Delete: true,
GalleryElementName: galleryName,
Galleries: appConfig.Galleries,
BackendGalleries: appConfig.BackendGalleries,
}
go func() {
galleryService.ModelGalleryChannel <- op

View File

@@ -9,13 +9,11 @@ ico = "rocket_launch"
### Build
LocalAI can be built as a container image or as a single, portable binary. Note that some model architectures might require Python libraries, which are not included in the binary. The binary contains only the core backends written in Go and C++.
LocalAI can be built as a container image or as a single, portable binary. Note that some model architectures might require Python libraries, which are not included in the binary.
LocalAI's extensible architecture allows you to add your own backends, which can be written in any language, and as such the container images contains also the Python dependencies to run all the available backends (for example, in order to run backends like __Diffusers__ that allows to generate images and videos from text).
In some cases you might want to re-build LocalAI from source (for instance to leverage Apple Silicon acceleration), or to build a custom container image with your own backends. This section contains instructions on how to build LocalAI from source.
This section contains instructions on how to build LocalAI from source.
#### Build LocalAI locally
@@ -24,7 +22,6 @@ In some cases you might want to re-build LocalAI from source (for instance to le
In order to build LocalAI locally, you need the following requirements:
- Golang >= 1.21
- Cmake/make
- GCC
- GRPC
@@ -36,20 +33,14 @@ To install the dependencies follow the instructions below:
Install `xcode` from the App Store
```bash
brew install abseil cmake go grpc protobuf protoc-gen-go protoc-gen-go-grpc python wget
```
After installing the above dependencies, you need to install grpcio-tools from PyPI. You could do this via a pip --user install or a virtualenv.
```bash
pip install --user grpcio-tools
brew install go protobuf protoc-gen-go protoc-gen-go-grpc wget
```
{{% /tab %}}
{{% tab tabName="Debian" %}}
```bash
apt install cmake golang libgrpc-dev make protobuf-compiler-grpc python3-grpc-tools
apt install golang make protobuf-compiler-grpc
```
After you have golang installed and working, you can install the required binaries for compiling the golang protobuf components via the following commands
@@ -63,10 +54,8 @@ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f1
{{% /tab %}}
{{% tab tabName="From source" %}}
Specify `BUILD_GRPC_FOR_BACKEND_LLAMA=true` to build automatically the gRPC dependencies
```bash
make ... BUILD_GRPC_FOR_BACKEND_LLAMA=true build
make build
```
{{% /tab %}}
@@ -83,36 +72,6 @@ make build
This should produce the binary `local-ai`
Here is the list of the variables available that can be used to customize the build:
| Variable | Default | Description |
| ---------------------| ------- | ----------- |
| `BUILD_TYPE` | None | Build type. Available: `cublas`, `openblas`, `clblas`, `metal`,`hipblas`, `sycl_f16`, `sycl_f32` |
| `GO_TAGS` | `tts stablediffusion` | Go tags. Available: `stablediffusion`, `tts` |
| `CLBLAST_DIR` | | Specify a CLBlast directory |
| `CUDA_LIBPATH` | | Specify a CUDA library path |
| `BUILD_API_ONLY` | false | Set to true to build only the API (no backends will be built) |
{{% alert note %}}
#### CPU flagset compatibility
LocalAI uses different backends based on ggml and llama.cpp to run models. If your CPU doesn't support common instruction sets, you can disable them during build:
```
CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_AVX=OFF -DGGML_FMA=OFF" make build
```
To have effect on the container image, you need to set `REBUILD=true`:
```
docker run quay.io/go-skynet/localai
docker run --rm -ti -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -e REBUILD=true -e CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_AVX=OFF -DGGML_FMA=OFF" -v $PWD/models:/models quay.io/go-skynet/local-ai:latest
```
{{% /alert %}}
#### Container image
Requirements:
@@ -153,6 +112,9 @@ wget https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q2_K.gguf -O
# Use a template from the examples
cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/phi-2.Q2_K.tmpl
# Install the llama-cpp backend
./local-ai backends install llama-cpp
# Run LocalAI
./local-ai --models-path=./models/ --debug=true
@@ -186,131 +148,53 @@ sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer
```
# reinstall build dependencies
brew reinstall abseil cmake go grpc protobuf wget
brew reinstall go grpc protobuf wget
make clean
make build
```
**Requirements**: OpenCV, Gomp
## Build backends
Image generation requires `GO_TAGS=stablediffusion` to be set during build:
LocalAI have several backends available for installation in the backend gallery. The backends can be also built by source. As backends might vary from language and dependencies that they require, the documentation will provide generic guidance for few of the backends, which can be applied with some slight modifications also to the others.
### Manually
Typically each backend include a Makefile which allow to package the backend.
In the LocalAI repository, for instance you can build `bark-cpp` by doing:
```
make GO_TAGS=stablediffusion build
git clone https://github.com/go-skynet/LocalAI.git
# Build the bark-cpp backend (requires cmake)
make -C LocalAI/backend/go/bark-cpp build package
# Build vllm backend (requires python)
make -C LocalAI/backend/python/vllm
```
### Build with Text to audio support
### With Docker
**Requirements**: piper-phonemize
Building with docker is simpler as abstracts away all the requirement, and focuses on building the final OCI images that are available in the gallery. This allows for instance also to build locally a backend and install it with LocalAI. You can refer to [Backends](https://localai.io/backends/) for general guidance on how to install and develop backends.
Text to audio support is experimental and requires `GO_TAGS=tts` to be set during build:
In the LocalAI repository, you can build `bark-cpp` by doing:
```
make GO_TAGS=tts build
git clone https://github.com/go-skynet/LocalAI.git
# Build the bark-cpp backend (requires docker)
make docker-build-bark-cpp
```
### Acceleration
#### OpenBLAS
Software acceleration.
Requirements: OpenBLAS
```
make BUILD_TYPE=openblas build
```
#### CuBLAS
Nvidia Acceleration.
Requirement: Nvidia CUDA toolkit
Note: CuBLAS support is experimental, and has not been tested on real HW. please report any issues you find!
```
make BUILD_TYPE=cublas build
```
More informations available in the upstream PR: https://github.com/ggerganov/llama.cpp/pull/1412
#### Hipblas (AMD GPU with ROCm on Arch Linux)
Packages:
```
pacman -S base-devel git rocm-hip-sdk rocm-opencl-sdk opencv clblast grpc
```
Library links:
```
export CGO_CFLAGS="-I/usr/include/opencv4"
export CGO_CXXFLAGS="-I/usr/include/opencv4"
export CGO_LDFLAGS="-L/opt/rocm/hip/lib -lamdhip64 -L/opt/rocm/lib -lOpenCL -L/usr/lib -lclblast -lrocblas -lhipblas -lrocrand -lomp -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link"
```
Build:
```
make BUILD_TYPE=hipblas GPU_TARGETS=gfx1030
```
#### ClBLAS
AMD/Intel GPU acceleration.
Requirement: OpenCL, CLBlast
```
make BUILD_TYPE=clblas build
```
To specify a clblast dir set: `CLBLAST_DIR`
#### Intel GPU acceleration
Intel GPU acceleration is supported via SYCL.
Requirements: [Intel oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html) (see also [llama.cpp setup installations instructions](https://github.com/ggerganov/llama.cpp/blob/d71ac90985854b0905e1abba778e407e17f9f887/README-sycl.md?plain=1#L56))
```
make BUILD_TYPE=sycl_f16 build # for float16
make BUILD_TYPE=sycl_f32 build # for float32
```
#### Metal (Apple Silicon)
```
make build
# correct build type is automatically used on mac (BUILD_TYPE=metal)
# Set `gpu_layers: 256` (or equal to the number of model layers) to your YAML model config file and `f16: true`
```
### Windows compatibility
Make sure to give enough resources to the running container. See https://github.com/go-skynet/LocalAI/issues/2
### Examples
More advanced build options are available, for instance to build only a single backend.
#### Build only a single backend
You can control the backends that are built by setting the `GRPC_BACKENDS` environment variable. For instance, to build only the `llama-cpp` backend only:
Note that `make` is only by convenience, in reality it just runs a simple `docker` command as:
```bash
make GRPC_BACKENDS=backend-assets/grpc/llama-cpp build
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark-cpp -f LocalAI/backend/Dockerfile.golang --build-arg BACKEND=bark-cpp .
```
By default, all the backends are built.
Note:
#### Specific llama.cpp version
To build with a specific version of llama.cpp, set `CPPLLAMA_VERSION` to the tag or wanted sha:
```
CPPLLAMA_VERSION=<sha> make build
```
- BUILD_TYPE can be either: `cublas`, `hipblas`, `sycl_f16`, `sycl_f32`, `metal`.
- BASE_IMAGE is tested on `ubuntu:22.04` (and defaults to it)

View File

@@ -757,7 +757,7 @@ install_binary_darwin() {
[ "$(uname -s)" = "Darwin" ] || fatal 'This script is intended to run on macOS only.'
info "Downloading LocalAI ${LOCALAI_VERSION}..."
curl --fail --show-error --location --progress-bar -o $TEMP_DIR/local-ai "https://github.com/mudler/LocalAI/releases/download/${LOCALAI_VERSION}/local-ai-Darwin-${ARCH}"
curl --fail --show-error --location --progress-bar -o $TEMP_DIR/local-ai "https://github.com/mudler/LocalAI/releases/download/${LOCALAI_VERSION}/local-ai-${LOCALAI_VERSION}-darwin-${ARCH}"
info "Installing to /usr/local/bin/local-ai"
install -o0 -g0 -m755 $TEMP_DIR/local-ai /usr/local/bin/local-ai
@@ -789,7 +789,7 @@ install_binary() {
fi
info "Downloading LocalAI ${LOCALAI_VERSION}..."
curl --fail --location --progress-bar -o $TEMP_DIR/local-ai "https://github.com/mudler/LocalAI/releases/download/${LOCALAI_VERSION}/local-ai-Linux-${ARCH}"
curl --fail --location --progress-bar -o $TEMP_DIR/local-ai "https://github.com/mudler/LocalAI/releases/download/${LOCALAI_VERSION}/local-ai-${LOCALAI_VERSION}-linux-${ARCH}"
for BINDIR in /usr/local/bin /usr/bin /bin; do
echo $PATH | grep -q $BINDIR && break || continue
@@ -868,7 +868,7 @@ OS="$(uname -s)"
ARCH=$(uname -m)
case "$ARCH" in
x86_64) ARCH="x86_64" ;;
x86_64) ARCH="amd64" ;;
aarch64|arm64) ARCH="arm64" ;;
*) fatal "Unsupported architecture: $ARCH" ;;
esac