chore: drop AIO images (#9004)

The AIO images have fallen behind, and it takes effort to maintain them. The
wizard and model installation have been simplified massively, so the AIO
images have lost their purpose.

This lets us stay laser-focused on the main images and relieves
stress from CI.
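
For reference, the equivalent flow with the standard images looks roughly like this (a sketch; model installation now happens through the CLI or the WebUI wizard, as covered in the quickstart docs):

```bash
# Start a standard image (CPU shown; GPU tags are documented in the README)
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest

# Browse the model gallery from the CLI, then install the model you want
local-ai models list
```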

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-03-14 17:49:36 +01:00
committed by GitHub
parent 0ac4ac5bdd
commit 5affb747a9
44 changed files with 68 additions and 988 deletions

View File

@@ -26,7 +26,6 @@
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
aio: ${{ matrix.aio }}
makeflags: ${{ matrix.makeflags }}
ubuntu-version: ${{ matrix.ubuntu-version }}
ubuntu-codename: ${{ matrix.ubuntu-codename }}
@@ -46,7 +45,6 @@
grpc-base-image: "ubuntu:24.04"
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
aio: "-aio-gpu-hipblas"
ubuntu-version: '2404'
ubuntu-codename: 'noble'
@@ -61,7 +59,6 @@
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
aio: ${{ matrix.aio }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
@@ -83,7 +80,6 @@
tag-suffix: ''
base-image: "ubuntu:24.04"
runs-on: 'ubuntu-latest'
aio: "-aio-cpu"
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
ubuntu-version: '2404'
@@ -98,7 +94,6 @@
base-image: "ubuntu:24.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
aio: "-aio-gpu-nvidia-cuda-12"
ubuntu-version: '2404'
ubuntu-codename: 'noble'
- build-type: 'cublas'
@@ -111,7 +106,6 @@
base-image: "ubuntu:22.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
aio: "-aio-gpu-nvidia-cuda-13"
ubuntu-version: '2404'
ubuntu-codename: 'noble'
- build-type: 'vulkan'
@@ -122,7 +116,6 @@
base-image: "ubuntu:24.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
aio: "-aio-gpu-vulkan"
ubuntu-version: '2404'
ubuntu-codename: 'noble'
- build-type: 'intel'
@@ -133,7 +126,6 @@
tag-suffix: '-gpu-intel'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
aio: "-aio-gpu-intel"
ubuntu-version: '2404'
ubuntu-codename: 'noble'
@@ -148,7 +140,6 @@
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
aio: ${{ matrix.aio }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}

View File

@@ -51,11 +51,6 @@ on:
required: false
default: '--jobs=4 --output-sync=target'
type: string
aio:
description: 'AIO Image Name'
required: false
default: ''
type: string
ubuntu-version:
description: 'Ubuntu version'
required: false
@@ -177,34 +172,6 @@ jobs:
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.tag-suffix }}
- name: Docker meta AIO (quay.io)
if: inputs.aio != ''
id: meta_aio
uses: docker/metadata-action@v6
with:
images: |
quay.io/go-skynet/local-ai
tags: |
type=ref,event=branch
type=semver,pattern={{raw}}
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.aio }},onlatest=true
- name: Docker meta AIO (dockerhub)
if: inputs.aio != ''
id: meta_aio_dockerhub
uses: docker/metadata-action@v6
with:
images: |
localai/localai
tags: |
type=ref,event=branch
type=semver,pattern={{raw}}
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.aio }},onlatest=true
- name: Set up QEMU
uses: docker/setup-qemu-action@master
with:
@@ -287,41 +254,6 @@ jobs:
tags: ${{ steps.meta_pull_request.outputs.tags }}
labels: ${{ steps.meta_pull_request.outputs.labels }}
## End testing image
- name: Build and push AIO image
if: inputs.aio != ''
uses: docker/build-push-action@v7
with:
builder: ${{ steps.buildx.outputs.name }}
build-args: |
BASE_IMAGE=quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
MAKEFLAGS=${{ inputs.makeflags }}
context: .
file: ./Dockerfile.aio
platforms: ${{ inputs.platforms }}
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta_aio.outputs.tags }}
labels: ${{ steps.meta_aio.outputs.labels }}
- name: Build and push AIO image (dockerhub)
if: inputs.aio != ''
uses: docker/build-push-action@v7
with:
builder: ${{ steps.buildx.outputs.name }}
build-args: |
BASE_IMAGE=localai/localai:${{ steps.meta.outputs.version }}
MAKEFLAGS=${{ inputs.makeflags }}
context: .
file: ./Dockerfile.aio
platforms: ${{ inputs.platforms }}
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
- name: job summary
run: |
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
- name: job summary(AIO)
if: inputs.aio != ''
run: |
echo "Built image: ${{ steps.meta_aio.outputs.labels }}" >> $GITHUB_STEP_SUMMARY

View File

@@ -116,7 +116,7 @@ jobs:
connect-timeout-seconds: 180
limit-access-to-actor: true
tests-aio-container:
tests-e2e-container:
runs-on: ubuntu-latest
steps:
- name: Release space from worker
@@ -166,7 +166,7 @@ jobs:
PATH="$PATH:$HOME/go/bin" make protogen-go
- name: Test
run: |
PATH="$PATH:$HOME/go/bin" make backends/local-store backends/silero-vad backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
PATH="$PATH:$HOME/go/bin" make backends/local-store backends/silero-vad backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-e2e e2e-aio
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.23

.gitignore vendored
View File

@@ -37,7 +37,6 @@ models/*
test-models/
test-dir/
tests/e2e-aio/backends
tests/e2e-aio/models
mock-backend
release/

View File

@@ -244,19 +244,16 @@ The e2e tests run LocalAI in a Docker container and exercise the API:
make test-e2e
```
### Running AIO tests
### Running E2E container tests
All-In-One images have a set of tests that automatically verify that most of the endpoints work correctly:
These tests build a standard LocalAI Docker image and run it with pre-configured model configs to verify that most endpoints work correctly:
```bash
# Build the LocalAI docker image
make DOCKER_IMAGE=local-ai docker
make docker-build-e2e
# Build the corresponding AIO image
BASE_IMAGE=local-ai DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
# Run the AIO e2e tests
LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio make run-e2e-aio
# Run the e2e tests (uses model configs from tests/e2e-aio/models/)
make e2e-aio
```
### Testing backends

View File

@@ -1,8 +0,0 @@
ARG BASE_IMAGE=ubuntu:24.04
FROM ${BASE_IMAGE}
RUN apt-get update && apt-get install -y pciutils && apt-get clean
COPY aio/ /aio
ENTRYPOINT [ "/aio/entrypoint.sh" ]

View File

@@ -172,10 +172,10 @@ test: test-models/testmodel.ggml protogen-go
$(MAKE) test-stablediffusion
########################################################
## AIO tests
## E2E AIO tests (uses standard image with pre-configured models)
########################################################
docker-build-aio:
docker-build-e2e:
docker build \
--build-arg MAKEFLAGS="--jobs=5 --output-sync=target" \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
@@ -187,13 +187,12 @@ docker-build-aio:
--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
--build-arg GO_TAGS="$(GO_TAGS)" \
-t local-ai:tests -f Dockerfile .
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test $(MAKE) docker-aio
e2e-aio:
LOCALAI_BACKEND_DIR=$(abspath ./backends) \
LOCALAI_MODELS_DIR=$(abspath ./models) \
LOCALAI_IMAGE_TAG=test \
LOCALAI_IMAGE=local-ai-aio \
LOCALAI_MODELS_DIR=$(abspath ./tests/e2e-aio/models) \
LOCALAI_IMAGE_TAG=tests \
LOCALAI_IMAGE=local-ai \
$(MAKE) run-e2e-aio
run-e2e-aio: protogen-go
@@ -443,7 +442,6 @@ test-extra: prepare-test-extra
$(MAKE) -C backend/python/ace-step test
DOCKER_IMAGE?=local-ai
DOCKER_AIO_IMAGE?=local-ai-aio
IMAGE_TYPE?=core
BASE_IMAGE?=ubuntu:24.04
@@ -473,21 +471,6 @@ docker-cuda12:
--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
-t $(DOCKER_IMAGE)-cuda-12 .
docker-aio:
@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
docker build \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
-t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio .
docker-aio-all:
$(MAKE) docker-aio DOCKER_AIO_SIZE=cpu
$(MAKE) docker-aio DOCKER_AIO_SIZE=cpu
docker-image-intel:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04 \

View File

@@ -194,27 +194,6 @@ docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/de
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-vulkan
```
#### AIO Images (pre-downloaded models):
```bash
# CPU version
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
# NVIDIA CUDA 13 version
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-13
# NVIDIA CUDA 12 version
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
# Intel GPU version
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel
# AMD GPU version
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas
```
For more information about the AIO images and pre-downloaded models, see [Container Documentation](https://localai.io/basics/container/).
To load models:
```bash
@@ -250,7 +229,7 @@ For more information, see [💻 Getting started](https://localai.io/basics/getti
- May 2025: Important: image name changes [See release](https://github.com/mudler/LocalAI/releases/tag/v2.29.0)
- Apr 2025: Rebrand, WebUI enhancements
- Apr 2025: [LocalAGI](https://github.com/mudler/LocalAGI) and [LocalRecall](https://github.com/mudler/LocalRecall) join the LocalAI family stack.
- Apr 2025: WebUI overhaul, AIO images updates
- Apr 2025: WebUI overhaul
- Feb 2025: Backend cleanup, Breaking changes, new backends (kokoro, OutelTTS, faster-whisper), Nvidia L4T images
- Jan 2025: LocalAI model release: https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.3, SANA support in diffusers: https://github.com/mudler/LocalAI/pull/4603
- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 )

View File

@@ -1,5 +0,0 @@
## AIO CPU size
Use this image with CPU-only.
Please keep using only C++ backends so the base image is as small as possible (without CUDA, cuDNN, python, etc).

View File

@@ -1,13 +0,0 @@
embeddings: true
name: text-embedding-ada-002
backend: llama-cpp
parameters:
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
usage: |
You can test this model with curl like this:
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "Your text string goes here",
"model": "text-embedding-ada-002"
}'

View File

@@ -1,33 +0,0 @@
name: jina-reranker-v1-base-en
reranking: true
f16: true
parameters:
model: jina-reranker-v1-tiny-en.f16.gguf
backend: llama-cpp
download_files:
- filename: jina-reranker-v1-tiny-en.f16.gguf
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf
usage: |
You can test this model with curl like this:
curl http://localhost:8080/v1/rerank \
-H "Content-Type: application/json" \
-d '{
"model": "jina-reranker-v1-base-en",
"query": "Organic skincare products for sensitive skin",
"documents": [
"Eco-friendly kitchenware for modern homes",
"Biodegradable cleaning supplies for eco-conscious consumers",
"Organic cotton baby clothes for sensitive skin",
"Natural organic skincare range for sensitive skin",
"Tech gadgets for smart homes: 2024 edition",
"Sustainable gardening tools and compost solutions",
"Sensitive skin-friendly facial cleansers and toners",
"Organic food wraps and storage solutions",
"All-natural pet food for dogs with allergies",
"Yoga mats made from recycled materials"
],
"top_n": 3
}'

View File

@@ -1,18 +0,0 @@
name: whisper-1
backend: whisper
parameters:
model: ggml-whisper-base.bin
usage: |
## example audio file
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
## Send the example audio file to the transcriptions endpoint
curl http://localhost:8080/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
-F file="@$PWD/gb1.ogg" -F model="whisper-1"
download_files:
- filename: "ggml-whisper-base.bin"
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"

View File

@@ -1,15 +0,0 @@
name: tts-1
download_files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
backend: piper
parameters:
model: en-us-amy-low.onnx
usage: |
To test if this model works as expected, you can use the following curl command:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"model":"voice-en-us-amy-low",
"input": "Hi, this is a test."
}'

View File

@@ -1,8 +0,0 @@
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808

View File

@@ -1,50 +0,0 @@
context_size: 4096
f16: true
backend: llama-cpp
mmap: true
mmproj: minicpm-v-4_5-mmproj-f16.gguf
name: gpt-4o
parameters:
model: minicpm-v-4_5-Q4_K_M.gguf
stopwords:
- <|im_end|>
- <dummy32000>
- </s>
- <|endoftext|>
template:
chat: |
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}<|im_end|>
completion: |
{{.Input}}
function: |
<|im_start|>system
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
For each function call return a json object with function name and arguments
<|im_end|>
{{.Input -}}
<|im_start|>assistant
download_files:
- filename: minicpm-v-4_5-Q4_K_M.gguf
sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf
- filename: minicpm-v-4_5-mmproj-f16.gguf
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf
sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8

View File

@@ -1,138 +0,0 @@
#!/bin/bash
echo "===> LocalAI All-in-One (AIO) container starting..."
GPU_ACCELERATION=false
GPU_VENDOR=""
function check_intel() {
if lspci | grep -E 'VGA|3D' | grep -iq intel; then
echo "Intel GPU detected"
if [ -d /opt/intel ]; then
GPU_ACCELERATION=true
GPU_VENDOR=intel
else
echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
fi
fi
}
function check_nvidia_wsl() {
if lspci | grep -E 'VGA|3D' | grep -iq "Microsoft Corporation Device 008e"; then
# We assume this WSL2 card is NVIDIA, then check for nvidia-smi
# Make sure the container was run with `--gpus all` as the only required parameter
echo "NVIDIA GPU detected via WSL2"
# nvidia-smi should be installed in the container
if nvidia-smi; then
GPU_ACCELERATION=true
GPU_VENDOR=nvidia
else
echo "NVIDIA GPU detected via WSL2, but nvidia-smi is not installed. GPU acceleration will not be available."
fi
fi
}
function check_amd() {
if lspci | grep -E 'VGA|3D' | grep -iq amd; then
echo "AMD GPU detected"
# Check if ROCm is installed
if [ -d /opt/rocm ]; then
GPU_ACCELERATION=true
GPU_VENDOR=amd
else
echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available."
fi
fi
}
function check_nvidia() {
if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then
echo "NVIDIA GPU detected"
# nvidia-smi should be installed in the container
if nvidia-smi; then
GPU_ACCELERATION=true
GPU_VENDOR=nvidia
else
echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available."
fi
fi
}
function check_metal() {
if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then
echo "Apple Metal supported GPU detected"
GPU_ACCELERATION=true
GPU_VENDOR=apple
fi
}
function detect_gpu() {
case "$(uname -s)" in
Linux)
check_nvidia
check_amd
check_intel
check_nvidia_wsl
;;
Darwin)
check_metal
;;
esac
}
function detect_gpu_size() {
# Attempting to find GPU memory size for NVIDIA GPUs
if [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "nvidia" ]; then
echo "NVIDIA GPU detected. Attempting to find memory size..."
# Using head -n 1 to get the total memory of the 1st NVIDIA GPU detected.
# If handling multiple GPUs is required in the future, this is the place to do it
nvidia_sm=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits | head -n 1)
if [ ! -z "$nvidia_sm" ]; then
echo "Total GPU Memory: $nvidia_sm MiB"
# if bigger than 8GB, use 16GB
#if [ "$nvidia_sm" -gt 8192 ]; then
# GPU_SIZE=gpu-16g
#else
GPU_SIZE=gpu-8g
#fi
else
echo "Unable to determine NVIDIA GPU memory size. Falling back to CPU."
GPU_SIZE=gpu-8g
fi
elif [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "intel" ]; then
GPU_SIZE=intel
# Default to a generic GPU size until we implement GPU size detection for non NVIDIA GPUs
elif [ "$GPU_ACCELERATION" = true ]; then
echo "Non-NVIDIA GPU detected. Specific GPU memory size detection is not implemented."
GPU_SIZE=gpu-8g
# default to cpu if GPU_SIZE is not set
else
echo "GPU acceleration is not enabled or supported. Defaulting to CPU."
GPU_SIZE=cpu
fi
}
function check_vars() {
if [ -z "$MODELS" ]; then
echo "MODELS environment variable is not set. Please set it to a comma-separated list of model YAML files to load."
exit 1
fi
if [ -z "$PROFILE" ]; then
echo "PROFILE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple"
exit 1
fi
}
detect_gpu
detect_gpu_size
PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vad.yaml,/aio/${PROFILE}/vision.yaml}"
check_vars
echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS"
exec /entrypoint.sh "$@"

View File

@@ -1,13 +0,0 @@
embeddings: true
name: text-embedding-ada-002
backend: llama-cpp
parameters:
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
usage: |
You can test this model with curl like this:
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "Your text string goes here",
"model": "text-embedding-ada-002"
}'

View File

@@ -1,25 +0,0 @@
name: stablediffusion
parameters:
model: DreamShaper_8_pruned.safetensors
backend: diffusers
step: 25
f16: true
diffusers:
pipeline_type: StableDiffusionPipeline
cuda: true
enable_parameters: "negative_prompt,num_inference_steps"
scheduler_type: "k_dpmpp_2m"
download_files:
- filename: DreamShaper_8_pruned.safetensors
uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
usage: |
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "<positive prompt>|<negative prompt>",
"step": 25,
"size": "512x512"
}'

View File

@@ -1,33 +0,0 @@
name: jina-reranker-v1-base-en
reranking: true
f16: true
parameters:
model: jina-reranker-v1-tiny-en.f16.gguf
backend: llama-cpp
download_files:
- filename: jina-reranker-v1-tiny-en.f16.gguf
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf
usage: |
You can test this model with curl like this:
curl http://localhost:8080/v1/rerank \
-H "Content-Type: application/json" \
-d '{
"model": "jina-reranker-v1-base-en",
"query": "Organic skincare products for sensitive skin",
"documents": [
"Eco-friendly kitchenware for modern homes",
"Biodegradable cleaning supplies for eco-conscious consumers",
"Organic cotton baby clothes for sensitive skin",
"Natural organic skincare range for sensitive skin",
"Tech gadgets for smart homes: 2024 edition",
"Sustainable gardening tools and compost solutions",
"Sensitive skin-friendly facial cleansers and toners",
"Organic food wraps and storage solutions",
"All-natural pet food for dogs with allergies",
"Yoga mats made from recycled materials"
],
"top_n": 3
}'

View File

@@ -1,18 +0,0 @@
name: whisper-1
backend: whisper
parameters:
model: ggml-whisper-base.bin
usage: |
## example audio file
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
## Send the example audio file to the transcriptions endpoint
curl http://localhost:8080/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
-F file="@$PWD/gb1.ogg" -F model="whisper-1"
download_files:
- filename: "ggml-whisper-base.bin"
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"

View File

@@ -1,15 +0,0 @@
name: tts-1
download_files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
backend: piper
parameters:
model: en-us-amy-low.onnx
usage: |
To test if this model works as expected, you can use the following curl command:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"model":"tts-1",
"input": "Hi, this is a test."
}'

View File

@@ -1,54 +0,0 @@
context_size: 4096
f16: true
backend: llama-cpp
function:
capture_llm_results:
- (?s)<Thought>(.*?)</Thought>
grammar:
properties_order: name,arguments
json_regex_match:
- (?s)<Output>(.*?)</Output>
replace_llm_results:
- key: (?s)<Thought>(.*?)</Thought>
value: ""
mmap: true
name: gpt-4
parameters:
model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
stopwords:
- <|im_end|>
- <dummy32000>
- </s>
template:
chat: |
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}<|im_end|>
completion: |
{{.Input}}
function: |
<|im_start|>system
You are an AI assistant that executes function calls, and these are the tools at your disposal:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
<|im_end|>
{{.Input -}}
<|im_start|>assistant
download_files:
- filename: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
sha256: 4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4
uri: huggingface://mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf

View File

@@ -1,8 +0,0 @@
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808

View File

@@ -1,50 +0,0 @@
context_size: 4096
backend: llama-cpp
f16: true
mmap: true
mmproj: minicpm-v-4_5-mmproj-f16.gguf
name: gpt-4o
parameters:
model: minicpm-v-4_5-Q4_K_M.gguf
stopwords:
- <|im_end|>
- <dummy32000>
- </s>
- <|endoftext|>
template:
chat: |
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}<|im_end|>
completion: |
{{.Input}}
function: |
<|im_start|>system
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
For each function call return a json object with function name and arguments
<|im_end|>
{{.Input -}}
<|im_start|>assistant
download_files:
- filename: minicpm-v-4_5-Q4_K_M.gguf
sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf
- filename: minicpm-v-4_5-mmproj-f16.gguf
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf
sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8

View File

@@ -1,13 +0,0 @@
embeddings: true
name: text-embedding-ada-002
backend: llama-cpp
parameters:
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
usage: |
You can test this model with curl like this:
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "Your text string goes here",
"model": "text-embedding-ada-002"
}'

View File

@@ -1,20 +0,0 @@
name: stablediffusion
parameters:
model: Lykon/dreamshaper-8
backend: diffusers
step: 25
f16: true
diffusers:
pipeline_type: StableDiffusionPipeline
cuda: true
enable_parameters: "negative_prompt,num_inference_steps"
scheduler_type: "k_dpmpp_2m"
usage: |
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "<positive prompt>|<negative prompt>",
"step": 25,
"size": "512x512"
}'

View File

@@ -1,33 +0,0 @@
name: jina-reranker-v1-base-en
reranking: true
f16: true
parameters:
model: jina-reranker-v1-tiny-en.f16.gguf
backend: llama-cpp
download_files:
- filename: jina-reranker-v1-tiny-en.f16.gguf
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf
usage: |
You can test this model with curl like this:
curl http://localhost:8080/v1/rerank \
-H "Content-Type: application/json" \
-d '{
"model": "jina-reranker-v1-base-en",
"query": "Organic skincare products for sensitive skin",
"documents": [
"Eco-friendly kitchenware for modern homes",
"Biodegradable cleaning supplies for eco-conscious consumers",
"Organic cotton baby clothes for sensitive skin",
"Natural organic skincare range for sensitive skin",
"Tech gadgets for smart homes: 2024 edition",
"Sustainable gardening tools and compost solutions",
"Sensitive skin-friendly facial cleansers and toners",
"Organic food wraps and storage solutions",
"All-natural pet food for dogs with allergies",
"Yoga mats made from recycled materials"
],
"top_n": 3
}'

View File

@@ -1,18 +0,0 @@
name: whisper-1
backend: whisper
parameters:
model: ggml-whisper-base.bin
usage: |
## example audio file
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
## Send the example audio file to the transcriptions endpoint
curl http://localhost:8080/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
-F file="@$PWD/gb1.ogg" -F model="whisper-1"
download_files:
- filename: "ggml-whisper-base.bin"
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"

View File

@@ -1,15 +0,0 @@
name: tts-1
download_files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
backend: piper
parameters:
model: en-us-amy-low.onnx
usage: |
To test if this model works as expected, you can use the following curl command:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"model":"tts-1",
"input": "Hi, this is a test."
}'

View File

@@ -1,54 +0,0 @@
context_size: 4096
f16: true
backend: llama-cpp
function:
capture_llm_results:
- (?s)<Thought>(.*?)</Thought>
grammar:
properties_order: name,arguments
json_regex_match:
- (?s)<Output>(.*?)</Output>
replace_llm_results:
- key: (?s)<Thought>(.*?)</Thought>
value: ""
mmap: true
name: gpt-4
parameters:
model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
stopwords:
- <|im_end|>
- <dummy32000>
- </s>
template:
chat: |
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}<|im_end|>
completion: |
{{.Input}}
function: |
<|im_start|>system
You are an AI assistant that executes function calls, and these are the tools at your disposal:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
<|im_end|>
{{.Input -}}
<|im_start|>assistant
download_files:
- filename: localai-functioncall-phi-4-v0.3-q4_k_m.gguf
sha256: 23fee048ded2a6e2e1a7b6bbefa6cbf83068f194caa9552aecbaa00fec8a16d5
uri: huggingface://mudler/LocalAI-functioncall-phi-4-v0.3-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.3-q4_k_m.gguf

View File

@@ -206,7 +206,7 @@ The following are examples of the ROCm specific configuration elements required.
```yaml
# For full functionality select a non-'core' image; version-locking the image is recommended for debugging purposes.
image: quay.io/go-skynet/local-ai:master-aio-gpu-hipblas
image: quay.io/go-skynet/local-ai:master-gpu-hipblas
environment:
- DEBUG=true
# If your gpu is not already included in the current list of default targets the following build details are required.
@@ -229,13 +229,11 @@ docker run \
-e GPU_TARGETS=gfx906 \
--device /dev/dri \
--device /dev/kfd \
quay.io/go-skynet/local-ai:master-aio-gpu-hipblas
quay.io/go-skynet/local-ai:master-gpu-hipblas
```
Please ensure you add all other required environment variables, port forwards, etc. to your `compose` file or `run` command.
The rebuild process will take some time to complete when deploying these containers. It is recommended that you `pull` the image prior to deployment, as depending on the version these images may be ~20GB in size.
#### Example (k8s) (Advanced Deployment/WIP)
For k8s deployments there is an additional step required before deployment: installing the [ROCm/k8s-device-plugin](https://artifacthub.io/packages/helm/amd-gpu-helm/amd-gpu).
@@ -434,7 +432,7 @@ If your AMD GPU is not in the default target list, set `REBUILD=true` and `GPU_T
```bash
docker run -e REBUILD=true -e BUILD_TYPE=hipblas -e GPU_TARGETS=gfx1030 \
--device /dev/dri --device /dev/kfd \
quay.io/go-skynet/local-ai:master-aio-gpu-hipblas
quay.io/go-skynet/local-ai:master-gpu-hipblas
```
### Intel SYCL: model hangs

View File

@@ -32,6 +32,4 @@ Grammars and function tools can be used as well in conjunction with vision APIs:
### Setup
All-in-One images already ship the llava model as `gpt-4-vision-preview`, so no setup is needed in this case.
To set up the LLaVa models, follow the full example in the [configuration examples](https://github.com/mudler/LocalAI-examples/blob/main/configurations/llava/llava.yaml).
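For orientation, such a config pairs a GGUF model with an `mmproj` projector file, mirroring the shape of the vision configs elsewhere in this change; the file names below are placeholders, and the real values come from the linked example:

```yaml
name: llava
backend: llama-cpp
f16: true
mmap: true
mmproj: llava-mmproj-f16.gguf  # placeholder: projector file from the linked example
parameters:
  model: llava-Q4_K_M.gguf     # placeholder: model file from the linked example
```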

View File

@@ -8,8 +8,6 @@ ico = "rocket_launch"
LocalAI provides a variety of images to support different environments. These images are available on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags) and [Docker Hub](https://hub.docker.com/r/localai/localai).
All-in-One images come with a pre-configured set of models and backends; standard images do not have any models pre-configured or installed.
For GPU acceleration on Nvidia graphics cards, use the Nvidia/CUDA images; if you don't have a GPU, use the CPU images. If you have an AMD GPU or Apple Silicon, see the [build section]({{%relref "installation/build" %}}).
{{% notice tip %}}
@@ -17,7 +15,6 @@ For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA
**Available Image Types**:
- Images ending with `-core` are smaller images without pre-downloaded Python dependencies. Use these images if you plan to use the `llama.cpp`, `stablediffusion-ncn` or `rwkv` backends - if you are not sure which one to use, do **not** use these images.
- Images containing the `aio` tag are all-in-one images with all the features enabled, and they come with an opinionated set of configurations.
{{% /notice %}}
@@ -124,109 +121,6 @@ These images are compatible with Nvidia ARM64 devices with CUDA 13, such as the
{{< /tabs >}}
## All-in-one images
All-In-One images come pre-configured with a set of models and backends to fully leverage almost the entire LocalAI feature set. These images are available for both CPU and GPU environments. The AIO images are designed to be easy to use and require no configuration. Model configurations can be found [here](https://github.com/mudler/LocalAI/tree/master/aio), separated by size.
In the AIO images, models are configured with the names of OpenAI models; however, they are actually backed by open-source models. See the table below:
| Category | Model name | Real model (CPU) | Real model (GPU) |
| ---- | ---- | ---- | ---- |
| Text Generation | `gpt-4` | `phi-2` | `hermes-2-pro-mistral` |
| Multimodal Vision | `gpt-4-vision-preview` | `bakllava` | `llava-1.6-mistral` |
| Image Generation | `stablediffusion` | `stablediffusion` | `dreamshaper-8` |
| Speech to Text | `whisper-1` | `whisper` with `whisper-base` model | <= same |
| Text to Speech | `tts-1` | `en-us-amy-low.onnx` from `rhasspy/piper` | <= same |
| Embeddings | `text-embedding-ada-002` | `all-MiniLM-L6-v2` in Q4 | `all-MiniLM-L6-v2` |
### Usage
Select the image (CPU or GPU) and start the container with Docker:
```bash
docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
```
LocalAI will automatically download all the required models, and the API will be available at [localhost:8080](http://localhost:8080/v1/models).
Or with a docker-compose file:
```yaml
version: "3.9"
services:
api:
image: localai/localai:latest-aio-cpu
# For a specific version:
# image: localai/localai:{{< version >}}-aio-cpu
# For Nvidia GPUs uncomment one of the following (cuda12 or cuda13):
# image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-12
# image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-13
# image: localai/localai:latest-aio-gpu-nvidia-cuda-12
# image: localai/localai:latest-aio-gpu-nvidia-cuda-13
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"]
interval: 1m
timeout: 20m
retries: 5
ports:
- 8080:8080
environment:
- DEBUG=true
# ...
volumes:
- ./models:/models:cached
# uncomment the following piece if running with Nvidia GPUs
# deploy:
# resources:
# reservations:
# devices:
# - driver: nvidia
# count: 1
# capabilities: [gpu]
```
{{% notice tip %}}
**Models caching**: The **AIO** image will download the needed models on the first run if they are not already present, and store them in `/models` inside the container. The AIO models are automatically updated with new versions of the AIO images.
You can change the directory inside the container by specifying the `MODELS_PATH` environment variable (or `--models-path`).
If you want to use a named model or a local directory, you can mount it as a volume to `/models`:
```bash
docker run -p 8080:8080 --name local-ai -ti -v $PWD/models:/models localai/localai:latest-aio-cpu
```
Or use a named volume:
```bash
docker volume create localai-models
docker run -p 8080:8080 --name local-ai -ti -v localai-models:/models localai/localai:latest-aio-cpu
```
{{% /notice %}}
### Available AIO images
| Description | Quay | Docker Hub |
| --- | --- |-----------------------------------------------|
| Latest images for CPU | `quay.io/go-skynet/local-ai:latest-aio-cpu` | `localai/localai:latest-aio-cpu` |
| Versioned image (e.g. for CPU) | `quay.io/go-skynet/local-ai:{{< version >}}-aio-cpu` | `localai/localai:{{< version >}}-aio-cpu` |
| Latest images for Nvidia GPU (CUDA12) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-12` | `localai/localai:latest-aio-gpu-nvidia-cuda-12` |
| Latest images for Nvidia GPU (CUDA13) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-13` | `localai/localai:latest-aio-gpu-nvidia-cuda-13` |
| Latest images for AMD GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-hipblas` | `localai/localai:latest-aio-gpu-hipblas` |
| Latest images for Intel GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel` | `localai/localai:latest-aio-gpu-intel` |
### Available environment variables
The AIO images inherit the same environment variables as the base images and the environment of LocalAI (which you can inspect by calling `--help`). However, they support additional environment variables available only in the container image:
| Variable | Default | Description |
| ---------------------| ------- | ----------- |
| `PROFILE` | Auto-detected | The size of the model to use. Available: `cpu`, `gpu-8g` |
| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "getting-started/models" %}})) |
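For example, to pin a profile instead of relying on auto-detection (a sketch using the variables above):

```bash
docker run -p 8080:8080 --name local-ai -ti \
  -e PROFILE=cpu \
  localai/localai:latest-aio-cpu
```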
## See Also
- [GPU acceleration]({{%relref "features/gpu-acceleration" %}})

View File

@@ -20,7 +20,7 @@ With the CLI you can list the models with `local-ai models list` and install the
You can also [run models manually]({{%relref "getting-started/models" %}}) by copying files into the `models` directory.
{{% /notice %}}
You can test out the API endpoints using `curl`; a few examples are listed below. The models referred to here (`gpt-4`, `gpt-4-vision-preview`, `tts-1`, `whisper-1`) are the default models that come with the AIO images - you can also use any other model you have installed.
You can test out the API endpoints using `curl`; a few examples are listed below. The models referred to here (`gpt-4`, `gpt-4-vision-preview`, `tts-1`, `whisper-1`) are examples - replace them with the model names you have installed.
### Text Generation
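A request against the chat endpoint looks roughly like this (a sketch; it assumes the OpenAI-compatible `/v1/chat/completions` route and a model installed under the name `gpt-4`):

```bash
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-4",
    "messages": [{"role": "user", "content": "Say hello in one sentence."}]
  }'
```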

View File

@@ -30,7 +30,7 @@ docker run -p 8080:8080 --name local-ai -ti localai/localai:latest
podman run -p 8080:8080 --name local-ai -ti localai/localai:latest
```
This will start LocalAI. The API will be available at `http://localhost:8080`. For images with pre-configured models, see [All-in-One images](/getting-started/container-images/#all-in-one-images).
This will start LocalAI. The API will be available at `http://localhost:8080`.
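To confirm the container is serving requests, you can probe the same route the compose healthchecks use, and list the installed models (a minimal check):

```bash
# Readiness probe (the route used by the compose healthchecks)
curl -f http://localhost:8080/readyz

# List the models currently installed
curl http://localhost:8080/v1/models
```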
For other platforms:
- **macOS**: Download the [DMG](macos/)

View File

@@ -93,48 +93,6 @@ CUDA 13 (for Nvidia DGX Spark):
docker run -ti --name local-ai -p 8080:8080 --runtime nvidia --gpus all localai/localai:latest-nvidia-l4t-arm64-cuda-13
```
### All-in-One (AIO) Images
**Recommended for beginners** - These images come pre-configured with models and backends, ready to use immediately.
#### CPU Image
```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
# Or with Podman:
podman run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
```
#### GPU Images
**NVIDIA CUDA 13:**
```bash
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-13
# Or with Podman:
podman run -ti --name local-ai -p 8080:8080 --device nvidia.com/gpu=all localai/localai:latest-aio-gpu-nvidia-cuda-13
```
**NVIDIA CUDA 12:**
```bash
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
# Or with Podman:
podman run -ti --name local-ai -p 8080:8080 --device nvidia.com/gpu=all localai/localai:latest-aio-gpu-nvidia-cuda-12
```
**AMD GPU (ROCm):**
```bash
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas
# Or with Podman:
podman run -ti --name local-ai -p 8080:8080 --device rocm.com/gpu=all localai/localai:latest-aio-gpu-hipblas
```
**Intel GPU:**
```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel
# Or with Podman:
podman run -ti --name local-ai -p 8080:8080 --device gpu.intel.com/all localai/localai:latest-aio-gpu-intel
```
## Using Compose
For a more manageable setup, especially with persistent volumes, use Docker Compose or Podman Compose:
@@ -147,8 +105,8 @@ The CDI approach is recommended for newer versions of the NVIDIA Container Toolk
version: "3.9"
services:
api:
image: localai/localai:latest-aio-gpu-nvidia-cuda-12
# For CUDA 13, use: localai/localai:latest-aio-gpu-nvidia-cuda-13
image: localai/localai:latest-gpu-nvidia-cuda-12
# For CUDA 13, use: localai/localai:latest-gpu-nvidia-cuda-13
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"]
interval: 1m
@@ -187,8 +145,8 @@ If you are using an older version of the NVIDIA Container Toolkit (before 1.14),
version: "3.9"
services:
api:
image: localai/localai:latest-aio-gpu-nvidia-cuda-12
# For CUDA 13, use: localai/localai:latest-aio-gpu-nvidia-cuda-13
image: localai/localai:latest-gpu-nvidia-cuda-12
# For CUDA 13, use: localai/localai:latest-gpu-nvidia-cuda-13
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"]
interval: 1m
@@ -227,12 +185,12 @@ To persist models and data, mount volumes:
docker run -ti --name local-ai -p 8080:8080 \
-v $PWD/models:/models \
-v $PWD/data:/data \
localai/localai:latest-aio-cpu
localai/localai:latest
# Or with Podman:
podman run -ti --name local-ai -p 8080:8080 \
-v $PWD/models:/models \
-v $PWD/data:/data \
localai/localai:latest-aio-cpu
localai/localai:latest
```
Or use named volumes:
@@ -243,29 +201,16 @@ docker volume create localai-data
docker run -ti --name local-ai -p 8080:8080 \
-v localai-models:/models \
-v localai-data:/data \
localai/localai:latest-aio-cpu
localai/localai:latest
# Or with Podman:
podman volume create localai-models
podman volume create localai-data
podman run -ti --name local-ai -p 8080:8080 \
-v localai-models:/models \
-v localai-data:/data \
localai/localai:latest-aio-cpu
localai/localai:latest
```
## What's Included in AIO Images
All-in-One images come pre-configured with:
- **Text Generation**: LLM models for chat and completion
- **Image Generation**: Stable Diffusion models
- **Text to Speech**: TTS models
- **Speech to Text**: Whisper models
- **Embeddings**: Vector embedding models
- **Function Calling**: Support for OpenAI-compatible function calling
The AIO images use OpenAI-compatible model names (like `gpt-4`, `gpt-4-vision-preview`) but are backed by open-source models. See the [container images documentation](/getting-started/container-images/#all-in-one-images) for the complete mapping.
## Next Steps
After installation:

View File

@@ -0,0 +1,5 @@
embeddings: true
name: text-embedding-ada-002
backend: llama-cpp
parameters:
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf

View File

@@ -12,12 +12,3 @@ download_files:
- filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f"
uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
usage: |
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "<positive prompt>|<negative prompt>",
"step": 25,
"size": "512x512"
}'

View File

@@ -0,0 +1,10 @@
name: jina-reranker-v1-base-en
reranking: true
f16: true
parameters:
model: jina-reranker-v1-tiny-en.f16.gguf
backend: llama-cpp
download_files:
- filename: jina-reranker-v1-tiny-en.f16.gguf
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf

View File

@@ -0,0 +1,9 @@
name: whisper-1
backend: whisper
parameters:
model: ggml-whisper-base.bin
download_files:
- filename: "ggml-whisper-base.bin"
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"

View File

@@ -0,0 +1,7 @@
name: tts-1
download_files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
backend: piper
parameters:
model: en-us-amy-low.onnx

View File

@@ -55,4 +55,4 @@ template:
download_files:
- filename: Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
sha256: 2e220a14ba4328fee38cf36c2c068261560f999fadb5725ce5c6d977cb5126b5
uri: huggingface://bartowski/Hermes-3-Llama-3.2-3B-GGUF/Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
uri: huggingface://bartowski/Hermes-3-Llama-3.2-3B-GGUF/Hermes-3-Llama-3.2-3B-Q4_K_M.gguf

View File

@@ -1,8 +1,8 @@
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808

View File

@@ -1,6 +1,6 @@
context_size: 4096
backend: llama-cpp
f16: true
backend: llama-cpp
mmap: true
mmproj: minicpm-v-4_5-mmproj-f16.gguf
name: gpt-4o
@@ -41,11 +41,10 @@ template:
{{.Input -}}
<|im_start|>assistant
download_files:
- filename: minicpm-v-4_5-Q4_K_M.gguf
sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf
- filename: minicpm-v-4_5-mmproj-f16.gguf
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf
sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8
sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8