chore: drop AIO images (#9004)

The AIO images have fallen behind, and it takes effort to maintain them. The
wizard and model installation have been simplified massively, so the AIO
images have lost their purpose.

This lets us stay laser-focused on the main images and relieves
stress from CI.
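
For reference, the equivalent flow with the standard images looks roughly like this (a sketch; model installation now happens through the CLI or the WebUI wizard, as covered in the quickstart docs):

```bash
# Start a standard image (CPU shown; GPU tags are documented in the README)
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest

# Browse the model gallery from the CLI, then install the model you want
local-ai models list
```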

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-03-14 17:49:36 +01:00
committed by GitHub
parent 0ac4ac5bdd
commit 5affb747a9
44 changed files with 68 additions and 988 deletions

View File

@@ -26,7 +26,6 @@
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
aio: ${{ matrix.aio }}
makeflags: ${{ matrix.makeflags }}
ubuntu-version: ${{ matrix.ubuntu-version }}
ubuntu-codename: ${{ matrix.ubuntu-codename }}
@@ -46,7 +45,6 @@
grpc-base-image: "ubuntu:24.04"
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
aio: "-aio-gpu-hipblas"
ubuntu-version: '2404'
ubuntu-codename: 'noble'
@@ -61,7 +59,6 @@
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
aio: ${{ matrix.aio }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}
@@ -83,7 +80,6 @@
tag-suffix: ''
base-image: "ubuntu:24.04"
runs-on: 'ubuntu-latest'
aio: "-aio-cpu"
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
ubuntu-version: '2404'
@@ -98,7 +94,6 @@
base-image: "ubuntu:24.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
aio: "-aio-gpu-nvidia-cuda-12"
ubuntu-version: '2404'
ubuntu-codename: 'noble'
- build-type: 'cublas'
@@ -111,7 +106,6 @@
base-image: "ubuntu:22.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
aio: "-aio-gpu-nvidia-cuda-13"
ubuntu-version: '2404'
ubuntu-codename: 'noble'
- build-type: 'vulkan'
@@ -122,7 +116,6 @@
base-image: "ubuntu:24.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
aio: "-aio-gpu-vulkan"
ubuntu-version: '2404'
ubuntu-codename: 'noble'
- build-type: 'intel'
@@ -133,7 +126,6 @@
tag-suffix: '-gpu-intel'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
aio: "-aio-gpu-intel"
ubuntu-version: '2404'
ubuntu-codename: 'noble'
@@ -148,7 +140,6 @@
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
aio: ${{ matrix.aio }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
makeflags: ${{ matrix.makeflags }}

View File

@@ -51,11 +51,6 @@ on:
required: false
default: '--jobs=4 --output-sync=target'
type: string
aio:
description: 'AIO Image Name'
required: false
default: ''
type: string
ubuntu-version:
description: 'Ubuntu version'
required: false
@@ -177,34 +172,6 @@ jobs:
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.tag-suffix }}
- name: Docker meta AIO (quay.io)
if: inputs.aio != ''
id: meta_aio
uses: docker/metadata-action@v6
with:
images: |
quay.io/go-skynet/local-ai
tags: |
type=ref,event=branch
type=semver,pattern={{raw}}
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.aio }},onlatest=true
- name: Docker meta AIO (dockerhub)
if: inputs.aio != ''
id: meta_aio_dockerhub
uses: docker/metadata-action@v6
with:
images: |
localai/localai
tags: |
type=ref,event=branch
type=semver,pattern={{raw}}
flavor: |
latest=${{ inputs.tag-latest }}
suffix=${{ inputs.aio }},onlatest=true
- name: Set up QEMU
uses: docker/setup-qemu-action@master
with:
@@ -287,41 +254,6 @@ jobs:
tags: ${{ steps.meta_pull_request.outputs.tags }}
labels: ${{ steps.meta_pull_request.outputs.labels }}
## End testing image
- name: Build and push AIO image
if: inputs.aio != ''
uses: docker/build-push-action@v7
with:
builder: ${{ steps.buildx.outputs.name }}
build-args: |
BASE_IMAGE=quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
MAKEFLAGS=${{ inputs.makeflags }}
context: .
file: ./Dockerfile.aio
platforms: ${{ inputs.platforms }}
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta_aio.outputs.tags }}
labels: ${{ steps.meta_aio.outputs.labels }}
- name: Build and push AIO image (dockerhub)
if: inputs.aio != ''
uses: docker/build-push-action@v7
with:
builder: ${{ steps.buildx.outputs.name }}
build-args: |
BASE_IMAGE=localai/localai:${{ steps.meta.outputs.version }}
MAKEFLAGS=${{ inputs.makeflags }}
context: .
file: ./Dockerfile.aio
platforms: ${{ inputs.platforms }}
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
- name: job summary
run: |
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
- name: job summary(AIO)
if: inputs.aio != ''
run: |
echo "Built image: ${{ steps.meta_aio.outputs.labels }}" >> $GITHUB_STEP_SUMMARY

View File

@@ -116,7 +116,7 @@ jobs:
connect-timeout-seconds: 180
limit-access-to-actor: true
tests-aio-container:
tests-e2e-container:
runs-on: ubuntu-latest
steps:
- name: Release space from worker
@@ -166,7 +166,7 @@ jobs:
PATH="$PATH:$HOME/go/bin" make protogen-go
- name: Test
run: |
PATH="$PATH:$HOME/go/bin" make backends/local-store backends/silero-vad backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio
PATH="$PATH:$HOME/go/bin" make backends/local-store backends/silero-vad backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-e2e e2e-aio
- name: Setup tmate session if tests fail
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3.23

.gitignore vendored
View File

@@ -37,7 +37,6 @@ models/*
test-models/
test-dir/
tests/e2e-aio/backends
tests/e2e-aio/models
mock-backend
release/

View File

@@ -244,19 +244,16 @@ The e2e tests run LocalAI in a Docker container and exercise the API:
make test-e2e
```
### Running AIO tests
### Running E2E container tests
All-In-One images have a set of tests that automatically verify that most of the endpoints work correctly:
These tests build a standard LocalAI Docker image and run it with pre-configured model configs to verify that most endpoints work correctly:
```bash
# Build the LocalAI docker image
make DOCKER_IMAGE=local-ai docker
make docker-build-e2e
# Build the corresponding AIO image
BASE_IMAGE=local-ai DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
# Run the AIO e2e tests
LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio make run-e2e-aio
# Run the e2e tests (uses model configs from tests/e2e-aio/models/)
make e2e-aio
```
### Testing backends

View File

@@ -1,8 +0,0 @@
ARG BASE_IMAGE=ubuntu:24.04
FROM ${BASE_IMAGE}
RUN apt-get update && apt-get install -y pciutils && apt-get clean
COPY aio/ /aio
ENTRYPOINT [ "/aio/entrypoint.sh" ]

View File

@@ -172,10 +172,10 @@ test: test-models/testmodel.ggml protogen-go
$(MAKE) test-stablediffusion
########################################################
## AIO tests
## E2E AIO tests (uses standard image with pre-configured models)
########################################################
docker-build-aio:
docker-build-e2e:
docker build \
--build-arg MAKEFLAGS="--jobs=5 --output-sync=target" \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
@@ -187,13 +187,12 @@ docker-build-aio:
--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
--build-arg GO_TAGS="$(GO_TAGS)" \
-t local-ai:tests -f Dockerfile .
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test $(MAKE) docker-aio
e2e-aio:
LOCALAI_BACKEND_DIR=$(abspath ./backends) \
LOCALAI_MODELS_DIR=$(abspath ./models) \
LOCALAI_IMAGE_TAG=test \
LOCALAI_IMAGE=local-ai-aio \
LOCALAI_MODELS_DIR=$(abspath ./tests/e2e-aio/models) \
LOCALAI_IMAGE_TAG=tests \
LOCALAI_IMAGE=local-ai \
$(MAKE) run-e2e-aio
run-e2e-aio: protogen-go
@@ -443,7 +442,6 @@ test-extra: prepare-test-extra
$(MAKE) -C backend/python/ace-step test
DOCKER_IMAGE?=local-ai
DOCKER_AIO_IMAGE?=local-ai-aio
IMAGE_TYPE?=core
BASE_IMAGE?=ubuntu:24.04
@@ -473,21 +471,6 @@ docker-cuda12:
--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
-t $(DOCKER_IMAGE)-cuda-12 .
docker-aio:
@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
docker build \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
-t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio .
docker-aio-all:
$(MAKE) docker-aio DOCKER_AIO_SIZE=cpu
$(MAKE) docker-aio DOCKER_AIO_SIZE=cpu
docker-image-intel:
docker build \
--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04 \

View File

@@ -194,27 +194,6 @@ docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/de
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-vulkan
```
#### AIO Images (pre-downloaded models):
```bash
# CPU version
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
# NVIDIA CUDA 13 version
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-13
# NVIDIA CUDA 12 version
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
# Intel GPU version
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel
# AMD GPU version
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas
```
For more information about the AIO images and pre-downloaded models, see [Container Documentation](https://localai.io/basics/container/).
To load models:
```bash
@@ -250,7 +229,7 @@ For more information, see [💻 Getting started](https://localai.io/basics/getti
- May 2025: Important: image name changes [See release](https://github.com/mudler/LocalAI/releases/tag/v2.29.0)
- Apr 2025: Rebrand, WebUI enhancements
- Apr 2025: [LocalAGI](https://github.com/mudler/LocalAGI) and [LocalRecall](https://github.com/mudler/LocalRecall) join the LocalAI family stack.
- Apr 2025: WebUI overhaul, AIO images updates
- Apr 2025: WebUI overhaul
- Feb 2025: Backend cleanup, Breaking changes, new backends (kokoro, OutelTTS, faster-whisper), Nvidia L4T images
- Jan 2025: LocalAI model release: https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.3, SANA support in diffusers: https://github.com/mudler/LocalAI/pull/4603
- Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 )

View File

@@ -1,5 +0,0 @@
## AIO CPU size
Use this image with CPU-only.
Please keep using only C++ backends so the base image is as small as possible (without CUDA, cuDNN, python, etc).

View File

@@ -1,13 +0,0 @@
embeddings: true
name: text-embedding-ada-002
backend: llama-cpp
parameters:
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
usage: |
You can test this model with curl like this:
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "Your text string goes here",
"model": "text-embedding-ada-002"
}'

View File

@@ -1,33 +0,0 @@
name: jina-reranker-v1-base-en
reranking: true
f16: true
parameters:
model: jina-reranker-v1-tiny-en.f16.gguf
backend: llama-cpp
download_files:
- filename: jina-reranker-v1-tiny-en.f16.gguf
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf
usage: |
You can test this model with curl like this:
curl http://localhost:8080/v1/rerank \
-H "Content-Type: application/json" \
-d '{
"model": "jina-reranker-v1-base-en",
"query": "Organic skincare products for sensitive skin",
"documents": [
"Eco-friendly kitchenware for modern homes",
"Biodegradable cleaning supplies for eco-conscious consumers",
"Organic cotton baby clothes for sensitive skin",
"Natural organic skincare range for sensitive skin",
"Tech gadgets for smart homes: 2024 edition",
"Sustainable gardening tools and compost solutions",
"Sensitive skin-friendly facial cleansers and toners",
"Organic food wraps and storage solutions",
"All-natural pet food for dogs with allergies",
"Yoga mats made from recycled materials"
],
"top_n": 3
}'

View File

@@ -1,18 +0,0 @@
name: whisper-1
backend: whisper
parameters:
model: ggml-whisper-base.bin
usage: |
## example audio file
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
## Send the example audio file to the transcriptions endpoint
curl http://localhost:8080/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
-F file="@$PWD/gb1.ogg" -F model="whisper-1"
download_files:
- filename: "ggml-whisper-base.bin"
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"

View File

@@ -1,15 +0,0 @@
name: tts-1
download_files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
backend: piper
parameters:
model: en-us-amy-low.onnx
usage: |
To test if this model works as expected, you can use the following curl command:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"model":"voice-en-us-amy-low",
"input": "Hi, this is a test."
}'

View File

@@ -1,8 +0,0 @@
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808

View File

@@ -1,50 +0,0 @@
context_size: 4096
f16: true
backend: llama-cpp
mmap: true
mmproj: minicpm-v-4_5-mmproj-f16.gguf
name: gpt-4o
parameters:
model: minicpm-v-4_5-Q4_K_M.gguf
stopwords:
- <|im_end|>
- <dummy32000>
- </s>
- <|endoftext|>
template:
chat: |
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}<|im_end|>
completion: |
{{.Input}}
function: |
<|im_start|>system
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
For each function call return a json object with function name and arguments
<|im_end|>
{{.Input -}}
<|im_start|>assistant
download_files:
- filename: minicpm-v-4_5-Q4_K_M.gguf
sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf
- filename: minicpm-v-4_5-mmproj-f16.gguf
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf
sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8

View File

@@ -1,138 +0,0 @@
#!/bin/bash
echo "===> LocalAI All-in-One (AIO) container starting..."
GPU_ACCELERATION=false
GPU_VENDOR=""
function check_intel() {
if lspci | grep -E 'VGA|3D' | grep -iq intel; then
echo "Intel GPU detected"
if [ -d /opt/intel ]; then
GPU_ACCELERATION=true
GPU_VENDOR=intel
else
echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
fi
fi
}
function check_nvidia_wsl() {
if lspci | grep -E 'VGA|3D' | grep -iq "Microsoft Corporation Device 008e"; then
# We assume this WSL2 card is NVIDIA, then check for nvidia-smi
# Make sure the container was run with `--gpus all` as the only required parameter
echo "NVIDIA GPU detected via WSL2"
# nvidia-smi should be installed in the container
if nvidia-smi; then
GPU_ACCELERATION=true
GPU_VENDOR=nvidia
else
echo "NVIDIA GPU detected via WSL2, but nvidia-smi is not installed. GPU acceleration will not be available."
fi
fi
}
function check_amd() {
if lspci | grep -E 'VGA|3D' | grep -iq amd; then
echo "AMD GPU detected"
# Check if ROCm is installed
if [ -d /opt/rocm ]; then
GPU_ACCELERATION=true
GPU_VENDOR=amd
else
echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available."
fi
fi
}
function check_nvidia() {
if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then
echo "NVIDIA GPU detected"
# nvidia-smi should be installed in the container
if nvidia-smi; then
GPU_ACCELERATION=true
GPU_VENDOR=nvidia
else
echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available."
fi
fi
}
function check_metal() {
if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then
echo "Apple Metal supported GPU detected"
GPU_ACCELERATION=true
GPU_VENDOR=apple
fi
}
function detect_gpu() {
case "$(uname -s)" in
Linux)
check_nvidia
check_amd
check_intel
check_nvidia_wsl
;;
Darwin)
check_metal
;;
esac
}
function detect_gpu_size() {
# Attempting to find GPU memory size for NVIDIA GPUs
if [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "nvidia" ]; then
echo "NVIDIA GPU detected. Attempting to find memory size..."
# Using head -n 1 to get the total memory of the 1st NVIDIA GPU detected.
# If handling multiple GPUs is required in the future, this is the place to do it
nvidia_sm=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits | head -n 1)
if [ ! -z "$nvidia_sm" ]; then
echo "Total GPU Memory: $nvidia_sm MiB"
# if bigger than 8GB, use 16GB
#if [ "$nvidia_sm" -gt 8192 ]; then
# GPU_SIZE=gpu-16g
#else
GPU_SIZE=gpu-8g
#fi
else
echo "Unable to determine NVIDIA GPU memory size. Falling back to CPU."
GPU_SIZE=gpu-8g
fi
elif [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "intel" ]; then
GPU_SIZE=intel
# Default to a generic GPU size until we implement GPU size detection for non NVIDIA GPUs
elif [ "$GPU_ACCELERATION" = true ]; then
echo "Non-NVIDIA GPU detected. Specific GPU memory size detection is not implemented."
GPU_SIZE=gpu-8g
# default to cpu if GPU_SIZE is not set
else
echo "GPU acceleration is not enabled or supported. Defaulting to CPU."
GPU_SIZE=cpu
fi
}
function check_vars() {
if [ -z "$MODELS" ]; then
echo "MODELS environment variable is not set. Please set it to a comma-separated list of model YAML files to load."
exit 1
fi
if [ -z "$PROFILE" ]; then
echo "PROFILE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple"
exit 1
fi
}
detect_gpu
detect_gpu_size
PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vad.yaml,/aio/${PROFILE}/vision.yaml}"
check_vars
echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS"
exec /entrypoint.sh "$@"

View File

@@ -1,13 +0,0 @@
embeddings: true
name: text-embedding-ada-002
backend: llama-cpp
parameters:
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
usage: |
You can test this model with curl like this:
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "Your text string goes here",
"model": "text-embedding-ada-002"
}'

View File

@@ -1,25 +0,0 @@
name: stablediffusion
parameters:
model: DreamShaper_8_pruned.safetensors
backend: diffusers
step: 25
f16: true
diffusers:
pipeline_type: StableDiffusionPipeline
cuda: true
enable_parameters: "negative_prompt,num_inference_steps"
scheduler_type: "k_dpmpp_2m"
download_files:
- filename: DreamShaper_8_pruned.safetensors
uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
usage: |
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "<positive prompt>|<negative prompt>",
"step": 25,
"size": "512x512"
}'

View File

@@ -1,33 +0,0 @@
name: jina-reranker-v1-base-en
reranking: true
f16: true
parameters:
model: jina-reranker-v1-tiny-en.f16.gguf
backend: llama-cpp
download_files:
- filename: jina-reranker-v1-tiny-en.f16.gguf
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf
usage: |
You can test this model with curl like this:
curl http://localhost:8080/v1/rerank \
-H "Content-Type: application/json" \
-d '{
"model": "jina-reranker-v1-base-en",
"query": "Organic skincare products for sensitive skin",
"documents": [
"Eco-friendly kitchenware for modern homes",
"Biodegradable cleaning supplies for eco-conscious consumers",
"Organic cotton baby clothes for sensitive skin",
"Natural organic skincare range for sensitive skin",
"Tech gadgets for smart homes: 2024 edition",
"Sustainable gardening tools and compost solutions",
"Sensitive skin-friendly facial cleansers and toners",
"Organic food wraps and storage solutions",
"All-natural pet food for dogs with allergies",
"Yoga mats made from recycled materials"
],
"top_n": 3
}'

View File

@@ -1,18 +0,0 @@
name: whisper-1
backend: whisper
parameters:
model: ggml-whisper-base.bin
usage: |
## example audio file
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
## Send the example audio file to the transcriptions endpoint
curl http://localhost:8080/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
-F file="@$PWD/gb1.ogg" -F model="whisper-1"
download_files:
- filename: "ggml-whisper-base.bin"
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"

View File

@@ -1,15 +0,0 @@
name: tts-1
download_files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
backend: piper
parameters:
model: en-us-amy-low.onnx
usage: |
To test if this model works as expected, you can use the following curl command:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"model":"tts-1",
"input": "Hi, this is a test."
}'

View File

@@ -1,54 +0,0 @@
context_size: 4096
f16: true
backend: llama-cpp
function:
capture_llm_results:
- (?s)<Thought>(.*?)</Thought>
grammar:
properties_order: name,arguments
json_regex_match:
- (?s)<Output>(.*?)</Output>
replace_llm_results:
- key: (?s)<Thought>(.*?)</Thought>
value: ""
mmap: true
name: gpt-4
parameters:
model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
stopwords:
- <|im_end|>
- <dummy32000>
- </s>
template:
chat: |
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}<|im_end|>
completion: |
{{.Input}}
function: |
<|im_start|>system
You are an AI assistant that executes function calls, and these are the tools at your disposal:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
<|im_end|>
{{.Input -}}
<|im_start|>assistant
download_files:
- filename: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
sha256: 4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4
uri: huggingface://mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf

View File

@@ -1,8 +0,0 @@
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808

View File

@@ -1,50 +0,0 @@
context_size: 4096
backend: llama-cpp
f16: true
mmap: true
mmproj: minicpm-v-4_5-mmproj-f16.gguf
name: gpt-4o
parameters:
model: minicpm-v-4_5-Q4_K_M.gguf
stopwords:
- <|im_end|>
- <dummy32000>
- </s>
- <|endoftext|>
template:
chat: |
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}<|im_end|>
completion: |
{{.Input}}
function: |
<|im_start|>system
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
For each function call return a json object with function name and arguments
<|im_end|>
{{.Input -}}
<|im_start|>assistant
download_files:
- filename: minicpm-v-4_5-Q4_K_M.gguf
sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf
- filename: minicpm-v-4_5-mmproj-f16.gguf
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf
sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8

View File

@@ -1,13 +0,0 @@
embeddings: true
name: text-embedding-ada-002
backend: llama-cpp
parameters:
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf
usage: |
You can test this model with curl like this:
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "Your text string goes here",
"model": "text-embedding-ada-002"
}'

View File

@@ -1,20 +0,0 @@
name: stablediffusion
parameters:
model: Lykon/dreamshaper-8
backend: diffusers
step: 25
f16: true
diffusers:
pipeline_type: StableDiffusionPipeline
cuda: true
enable_parameters: "negative_prompt,num_inference_steps"
scheduler_type: "k_dpmpp_2m"
usage: |
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "<positive prompt>|<negative prompt>",
"step": 25,
"size": "512x512"
}'

View File

@@ -1,33 +0,0 @@
name: jina-reranker-v1-base-en
reranking: true
f16: true
parameters:
model: jina-reranker-v1-tiny-en.f16.gguf
backend: llama-cpp
download_files:
- filename: jina-reranker-v1-tiny-en.f16.gguf
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf
usage: |
You can test this model with curl like this:
curl http://localhost:8080/v1/rerank \
-H "Content-Type: application/json" \
-d '{
"model": "jina-reranker-v1-base-en",
"query": "Organic skincare products for sensitive skin",
"documents": [
"Eco-friendly kitchenware for modern homes",
"Biodegradable cleaning supplies for eco-conscious consumers",
"Organic cotton baby clothes for sensitive skin",
"Natural organic skincare range for sensitive skin",
"Tech gadgets for smart homes: 2024 edition",
"Sustainable gardening tools and compost solutions",
"Sensitive skin-friendly facial cleansers and toners",
"Organic food wraps and storage solutions",
"All-natural pet food for dogs with allergies",
"Yoga mats made from recycled materials"
],
"top_n": 3
}'

View File

@@ -1,18 +0,0 @@
name: whisper-1
backend: whisper
parameters:
model: ggml-whisper-base.bin
usage: |
## example audio file
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
## Send the example audio file to the transcriptions endpoint
curl http://localhost:8080/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
-F file="@$PWD/gb1.ogg" -F model="whisper-1"
download_files:
- filename: "ggml-whisper-base.bin"
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"

View File

@@ -1,15 +0,0 @@
name: tts-1
download_files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
backend: piper
parameters:
model: en-us-amy-low.onnx
usage: |
To test if this model works as expected, you can use the following curl command:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"model":"tts-1",
"input": "Hi, this is a test."
}'

View File

@@ -1,54 +0,0 @@
context_size: 4096
f16: true
backend: llama-cpp
function:
capture_llm_results:
- (?s)<Thought>(.*?)</Thought>
grammar:
properties_order: name,arguments
json_regex_match:
- (?s)<Output>(.*?)</Output>
replace_llm_results:
- key: (?s)<Thought>(.*?)</Thought>
value: ""
mmap: true
name: gpt-4
parameters:
model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf
stopwords:
- <|im_end|>
- <dummy32000>
- </s>
template:
chat: |
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}<|im_end|>
completion: |
{{.Input}}
function: |
<|im_start|>system
You are an AI assistant that executes function calls, and these are the tools at your disposal:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
<|im_end|>
{{.Input -}}
<|im_start|>assistant
download_files:
- filename: localai-functioncall-phi-4-v0.3-q4_k_m.gguf
sha256: 23fee048ded2a6e2e1a7b6bbefa6cbf83068f194caa9552aecbaa00fec8a16d5
uri: huggingface://mudler/LocalAI-functioncall-phi-4-v0.3-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.3-q4_k_m.gguf

View File

@@ -206,7 +206,7 @@ The following are examples of the ROCm specific configuration elements required.
```yaml
# For full functionality select a non-'core' image; version-locking the image is recommended for debugging purposes.
image: quay.io/go-skynet/local-ai:master-aio-gpu-hipblas
image: quay.io/go-skynet/local-ai:master-gpu-hipblas
environment:
- DEBUG=true
# If your gpu is not already included in the current list of default targets the following build details are required.
@@ -229,13 +229,11 @@ docker run \
-e GPU_TARGETS=gfx906 \
--device /dev/dri \
--device /dev/kfd \
quay.io/go-skynet/local-ai:master-aio-gpu-hipblas
quay.io/go-skynet/local-ai:master-gpu-hipblas
```
Please ensure you add all other required environment variables, port forwards, etc. to your `compose` file or `run` command.
The rebuild process will take some time to complete when deploying these containers. It is recommended that you `pull` the image prior to deployment, as depending on the version these images may be ~20GB in size.
#### Example (k8s) (Advanced Deployment/WIP)
For k8s deployments there is an additional step required before deployment: installing the [ROCm/k8s-device-plugin](https://artifacthub.io/packages/helm/amd-gpu-helm/amd-gpu).
@@ -434,7 +432,7 @@ If your AMD GPU is not in the default target list, set `REBUILD=true` and `GPU_T
```bash
docker run -e REBUILD=true -e BUILD_TYPE=hipblas -e GPU_TARGETS=gfx1030 \
--device /dev/dri --device /dev/kfd \
quay.io/go-skynet/local-ai:master-aio-gpu-hipblas
quay.io/go-skynet/local-ai:master-gpu-hipblas
```
### Intel SYCL: model hangs

View File

@@ -32,6 +32,4 @@ Grammars and function tools can be used as well in conjunction with vision APIs:
### Setup
All-in-One images already ship the llava model as `gpt-4-vision-preview`, so no setup is needed in this case.
To set up the LLaVa models, follow the full example in the [configuration examples](https://github.com/mudler/LocalAI-examples/blob/main/configurations/llava/llava.yaml).
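For orientation, such a config pairs a GGUF model with an `mmproj` projector file, mirroring the shape of the vision configs elsewhere in this change; the file names below are placeholders, and the real values come from the linked example:

```yaml
name: llava
backend: llama-cpp
f16: true
mmap: true
mmproj: llava-mmproj-f16.gguf  # placeholder: projector file from the linked example
parameters:
  model: llava-Q4_K_M.gguf     # placeholder: model file from the linked example
```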

View File

@@ -8,8 +8,6 @@ ico = "rocket_launch"
LocalAI provides a variety of images to support different environments. These images are available on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags) and [Docker Hub](https://hub.docker.com/r/localai/localai).
All-in-One images come with a pre-configured set of models and backends; standard images do not have any models pre-configured or installed.
For GPU acceleration on Nvidia graphics cards, use the Nvidia/CUDA images; if you don't have a GPU, use the CPU images. If you have an AMD GPU or Apple Silicon, see the [build section]({{%relref "installation/build" %}}).
{{% notice tip %}}
@@ -17,7 +15,6 @@ For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA
**Available Image Types**:
- Images ending with `-core` are smaller images without pre-downloaded Python dependencies. Use these images if you plan to use the `llama.cpp`, `stablediffusion-ncn` or `rwkv` backends - if you are not sure which one to use, do **not** use these images.
- Images containing the `aio` tag are all-in-one images with all the features enabled, and they come with an opinionated set of configurations.
{{% /notice %}}
@@ -124,109 +121,6 @@ These images are compatible with Nvidia ARM64 devices with CUDA 13, such as the
{{< /tabs >}}
## All-in-one images
All-In-One images come pre-configured with a set of models and backends to fully leverage almost the entire LocalAI feature set. These images are available for both CPU and GPU environments. The AIO images are designed to be easy to use and require no configuration. Model configurations can be found [here](https://github.com/mudler/LocalAI/tree/master/aio), separated by size.
In the AIO images, models are configured with the names of OpenAI models; however, they are actually backed by open-source models. See the table below:
| Category | Model name | Real model (CPU) | Real model (GPU) |
| ---- | ---- | ---- | ---- |
| Text Generation | `gpt-4` | `phi-2` | `hermes-2-pro-mistral` |
| Multimodal Vision | `gpt-4-vision-preview` | `bakllava` | `llava-1.6-mistral` |
| Image Generation | `stablediffusion` | `stablediffusion` | `dreamshaper-8` |
| Speech to Text | `whisper-1` | `whisper` with `whisper-base` model | <= same |
| Text to Speech | `tts-1` | `en-us-amy-low.onnx` from `rhasspy/piper` | <= same |
| Embeddings | `text-embedding-ada-002` | `all-MiniLM-L6-v2` in Q4 | `all-MiniLM-L6-v2` |
### Usage
Select the image (CPU or GPU) and start the container with Docker:
```bash
docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
```
LocalAI will automatically download all the required models, and the API will be available at [localhost:8080](http://localhost:8080/v1/models).
Or with a docker-compose file:
```yaml
version: "3.9"
services:
api:
image: localai/localai:latest-aio-cpu
# For a specific version:
# image: localai/localai:{{< version >}}-aio-cpu
# For Nvidia GPUs uncomment one of the following (cuda12 or cuda13):
# image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-12
# image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-13
# image: localai/localai:latest-aio-gpu-nvidia-cuda-12
# image: localai/localai:latest-aio-gpu-nvidia-cuda-13
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"]
interval: 1m
timeout: 20m
retries: 5
ports:
- 8080:8080
environment:
- DEBUG=true
# ...
volumes:
- ./models:/models:cached
# uncomment the following piece if running with Nvidia GPUs
# deploy:
# resources:
# reservations:
# devices:
# - driver: nvidia
# count: 1
# capabilities: [gpu]
```
{{% notice tip %}}
**Models caching**: The **AIO** image will download the needed models on the first run if they are not already present, and store them in `/models` inside the container. The AIO models are automatically updated with new versions of the AIO images.
You can change the directory inside the container by specifying the `MODELS_PATH` environment variable (or `--models-path`).
If you want to use a named model or a local directory, you can mount it as a volume to `/models`:
```bash
docker run -p 8080:8080 --name local-ai -ti -v $PWD/models:/models localai/localai:latest-aio-cpu
```
Or use a named volume:
```bash
docker volume create localai-models
docker run -p 8080:8080 --name local-ai -ti -v localai-models:/models localai/localai:latest-aio-cpu
```
{{% /notice %}}
### Available AIO images
| Description | Quay | Docker Hub |
| --- | --- |-----------------------------------------------|
| Latest images for CPU | `quay.io/go-skynet/local-ai:latest-aio-cpu` | `localai/localai:latest-aio-cpu` |
| Versioned image (e.g. for CPU) | `quay.io/go-skynet/local-ai:{{< version >}}-aio-cpu` | `localai/localai:{{< version >}}-aio-cpu` |
| Latest images for Nvidia GPU (CUDA12) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-12` | `localai/localai:latest-aio-gpu-nvidia-cuda-12` |
| Latest images for Nvidia GPU (CUDA13) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-13` | `localai/localai:latest-aio-gpu-nvidia-cuda-13` |
| Latest images for AMD GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-hipblas` | `localai/localai:latest-aio-gpu-hipblas` |
| Latest images for Intel GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel` | `localai/localai:latest-aio-gpu-intel` |
### Available environment variables
The AIO images inherit the same environment variables as the base images and the environment of LocalAI (which you can inspect by calling `--help`). However, they support additional environment variables available only in the container image:
| Variable | Default | Description |
| ---------------------| ------- | ----------- |
| `PROFILE` | Auto-detected | The size of the model to use. Available: `cpu`, `gpu-8g` |
| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "getting-started/models" %}})) |
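For example, to pin a profile instead of relying on auto-detection (a sketch using the variables above):

```bash
docker run -p 8080:8080 --name local-ai -ti \
  -e PROFILE=cpu \
  localai/localai:latest-aio-cpu
```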
## See Also
- [GPU acceleration]({{%relref "features/gpu-acceleration" %}})

View File

@@ -20,7 +20,7 @@ With the CLI you can list the models with `local-ai models list` and install the
You can also [run models manually]({{%relref "getting-started/models" %}}) by copying files into the `models` directory.
{{% /notice %}}
You can test out the API endpoints using `curl`; a few examples are listed below. The models referred to here (`gpt-4`, `gpt-4-vision-preview`, `tts-1`, `whisper-1`) are the default models that come with the AIO images - you can also use any other model you have installed.
You can test out the API endpoints using `curl`; a few examples are listed below. The models referred to here (`gpt-4`, `gpt-4-vision-preview`, `tts-1`, `whisper-1`) are examples - replace them with the model names you have installed.
### Text Generation
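A request against the chat endpoint looks roughly like this (a sketch; it assumes the OpenAI-compatible `/v1/chat/completions` route and a model installed under the name `gpt-4`):

```bash
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-4",
    "messages": [{"role": "user", "content": "Say hello in one sentence."}]
  }'
```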

View File

@@ -30,7 +30,7 @@ docker run -p 8080:8080 --name local-ai -ti localai/localai:latest
podman run -p 8080:8080 --name local-ai -ti localai/localai:latest
```
This will start LocalAI. The API will be available at `http://localhost:8080`. For images with pre-configured models, see [All-in-One images](/getting-started/container-images/#all-in-one-images).
This will start LocalAI. The API will be available at `http://localhost:8080`.
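To confirm the container is serving requests, you can probe the same route the compose healthchecks use, and list the installed models (a minimal check):

```bash
# Readiness probe (the route used by the compose healthchecks)
curl -f http://localhost:8080/readyz

# List the models currently installed
curl http://localhost:8080/v1/models
```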
For other platforms:
- **macOS**: Download the [DMG](macos/)

View File

@@ -93,48 +93,6 @@ CUDA 13 (for Nvidia DGX Spark):
docker run -ti --name local-ai -p 8080:8080 --runtime nvidia --gpus all localai/localai:latest-nvidia-l4t-arm64-cuda-13
```
### All-in-One (AIO) Images
**Recommended for beginners** - These images come pre-configured with models and backends, ready to use immediately.
#### CPU Image
```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
# Or with Podman:
podman run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
```
#### GPU Images
**NVIDIA CUDA 13:**
```bash
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-13
# Or with Podman:
podman run -ti --name local-ai -p 8080:8080 --device nvidia.com/gpu=all localai/localai:latest-aio-gpu-nvidia-cuda-13
```
**NVIDIA CUDA 12:**
```bash
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
# Or with Podman:
podman run -ti --name local-ai -p 8080:8080 --device nvidia.com/gpu=all localai/localai:latest-aio-gpu-nvidia-cuda-12
```
**AMD GPU (ROCm):**
```bash
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas
# Or with Podman:
podman run -ti --name local-ai -p 8080:8080 --device rocm.com/gpu=all localai/localai:latest-aio-gpu-hipblas
```
**Intel GPU:**
```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel
# Or with Podman:
podman run -ti --name local-ai -p 8080:8080 --device gpu.intel.com/all localai/localai:latest-aio-gpu-intel
```
## Using Compose
For a more manageable setup, especially with persistent volumes, use Docker Compose or Podman Compose:
@@ -147,8 +105,8 @@ The CDI approach is recommended for newer versions of the NVIDIA Container Toolk
version: "3.9"
services:
api:
image: localai/localai:latest-aio-gpu-nvidia-cuda-12
# For CUDA 13, use: localai/localai:latest-aio-gpu-nvidia-cuda-13
image: localai/localai:latest-gpu-nvidia-cuda-12
# For CUDA 13, use: localai/localai:latest-gpu-nvidia-cuda-13
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"]
interval: 1m
@@ -187,8 +145,8 @@ If you are using an older version of the NVIDIA Container Toolkit (before 1.14),
version: "3.9"
services:
api:
image: localai/localai:latest-aio-gpu-nvidia-cuda-12
# For CUDA 13, use: localai/localai:latest-aio-gpu-nvidia-cuda-13
image: localai/localai:latest-gpu-nvidia-cuda-12
# For CUDA 13, use: localai/localai:latest-gpu-nvidia-cuda-13
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"]
interval: 1m
@@ -227,12 +185,12 @@ To persist models and data, mount volumes:
docker run -ti --name local-ai -p 8080:8080 \
-v $PWD/models:/models \
-v $PWD/data:/data \
localai/localai:latest-aio-cpu
localai/localai:latest
# Or with Podman:
podman run -ti --name local-ai -p 8080:8080 \
-v $PWD/models:/models \
-v $PWD/data:/data \
localai/localai:latest-aio-cpu
localai/localai:latest
```
Or use named volumes:
@@ -243,29 +201,16 @@ docker volume create localai-data
docker run -ti --name local-ai -p 8080:8080 \
-v localai-models:/models \
-v localai-data:/data \
localai/localai:latest-aio-cpu
localai/localai:latest
# Or with Podman:
podman volume create localai-models
podman volume create localai-data
podman run -ti --name local-ai -p 8080:8080 \
-v localai-models:/models \
-v localai-data:/data \
localai/localai:latest-aio-cpu
localai/localai:latest
```
## What's Included in AIO Images
All-in-One images come pre-configured with:
- **Text Generation**: LLM models for chat and completion
- **Image Generation**: Stable Diffusion models
- **Text to Speech**: TTS models
- **Speech to Text**: Whisper models
- **Embeddings**: Vector embedding models
- **Function Calling**: Support for OpenAI-compatible function calling
The AIO images use OpenAI-compatible model names (like `gpt-4`, `gpt-4-vision-preview`) but are backed by open-source models. See the [container images documentation](/getting-started/container-images/#all-in-one-images) for the complete mapping.
## Next Steps
After installation:

View File

@@ -0,0 +1,5 @@
embeddings: true
name: text-embedding-ada-002
backend: llama-cpp
parameters:
model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf

View File

@@ -12,12 +12,3 @@ download_files:
- filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f"
uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
usage: |
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "<positive prompt>|<negative prompt>",
"step": 25,
"size": "512x512"
}'

View File

@@ -0,0 +1,10 @@
name: jina-reranker-v1-base-en
reranking: true
f16: true
parameters:
model: jina-reranker-v1-tiny-en.f16.gguf
backend: llama-cpp
download_files:
- filename: jina-reranker-v1-tiny-en.f16.gguf
sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407
uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf

View File

@@ -0,0 +1,9 @@
name: whisper-1
backend: whisper
parameters:
model: ggml-whisper-base.bin
download_files:
- filename: "ggml-whisper-base.bin"
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"

View File

@@ -0,0 +1,7 @@
name: tts-1
download_files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
backend: piper
parameters:
model: en-us-amy-low.onnx

View File

@@ -55,4 +55,4 @@ template:
download_files:
- filename: Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
sha256: 2e220a14ba4328fee38cf36c2c068261560f999fadb5725ce5c6d977cb5126b5
uri: huggingface://bartowski/Hermes-3-Llama-3.2-3B-GGUF/Hermes-3-Llama-3.2-3B-Q4_K_M.gguf
uri: huggingface://bartowski/Hermes-3-Llama-3.2-3B-GGUF/Hermes-3-Llama-3.2-3B-Q4_K_M.gguf

View File

@@ -1,8 +1,8 @@
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808
backend: silero-vad
name: silero-vad
parameters:
model: silero-vad.onnx
download_files:
- filename: silero-vad.onnx
uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx
sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808

View File

@@ -1,6 +1,6 @@
context_size: 4096
backend: llama-cpp
f16: true
backend: llama-cpp
mmap: true
mmproj: minicpm-v-4_5-mmproj-f16.gguf
name: gpt-4o
@@ -41,11 +41,10 @@ template:
{{.Input -}}
<|im_start|>assistant
download_files:
- filename: minicpm-v-4_5-Q4_K_M.gguf
sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf
- filename: minicpm-v-4_5-mmproj-f16.gguf
uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf
sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8
sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8