diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index a77f74165..8b672e897 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -26,7 +26,6 @@ runs-on: ${{ matrix.runs-on }} base-image: ${{ matrix.base-image }} grpc-base-image: ${{ matrix.grpc-base-image }} - aio: ${{ matrix.aio }} makeflags: ${{ matrix.makeflags }} ubuntu-version: ${{ matrix.ubuntu-version }} ubuntu-codename: ${{ matrix.ubuntu-codename }} @@ -46,7 +45,6 @@ grpc-base-image: "ubuntu:24.04" runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" - aio: "-aio-gpu-hipblas" ubuntu-version: '2404' ubuntu-codename: 'noble' @@ -61,7 +59,6 @@ cuda-minor-version: ${{ matrix.cuda-minor-version }} platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} - aio: ${{ matrix.aio }} base-image: ${{ matrix.base-image }} grpc-base-image: ${{ matrix.grpc-base-image }} makeflags: ${{ matrix.makeflags }} @@ -83,7 +80,6 @@ tag-suffix: '' base-image: "ubuntu:24.04" runs-on: 'ubuntu-latest' - aio: "-aio-cpu" makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'false' ubuntu-version: '2404' @@ -98,7 +94,6 @@ base-image: "ubuntu:24.04" skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" - aio: "-aio-gpu-nvidia-cuda-12" ubuntu-version: '2404' ubuntu-codename: 'noble' - build-type: 'cublas' @@ -111,7 +106,6 @@ base-image: "ubuntu:22.04" skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" - aio: "-aio-gpu-nvidia-cuda-13" ubuntu-version: '2404' ubuntu-codename: 'noble' - build-type: 'vulkan' @@ -122,7 +116,6 @@ base-image: "ubuntu:24.04" skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" - aio: "-aio-gpu-vulkan" ubuntu-version: '2404' ubuntu-codename: 'noble' - build-type: 'intel' @@ -133,7 +126,6 @@ tag-suffix: '-gpu-intel' runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" - aio: "-aio-gpu-intel" ubuntu-version: '2404' ubuntu-codename: 'noble' @@ -148,7 +140,6 @@ cuda-minor-version: ${{ matrix.cuda-minor-version }} platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} - aio: ${{ matrix.aio }} base-image: ${{ matrix.base-image }} grpc-base-image: ${{ matrix.grpc-base-image }} makeflags: ${{ matrix.makeflags }} diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 00df7db84..9483239d2 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -51,11 +51,6 @@ on: required: false default: '--jobs=4 --output-sync=target' type: string - aio: - description: 'AIO Image Name' - required: false - default: '' - type: string ubuntu-version: description: 'Ubuntu version' required: false @@ -177,34 +172,6 @@ jobs: flavor: | latest=${{ inputs.tag-latest }} suffix=${{ inputs.tag-suffix }} - - name: Docker meta AIO (quay.io) - if: inputs.aio != '' - id: meta_aio - uses: docker/metadata-action@v6 - with: - images: | - quay.io/go-skynet/local-ai - tags: | - type=ref,event=branch - type=semver,pattern={{raw}} - flavor: | - latest=${{ inputs.tag-latest }} - suffix=${{ inputs.aio }},onlatest=true - - - name: Docker meta AIO (dockerhub) - if: inputs.aio != '' - id: meta_aio_dockerhub - uses: docker/metadata-action@v6 - with: - images: | - localai/localai - tags: | - type=ref,event=branch - type=semver,pattern={{raw}} - flavor: | - latest=${{ inputs.tag-latest }} - suffix=${{ inputs.aio }},onlatest=true - - name: Set up QEMU uses: docker/setup-qemu-action@master with: @@ -287,41 +254,6 @@ jobs: tags: ${{ steps.meta_pull_request.outputs.tags }} labels: ${{ steps.meta_pull_request.outputs.labels }} ## End testing image - - name: Build and push AIO image - if: inputs.aio != '' - uses: docker/build-push-action@v7 - with: - builder: ${{ steps.buildx.outputs.name }} - build-args: | - BASE_IMAGE=quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} - MAKEFLAGS=${{ inputs.makeflags }} - context: . - file: ./Dockerfile.aio - platforms: ${{ inputs.platforms }} - push: ${{ github.event_name != 'pull_request' }} - tags: ${{ steps.meta_aio.outputs.tags }} - labels: ${{ steps.meta_aio.outputs.labels }} - - - name: Build and push AIO image (dockerhub) - if: inputs.aio != '' - uses: docker/build-push-action@v7 - with: - builder: ${{ steps.buildx.outputs.name }} - build-args: | - BASE_IMAGE=localai/localai:${{ steps.meta.outputs.version }} - MAKEFLAGS=${{ inputs.makeflags }} - context: . - file: ./Dockerfile.aio - platforms: ${{ inputs.platforms }} - push: ${{ github.event_name != 'pull_request' }} - tags: ${{ steps.meta_aio_dockerhub.outputs.tags }} - labels: ${{ steps.meta_aio_dockerhub.outputs.labels }} - - name: job summary run: | echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY - - - name: job summary(AIO) - if: inputs.aio != '' - run: | - echo "Built image: ${{ steps.meta_aio.outputs.labels }}" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4a37c3b50..d58e3b077 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -116,7 +116,7 @@ jobs: connect-timeout-seconds: 180 limit-access-to-actor: true - tests-aio-container: + tests-e2e-container: runs-on: ubuntu-latest steps: - name: Release space from worker @@ -166,7 +166,7 @@ jobs: PATH="$PATH:$HOME/go/bin" make protogen-go - name: Test run: | - PATH="$PATH:$HOME/go/bin" make backends/local-store backends/silero-vad backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-aio e2e-aio + PATH="$PATH:$HOME/go/bin" make backends/local-store backends/silero-vad backends/llama-cpp backends/whisper backends/piper backends/stablediffusion-ggml docker-build-e2e e2e-aio - name: Setup tmate session if tests fail if: ${{ failure() }} uses: mxschmitt/action-tmate@v3.23 diff --git a/.gitignore b/.gitignore index 3dcb309ca..2e1a924d0 100644 --- a/.gitignore +++ b/.gitignore @@ -37,7 +37,6 @@ models/* test-models/ test-dir/ tests/e2e-aio/backends -tests/e2e-aio/models mock-backend release/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e5f70d73e..a04bee185 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -244,19 +244,16 @@ The e2e tests run LocalAI in a Docker container and exercise the API: make test-e2e ``` -### Running AIO tests +### Running E2E container tests -All-In-One images have a set of tests that automatically verify that most of the endpoints work correctly: +These tests build a standard LocalAI Docker image and run it with pre-configured model configs to verify that most endpoints work correctly: ```bash # Build the LocalAI docker image -make DOCKER_IMAGE=local-ai docker +make docker-build-e2e -# Build the corresponding AIO image -BASE_IMAGE=local-ai DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio - -# Run the AIO e2e tests -LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio make run-e2e-aio +# Run the e2e tests (uses model configs from tests/e2e-aio/models/) +make e2e-aio ``` ### Testing backends diff --git a/Dockerfile.aio b/Dockerfile.aio deleted file mode 100644 index ccc2fc94b..000000000 --- a/Dockerfile.aio +++ /dev/null @@ -1,8 +0,0 @@ -ARG BASE_IMAGE=ubuntu:24.04 - -FROM ${BASE_IMAGE} - -RUN apt-get update && apt-get install -y pciutils && apt-get clean - -COPY aio/ /aio -ENTRYPOINT [ "/aio/entrypoint.sh" ] \ No newline at end of file diff --git a/Makefile b/Makefile index 6a8b639d1..f997aa55f 100644 --- a/Makefile +++ b/Makefile @@ -172,10 +172,10 @@ test: test-models/testmodel.ggml protogen-go $(MAKE) test-stablediffusion ######################################################## -## AIO tests +## E2E AIO tests (uses standard image with pre-configured models) ######################################################## -docker-build-aio: +docker-build-e2e: docker build \ --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" \ --build-arg BASE_IMAGE=$(BASE_IMAGE) \ @@ -187,13 +187,12 @@ docker-build-aio: --build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \ --build-arg GO_TAGS="$(GO_TAGS)" \ -t local-ai:tests -f Dockerfile . - BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test $(MAKE) docker-aio e2e-aio: LOCALAI_BACKEND_DIR=$(abspath ./backends) \ - LOCALAI_MODELS_DIR=$(abspath ./models) \ - LOCALAI_IMAGE_TAG=test \ - LOCALAI_IMAGE=local-ai-aio \ + LOCALAI_MODELS_DIR=$(abspath ./tests/e2e-aio/models) \ + LOCALAI_IMAGE_TAG=tests \ + LOCALAI_IMAGE=local-ai \ $(MAKE) run-e2e-aio run-e2e-aio: protogen-go @@ -443,7 +442,6 @@ test-extra: prepare-test-extra $(MAKE) -C backend/python/ace-step test DOCKER_IMAGE?=local-ai -DOCKER_AIO_IMAGE?=local-ai-aio IMAGE_TYPE?=core BASE_IMAGE?=ubuntu:24.04 @@ -473,21 +471,6 @@ docker-cuda12: --build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \ -t $(DOCKER_IMAGE)-cuda-12 . -docker-aio: - @echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)" - docker build \ - --build-arg BASE_IMAGE=$(BASE_IMAGE) \ - --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ - --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \ - --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \ - --build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \ - --build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \ - -t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio . - -docker-aio-all: - $(MAKE) docker-aio DOCKER_AIO_SIZE=cpu - $(MAKE) docker-aio DOCKER_AIO_SIZE=cpu - docker-image-intel: docker build \ --build-arg BASE_IMAGE=intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04 \ diff --git a/README.md b/README.md index 8cbb9df7a..f2cc69a3d 100644 --- a/README.md +++ b/README.md @@ -194,27 +194,6 @@ docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/de docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-vulkan ``` -#### AIO Images (pre-downloaded models): - -```bash -# CPU version -docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu - -# NVIDIA CUDA 13 version -docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-13 - -# NVIDIA CUDA 12 version -docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12 - -# Intel GPU version -docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel - -# AMD GPU version -docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas -``` - -For more information about the AIO images and pre-downloaded models, see [Container Documentation](https://localai.io/basics/container/). - To load models: ```bash @@ -250,7 +229,7 @@ For more information, see [💻 Getting started](https://localai.io/basics/getti - May 2025: Important: image name changes [See release](https://github.com/mudler/LocalAI/releases/tag/v2.29.0) - Apr 2025: Rebrand, WebUI enhancements - Apr 2025: [LocalAGI](https://github.com/mudler/LocalAGI) and [LocalRecall](https://github.com/mudler/LocalRecall) join the LocalAI family stack. -- Apr 2025: WebUI overhaul, AIO images updates +- Apr 2025: WebUI overhaul - Feb 2025: Backend cleanup, Breaking changes, new backends (kokoro, OutelTTS, faster-whisper), Nvidia L4T images - Jan 2025: LocalAI model release: https://huggingface.co/mudler/LocalAI-functioncall-phi-4-v0.3, SANA support in diffusers: https://github.com/mudler/LocalAI/pull/4603 - Dec 2024: stablediffusion.cpp backend (ggml) added ( https://github.com/mudler/LocalAI/pull/4289 ) diff --git a/aio/cpu/README.md b/aio/cpu/README.md deleted file mode 100644 index 8b0b1086d..000000000 --- a/aio/cpu/README.md +++ /dev/null @@ -1,5 +0,0 @@ -## AIO CPU size - -Use this image with CPU-only. - -Please keep using only C++ backends so the base image is as small as possible (without CUDA, cuDNN, python, etc). \ No newline at end of file diff --git a/aio/cpu/embeddings.yaml b/aio/cpu/embeddings.yaml deleted file mode 100644 index 0f88f4511..000000000 --- a/aio/cpu/embeddings.yaml +++ /dev/null @@ -1,13 +0,0 @@ -embeddings: true -name: text-embedding-ada-002 -backend: llama-cpp -parameters: - model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf - -usage: | - You can test this model with curl like this: - - curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{ - "input": "Your text string goes here", - "model": "text-embedding-ada-002" - }' \ No newline at end of file diff --git a/aio/cpu/rerank.yaml b/aio/cpu/rerank.yaml deleted file mode 100644 index 70d386b2b..000000000 --- a/aio/cpu/rerank.yaml +++ /dev/null @@ -1,33 +0,0 @@ -name: jina-reranker-v1-base-en -reranking: true -f16: true -parameters: - model: jina-reranker-v1-tiny-en.f16.gguf -backend: llama-cpp -download_files: - - filename: jina-reranker-v1-tiny-en.f16.gguf - sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407 - uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf - -usage: | - You can test this model with curl like this: - - curl http://localhost:8080/v1/rerank \ - -H "Content-Type: application/json" \ - -d '{ - "model": "jina-reranker-v1-base-en", - "query": "Organic skincare products for sensitive skin", - "documents": [ - "Eco-friendly kitchenware for modern homes", - "Biodegradable cleaning supplies for eco-conscious consumers", - "Organic cotton baby clothes for sensitive skin", - "Natural organic skincare range for sensitive skin", - "Tech gadgets for smart homes: 2024 edition", - "Sustainable gardening tools and compost solutions", - "Sensitive skin-friendly facial cleansers and toners", - "Organic food wraps and storage solutions", - "All-natural pet food for dogs with allergies", - "Yoga mats made from recycled materials" - ], - "top_n": 3 - }' diff --git a/aio/cpu/speech-to-text.yaml b/aio/cpu/speech-to-text.yaml deleted file mode 100644 index 77850d791..000000000 --- a/aio/cpu/speech-to-text.yaml +++ /dev/null @@ -1,18 +0,0 @@ -name: whisper-1 -backend: whisper -parameters: - model: ggml-whisper-base.bin - -usage: | - ## example audio file - wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg - - ## Send the example audio file to the transcriptions endpoint - curl http://localhost:8080/v1/audio/transcriptions \ - -H "Content-Type: multipart/form-data" \ - -F file="@$PWD/gb1.ogg" -F model="whisper-1" - -download_files: -- filename: "ggml-whisper-base.bin" - sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe" - uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin" \ No newline at end of file diff --git a/aio/cpu/text-to-speech.yaml b/aio/cpu/text-to-speech.yaml deleted file mode 100644 index 4009c3f77..000000000 --- a/aio/cpu/text-to-speech.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: tts-1 -download_files: - - filename: voice-en-us-amy-low.tar.gz - uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz -backend: piper -parameters: - model: en-us-amy-low.onnx - -usage: | - To test if this model works as expected, you can use the following curl command: - - curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ - "model":"voice-en-us-amy-low", - "input": "Hi, this is a test." - }' \ No newline at end of file diff --git a/aio/cpu/vad.yaml b/aio/cpu/vad.yaml deleted file mode 100644 index b0dc70d75..000000000 --- a/aio/cpu/vad.yaml +++ /dev/null @@ -1,8 +0,0 @@ -backend: silero-vad -name: silero-vad -parameters: - model: silero-vad.onnx -download_files: -- filename: silero-vad.onnx - uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx - sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808 \ No newline at end of file diff --git a/aio/cpu/vision.yaml b/aio/cpu/vision.yaml deleted file mode 100644 index 37852da05..000000000 --- a/aio/cpu/vision.yaml +++ /dev/null @@ -1,50 +0,0 @@ -context_size: 4096 -f16: true -backend: llama-cpp -mmap: true -mmproj: minicpm-v-4_5-mmproj-f16.gguf -name: gpt-4o -parameters: - model: minicpm-v-4_5-Q4_K_M.gguf -stopwords: -- <|im_end|> -- -- -- <|endoftext|> -template: - chat: | - {{.Input -}} - <|im_start|>assistant - chat_message: | - <|im_start|>{{ .RoleName }} - {{ if .FunctionCall -}} - Function call: - {{ else if eq .RoleName "tool" -}} - Function response: - {{ end -}} - {{ if .Content -}} - {{.Content }} - {{ end -}} - {{ if .FunctionCall -}} - {{toJson .FunctionCall}} - {{ end -}}<|im_end|> - completion: | - {{.Input}} - function: | - <|im_start|>system - You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: - {{range .Functions}} - {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} - {{end}} - For each function call return a json object with function name and arguments - <|im_end|> - {{.Input -}} - <|im_start|>assistant - -download_files: -- filename: minicpm-v-4_5-Q4_K_M.gguf - sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f - uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf -- filename: minicpm-v-4_5-mmproj-f16.gguf - uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf - sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8 \ No newline at end of file diff --git a/aio/entrypoint.sh b/aio/entrypoint.sh deleted file mode 100755 index a4b83a9da..000000000 --- a/aio/entrypoint.sh +++ /dev/null @@ -1,138 +0,0 @@ -#!/bin/bash - -echo "===> LocalAI All-in-One (AIO) container starting..." - -GPU_ACCELERATION=false -GPU_VENDOR="" - -function check_intel() { - if lspci | grep -E 'VGA|3D' | grep -iq intel; then - echo "Intel GPU detected" - if [ -d /opt/intel ]; then - GPU_ACCELERATION=true - GPU_VENDOR=intel - else - echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available." - fi - fi -} - -function check_nvidia_wsl() { - if lspci | grep -E 'VGA|3D' | grep -iq "Microsoft Corporation Device 008e"; then - # We make the assumption this WSL2 cars is NVIDIA, then check for nvidia-smi - # Make sure the container was run with `--gpus all` as the only required parameter - echo "NVIDIA GPU detected via WSL2" - # nvidia-smi should be installed in the container - if nvidia-smi; then - GPU_ACCELERATION=true - GPU_VENDOR=nvidia - else - echo "NVIDIA GPU detected via WSL2, but nvidia-smi is not installed. GPU acceleration will not be available." - fi - fi -} - -function check_amd() { - if lspci | grep -E 'VGA|3D' | grep -iq amd; then - echo "AMD GPU detected" - # Check if ROCm is installed - if [ -d /opt/rocm ]; then - GPU_ACCELERATION=true - GPU_VENDOR=amd - else - echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available." - fi - fi -} - -function check_nvidia() { - if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then - echo "NVIDIA GPU detected" - # nvidia-smi should be installed in the container - if nvidia-smi; then - GPU_ACCELERATION=true - GPU_VENDOR=nvidia - else - echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available." - fi - fi -} - -function check_metal() { - if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then - echo "Apple Metal supported GPU detected" - GPU_ACCELERATION=true - GPU_VENDOR=apple - fi -} - -function detect_gpu() { - case "$(uname -s)" in - Linux) - check_nvidia - check_amd - check_intel - check_nvidia_wsl - ;; - Darwin) - check_metal - ;; - esac -} - -function detect_gpu_size() { - # Attempting to find GPU memory size for NVIDIA GPUs - if [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "nvidia" ]; then - echo "NVIDIA GPU detected. Attempting to find memory size..." - # Using head -n 1 to get the total memory of the 1st NVIDIA GPU detected. - # If handling multiple GPUs is required in the future, this is the place to do it - nvidia_sm=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits | head -n 1) - if [ ! -z "$nvidia_sm" ]; then - echo "Total GPU Memory: $nvidia_sm MiB" - # if bigger than 8GB, use 16GB - #if [ "$nvidia_sm" -gt 8192 ]; then - # GPU_SIZE=gpu-16g - #else - GPU_SIZE=gpu-8g - #fi - else - echo "Unable to determine NVIDIA GPU memory size. Falling back to CPU." - GPU_SIZE=gpu-8g - fi - elif [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "intel" ]; then - GPU_SIZE=intel - # Default to a generic GPU size until we implement GPU size detection for non NVIDIA GPUs - elif [ "$GPU_ACCELERATION" = true ]; then - echo "Non-NVIDIA GPU detected. Specific GPU memory size detection is not implemented." - GPU_SIZE=gpu-8g - - # default to cpu if GPU_SIZE is not set - else - echo "GPU acceleration is not enabled or supported. Defaulting to CPU." - GPU_SIZE=cpu - fi -} - -function check_vars() { - if [ -z "$MODELS" ]; then - echo "MODELS environment variable is not set. Please set it to a comma-separated list of model YAML files to load." - exit 1 - fi - - if [ -z "$PROFILE" ]; then - echo "PROFILE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple" - exit 1 - fi -} - -detect_gpu -detect_gpu_size - -PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu -export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vad.yaml,/aio/${PROFILE}/vision.yaml}" - -check_vars - -echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS" - -exec /entrypoint.sh "$@" diff --git a/aio/gpu-8g/embeddings.yaml b/aio/gpu-8g/embeddings.yaml deleted file mode 100644 index 0f88f4511..000000000 --- a/aio/gpu-8g/embeddings.yaml +++ /dev/null @@ -1,13 +0,0 @@ -embeddings: true -name: text-embedding-ada-002 -backend: llama-cpp -parameters: - model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf - -usage: | - You can test this model with curl like this: - - curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{ - "input": "Your text string goes here", - "model": "text-embedding-ada-002" - }' \ No newline at end of file diff --git a/aio/gpu-8g/image-gen.yaml b/aio/gpu-8g/image-gen.yaml deleted file mode 100644 index 0074aaf0e..000000000 --- a/aio/gpu-8g/image-gen.yaml +++ /dev/null @@ -1,25 +0,0 @@ -name: stablediffusion -parameters: - model: DreamShaper_8_pruned.safetensors -backend: diffusers -step: 25 -f16: true - -diffusers: - pipeline_type: StableDiffusionPipeline - cuda: true - enable_parameters: "negative_prompt,num_inference_steps" - scheduler_type: "k_dpmpp_2m" - -download_files: -- filename: DreamShaper_8_pruned.safetensors - uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors - -usage: | - curl http://localhost:8080/v1/images/generations \ - -H "Content-Type: application/json" \ - -d '{ - "prompt": "|", - "step": 25, - "size": "512x512" - }' \ No newline at end of file diff --git a/aio/gpu-8g/rerank.yaml b/aio/gpu-8g/rerank.yaml deleted file mode 100644 index 70d386b2b..000000000 --- a/aio/gpu-8g/rerank.yaml +++ /dev/null @@ -1,33 +0,0 @@ -name: jina-reranker-v1-base-en -reranking: true -f16: true -parameters: - model: jina-reranker-v1-tiny-en.f16.gguf -backend: llama-cpp -download_files: - - filename: jina-reranker-v1-tiny-en.f16.gguf - sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407 - uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf - -usage: | - You can test this model with curl like this: - - curl http://localhost:8080/v1/rerank \ - -H "Content-Type: application/json" \ - -d '{ - "model": "jina-reranker-v1-base-en", - "query": "Organic skincare products for sensitive skin", - "documents": [ - "Eco-friendly kitchenware for modern homes", - "Biodegradable cleaning supplies for eco-conscious consumers", - "Organic cotton baby clothes for sensitive skin", - "Natural organic skincare range for sensitive skin", - "Tech gadgets for smart homes: 2024 edition", - "Sustainable gardening tools and compost solutions", - "Sensitive skin-friendly facial cleansers and toners", - "Organic food wraps and storage solutions", - "All-natural pet food for dogs with allergies", - "Yoga mats made from recycled materials" - ], - "top_n": 3 - }' diff --git a/aio/gpu-8g/speech-to-text.yaml b/aio/gpu-8g/speech-to-text.yaml deleted file mode 100644 index 77850d791..000000000 --- a/aio/gpu-8g/speech-to-text.yaml +++ /dev/null @@ -1,18 +0,0 @@ -name: whisper-1 -backend: whisper -parameters: - model: ggml-whisper-base.bin - -usage: | - ## example audio file - wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg - - ## Send the example audio file to the transcriptions endpoint - curl http://localhost:8080/v1/audio/transcriptions \ - -H "Content-Type: multipart/form-data" \ - -F file="@$PWD/gb1.ogg" -F model="whisper-1" - -download_files: -- filename: "ggml-whisper-base.bin" - sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe" - uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin" \ No newline at end of file diff --git a/aio/gpu-8g/text-to-speech.yaml b/aio/gpu-8g/text-to-speech.yaml deleted file mode 100644 index 782f8624a..000000000 --- a/aio/gpu-8g/text-to-speech.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: tts-1 -download_files: - - filename: voice-en-us-amy-low.tar.gz - uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz -backend: piper -parameters: - model: en-us-amy-low.onnx - -usage: | - To test if this model works as expected, you can use the following curl command: - - curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ - "model":"tts-1", - "input": "Hi, this is a test." - }' \ No newline at end of file diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml deleted file mode 100644 index 7d5c991c9..000000000 --- a/aio/gpu-8g/text-to-text.yaml +++ /dev/null @@ -1,54 +0,0 @@ -context_size: 4096 -f16: true -backend: llama-cpp -function: - capture_llm_results: - - (?s)(.*?) - grammar: - properties_order: name,arguments - json_regex_match: - - (?s)(.*?) - replace_llm_results: - - key: (?s)(.*?) - value: "" -mmap: true -name: gpt-4 -parameters: - model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf -stopwords: -- <|im_end|> -- -- -template: - chat: | - {{.Input -}} - <|im_start|>assistant - chat_message: | - <|im_start|>{{ .RoleName }} - {{ if .FunctionCall -}} - Function call: - {{ else if eq .RoleName "tool" -}} - Function response: - {{ end -}} - {{ if .Content -}} - {{.Content }} - {{ end -}} - {{ if .FunctionCall -}} - {{toJson .FunctionCall}} - {{ end -}}<|im_end|> - completion: | - {{.Input}} - function: | - <|im_start|>system - You are an AI assistant that executes function calls, and these are the tools at your disposal: - {{range .Functions}} - {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} - {{end}} - <|im_end|> - {{.Input -}} - <|im_start|>assistant - -download_files: -- filename: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf - sha256: 4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4 - uri: huggingface://mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf diff --git a/aio/gpu-8g/vad.yaml b/aio/gpu-8g/vad.yaml deleted file mode 100644 index b0dc70d75..000000000 --- a/aio/gpu-8g/vad.yaml +++ /dev/null @@ -1,8 +0,0 @@ -backend: silero-vad -name: silero-vad -parameters: - model: silero-vad.onnx -download_files: -- filename: silero-vad.onnx - uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx - sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808 \ No newline at end of file diff --git a/aio/gpu-8g/vision.yaml b/aio/gpu-8g/vision.yaml deleted file mode 100644 index 5c2d9930c..000000000 --- a/aio/gpu-8g/vision.yaml +++ /dev/null @@ -1,50 +0,0 @@ -context_size: 4096 -backend: llama-cpp -f16: true -mmap: true -mmproj: minicpm-v-4_5-mmproj-f16.gguf -name: gpt-4o -parameters: - model: minicpm-v-4_5-Q4_K_M.gguf -stopwords: -- <|im_end|> -- -- -- <|endoftext|> -template: - chat: | - {{.Input -}} - <|im_start|>assistant - chat_message: | - <|im_start|>{{ .RoleName }} - {{ if .FunctionCall -}} - Function call: - {{ else if eq .RoleName "tool" -}} - Function response: - {{ end -}} - {{ if .Content -}} - {{.Content }} - {{ end -}} - {{ if .FunctionCall -}} - {{toJson .FunctionCall}} - {{ end -}}<|im_end|> - completion: | - {{.Input}} - function: | - <|im_start|>system - You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: - {{range .Functions}} - {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} - {{end}} - For each function call return a json object with function name and arguments - <|im_end|> - {{.Input -}} - <|im_start|>assistant - -download_files: -- filename: minicpm-v-4_5-Q4_K_M.gguf - sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f - uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf -- filename: minicpm-v-4_5-mmproj-f16.gguf - uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf - sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8 \ No newline at end of file diff --git a/aio/intel/embeddings.yaml b/aio/intel/embeddings.yaml deleted file mode 100644 index 0f88f4511..000000000 --- a/aio/intel/embeddings.yaml +++ /dev/null @@ -1,13 +0,0 @@ -embeddings: true -name: text-embedding-ada-002 -backend: llama-cpp -parameters: - model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf - -usage: | - You can test this model with curl like this: - - curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{ - "input": "Your text string goes here", - "model": "text-embedding-ada-002" - }' \ No newline at end of file diff --git a/aio/intel/image-gen.yaml b/aio/intel/image-gen.yaml deleted file mode 100644 index 45fe6b62d..000000000 --- a/aio/intel/image-gen.yaml +++ /dev/null @@ -1,20 +0,0 @@ -name: stablediffusion -parameters: - model: Lykon/dreamshaper-8 -backend: diffusers -step: 25 -f16: true -diffusers: - pipeline_type: StableDiffusionPipeline - cuda: true - enable_parameters: "negative_prompt,num_inference_steps" - scheduler_type: "k_dpmpp_2m" - -usage: | - curl http://localhost:8080/v1/images/generations \ - -H "Content-Type: application/json" \ - -d '{ - "prompt": "|", - "step": 25, - "size": "512x512" - }' \ No newline at end of file diff --git a/aio/intel/rerank.yaml b/aio/intel/rerank.yaml deleted file mode 100644 index 70d386b2b..000000000 --- a/aio/intel/rerank.yaml +++ /dev/null @@ -1,33 +0,0 @@ -name: jina-reranker-v1-base-en -reranking: true -f16: true -parameters: - model: jina-reranker-v1-tiny-en.f16.gguf -backend: llama-cpp -download_files: - - filename: jina-reranker-v1-tiny-en.f16.gguf - sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407 - uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf - -usage: | - You can test this model with curl like this: - - curl http://localhost:8080/v1/rerank \ - -H "Content-Type: application/json" \ - -d '{ - "model": "jina-reranker-v1-base-en", - "query": "Organic skincare products for sensitive skin", - "documents": [ - "Eco-friendly kitchenware for modern homes", - "Biodegradable cleaning supplies for eco-conscious consumers", - "Organic cotton baby clothes for sensitive skin", - "Natural organic skincare range for sensitive skin", - "Tech gadgets for smart homes: 2024 edition", - "Sustainable gardening tools and compost solutions", - "Sensitive skin-friendly facial cleansers and toners", - "Organic food wraps and storage solutions", - "All-natural pet food for dogs with allergies", - "Yoga mats made from recycled materials" - ], - "top_n": 3 - }' diff --git a/aio/intel/speech-to-text.yaml b/aio/intel/speech-to-text.yaml deleted file mode 100644 index 77850d791..000000000 --- a/aio/intel/speech-to-text.yaml +++ /dev/null @@ -1,18 +0,0 @@ -name: whisper-1 -backend: whisper -parameters: - model: ggml-whisper-base.bin - -usage: | - ## example audio file - wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg - - ## Send the example audio file to the transcriptions endpoint - curl http://localhost:8080/v1/audio/transcriptions \ - -H "Content-Type: multipart/form-data" \ - -F file="@$PWD/gb1.ogg" -F model="whisper-1" - -download_files: -- filename: "ggml-whisper-base.bin" - sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe" - uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin" \ No newline at end of file diff --git a/aio/intel/text-to-speech.yaml b/aio/intel/text-to-speech.yaml deleted file mode 100644 index 782f8624a..000000000 --- a/aio/intel/text-to-speech.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: tts-1 -download_files: - - filename: voice-en-us-amy-low.tar.gz - uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz -backend: piper -parameters: - model: en-us-amy-low.onnx - -usage: | - To test if this model works as expected, you can use the following curl command: - - curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ - "model":"tts-1", - "input": "Hi, this is a test." - }' \ No newline at end of file diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml deleted file mode 100644 index 9fe7c1143..000000000 --- a/aio/intel/text-to-text.yaml +++ /dev/null @@ -1,54 +0,0 @@ -context_size: 4096 -f16: true -backend: llama-cpp -function: - capture_llm_results: - - (?s)(.*?) - grammar: - properties_order: name,arguments - json_regex_match: - - (?s)(.*?) - replace_llm_results: - - key: (?s)(.*?) - value: "" -mmap: true -name: gpt-4 -parameters: - model: localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf -stopwords: -- <|im_end|> -- -- -template: - chat: | - {{.Input -}} - <|im_start|>assistant - chat_message: | - <|im_start|>{{ .RoleName }} - {{ if .FunctionCall -}} - Function call: - {{ else if eq .RoleName "tool" -}} - Function response: - {{ end -}} - {{ if .Content -}} - {{.Content }} - {{ end -}} - {{ if .FunctionCall -}} - {{toJson .FunctionCall}} - {{ end -}}<|im_end|> - completion: | - {{.Input}} - function: | - <|im_start|>system - You are an AI assistant that executes function calls, and these are the tools at your disposal: - {{range .Functions}} - {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} - {{end}} - <|im_end|> - {{.Input -}} - <|im_start|>assistant - -download_files: -- filename: localai-functioncall-phi-4-v0.3-q4_k_m.gguf - sha256: 23fee048ded2a6e2e1a7b6bbefa6cbf83068f194caa9552aecbaa00fec8a16d5 - uri: huggingface://mudler/LocalAI-functioncall-phi-4-v0.3-Q4_K_M-GGUF/localai-functioncall-phi-4-v0.3-q4_k_m.gguf \ No newline at end of file diff --git a/docs/content/features/GPU-acceleration.md b/docs/content/features/GPU-acceleration.md index 022e7a73f..ef5ec5328 100644 --- a/docs/content/features/GPU-acceleration.md +++ b/docs/content/features/GPU-acceleration.md @@ -206,7 +206,7 @@ The following are examples of the ROCm specific configuration elements required. ```yaml # For full functionality select a non-'core' image, version locking the image is recommended for debug purposes. - image: quay.io/go-skynet/local-ai:master-aio-gpu-hipblas + image: quay.io/go-skynet/local-ai:master-gpu-hipblas environment: - DEBUG=true # If your gpu is not already included in the current list of default targets the following build details are required. @@ -229,13 +229,11 @@ docker run \ -e GPU_TARGETS=gfx906 \ --device /dev/dri \ --device /dev/kfd \ - quay.io/go-skynet/local-ai:master-aio-gpu-hipblas + quay.io/go-skynet/local-ai:master-gpu-hipblas ``` Please ensure to add all other required environment variables, port forwardings, etc to your `compose` file or `run` command. -The rebuild process will take some time to complete when deploying these containers and it is recommended that you `pull` the image prior to deployment as depending on the version these images may be ~20GB in size. - #### Example (k8s) (Advanced Deployment/WIP) For k8s deployments there is an additional step required before deployment, this is the deployment of the [ROCm/k8s-device-plugin](https://artifacthub.io/packages/helm/amd-gpu-helm/amd-gpu). @@ -434,7 +432,7 @@ If your AMD GPU is not in the default target list, set `REBUILD=true` and `GPU_T ```bash docker run -e REBUILD=true -e BUILD_TYPE=hipblas -e GPU_TARGETS=gfx1030 \ --device /dev/dri --device /dev/kfd \ - quay.io/go-skynet/local-ai:master-aio-gpu-hipblas + quay.io/go-skynet/local-ai:master-gpu-hipblas ``` ### Intel SYCL: model hangs diff --git a/docs/content/features/gpt-vision.md b/docs/content/features/gpt-vision.md index 1652aac1a..79dcd8046 100644 --- a/docs/content/features/gpt-vision.md +++ b/docs/content/features/gpt-vision.md @@ -32,6 +32,4 @@ Grammars and function tools can be used as well in conjunction with vision APIs: ### Setup -All-in-One images have already shipped the llava model as `gpt-4-vision-preview`, so no setup is needed in this case. - To setup the LLaVa models, follow the full example in the [configuration examples](https://github.com/mudler/LocalAI-examples/blob/main/configurations/llava/llava.yaml). \ No newline at end of file diff --git a/docs/content/getting-started/container-images.md b/docs/content/getting-started/container-images.md index 7ea98965d..92d2883c6 100644 --- a/docs/content/getting-started/container-images.md +++ b/docs/content/getting-started/container-images.md @@ -8,8 +8,6 @@ ico = "rocket_launch" LocalAI provides a variety of images to support different environments. These images are available on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags) and [Docker Hub](https://hub.docker.com/r/localai/localai). -All-in-One images comes with a pre-configured set of models and backends, standard images instead do not have any model pre-configured and installed. - For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA images, if you don't have a GPU, use the CPU images. If you have AMD or Mac Silicon, see the [build section]({{%relref "installation/build" %}}). {{% notice tip %}} @@ -17,7 +15,6 @@ For GPU Acceleration support for Nvidia video graphic cards, use the Nvidia/CUDA **Available Images Types**: - Images ending with `-core` are smaller images without predownload python dependencies. Use these images if you plan to use `llama.cpp`, `stablediffusion-ncn` or `rwkv` backends - if you are not sure which one to use, do **not** use these images. -- Images containing the `aio` tag are all-in-one images with all the features enabled, and come with an opinionated set of configuration. {{% /notice %}} @@ -124,109 +121,6 @@ These images are compatible with Nvidia ARM64 devices with CUDA 13, such as the {{< /tabs >}} -## All-in-one images - -All-In-One images are images that come pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset. These images are available for both CPU and GPU environments. The AIO images are designed to be easy to use and require no configuration. Models configuration can be found [here](https://github.com/mudler/LocalAI/tree/master/aio) separated by size. - -In the AIO images there are models configured with the names of OpenAI models, however, they are really backed by Open Source models. You can find the table below - -| Category | Model name | Real model (CPU) | Real model (GPU) | -| ---- | ---- | ---- | ---- | -| Text Generation | `gpt-4` | `phi-2` | `hermes-2-pro-mistral` | -| Multimodal Vision | `gpt-4-vision-preview` | `bakllava` | `llava-1.6-mistral` | -| Image Generation | `stablediffusion` | `stablediffusion` | `dreamshaper-8` | -| Speech to Text | `whisper-1` | `whisper` with `whisper-base` model | <= same | -| Text to Speech | `tts-1` | `en-us-amy-low.onnx` from `rhasspy/piper` | <= same | -| Embeddings | `text-embedding-ada-002` | `all-MiniLM-L6-v2` in Q4 | `all-MiniLM-L6-v2` | - -### Usage - -Select the image (CPU or GPU) and start the container with Docker: - -```bash -docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu -``` - -LocalAI will automatically download all the required models, and the API will be available at [localhost:8080](http://localhost:8080/v1/models). - - -Or with a docker-compose file: - -```yaml -version: "3.9" -services: - api: - image: localai/localai:latest-aio-cpu - # For a specific version: - # image: localai/localai:{{< version >}}-aio-cpu - # For Nvidia GPUs decomment one of the following (cuda12 or cuda13): - # image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-12 - # image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-13 - # image: localai/localai:latest-aio-gpu-nvidia-cuda-12 - # image: localai/localai:latest-aio-gpu-nvidia-cuda-13 - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"] - interval: 1m - timeout: 20m - retries: 5 - ports: - - 8080:8080 - environment: - - DEBUG=true - # ... - volumes: - - ./models:/models:cached - # decomment the following piece if running with Nvidia GPUs - # deploy: - # resources: - # reservations: - # devices: - # - driver: nvidia - # count: 1 - # capabilities: [gpu] -``` - -{{% notice tip %}} - -**Models caching**: The **AIO** image will download the needed models on the first run if not already present and store those in `/models` inside the container. The AIO models will be automatically updated with new versions of AIO images. - -You can change the directory inside the container by specifying a `MODELS_PATH` environment variable (or `--models-path`). - -If you want to use a named model or a local directory, you can mount it as a volume to `/models`: - -```bash -docker run -p 8080:8080 --name local-ai -ti -v $PWD/models:/models localai/localai:latest-aio-cpu -``` - -or associate a volume: - -```bash -docker volume create localai-models -docker run -p 8080:8080 --name local-ai -ti -v localai-models:/models localai/localai:latest-aio-cpu -``` - - {{% /notice %}} - -### Available AIO images - -| Description | Quay | Docker Hub | -| --- | --- |-----------------------------------------------| -| Latest images for CPU | `quay.io/go-skynet/local-ai:latest-aio-cpu` | `localai/localai:latest-aio-cpu` | -| Versioned image (e.g. for CPU) | `quay.io/go-skynet/local-ai:{{< version >}}-aio-cpu` | `localai/localai:{{< version >}}-aio-cpu` | -| Latest images for Nvidia GPU (CUDA12) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-12` | `localai/localai:latest-aio-gpu-nvidia-cuda-12` | -| Latest images for Nvidia GPU (CUDA13) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-13` | `localai/localai:latest-aio-gpu-nvidia-cuda-13` | -| Latest images for AMD GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-hipblas` | `localai/localai:latest-aio-gpu-hipblas` | -| Latest images for Intel GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-intel` | `localai/localai:latest-aio-gpu-intel` | - -### Available environment variables - -The AIO Images are inheriting the same environment variables as the base images and the environment of LocalAI (that you can inspect by calling `--help`). However, it supports additional environment variables available only from the container image - -| Variable | Default | Description | -| ---------------------| ------- | ----------- | -| `PROFILE` | Auto-detected | The size of the model to use. Available: `cpu`, `gpu-8g` | -| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "getting-started/models" %}})) | - ## See Also - [GPU acceleration]({{%relref "features/gpu-acceleration" %}}) diff --git a/docs/content/getting-started/try-it-out.md b/docs/content/getting-started/try-it-out.md index 704685c84..8c2395e89 100644 --- a/docs/content/getting-started/try-it-out.md +++ b/docs/content/getting-started/try-it-out.md @@ -20,7 +20,7 @@ With the CLI you can list the models with `local-ai models list` and install the You can also [run models manually]({{%relref "getting-started/models" %}}) by copying files into the `models` directory. {{% /notice %}} -You can test out the API endpoints using `curl`, few examples are listed below. The models we are referring here (`gpt-4`, `gpt-4-vision-preview`, `tts-1`, `whisper-1`) are the default models that come with the AIO images - you can also use any other model you have installed. +You can test out the API endpoints using `curl`, few examples are listed below. The models we are referring here (`gpt-4`, `gpt-4-vision-preview`, `tts-1`, `whisper-1`) are examples - replace them with the model names you have installed. ### Text Generation diff --git a/docs/content/installation/_index.en.md b/docs/content/installation/_index.en.md index a9216be5a..266033dfd 100644 --- a/docs/content/installation/_index.en.md +++ b/docs/content/installation/_index.en.md @@ -30,7 +30,7 @@ docker run -p 8080:8080 --name local-ai -ti localai/localai:latest podman run -p 8080:8080 --name local-ai -ti localai/localai:latest ``` -This will start LocalAI. The API will be available at `http://localhost:8080`. For images with pre-configured models, see [All-in-One images](/getting-started/container-images/#all-in-one-images). +This will start LocalAI. The API will be available at `http://localhost:8080`. For other platforms: - **macOS**: Download the [DMG](macos/) diff --git a/docs/content/installation/containers.md b/docs/content/installation/containers.md index 3d416a581..9045ed586 100644 --- a/docs/content/installation/containers.md +++ b/docs/content/installation/containers.md @@ -93,48 +93,6 @@ CUDA 13 (for Nvidia DGX Spark): docker run -ti --name local-ai -p 8080:8080 --runtime nvidia --gpus all localai/localai:latest-nvidia-l4t-arm64-cuda-13 ``` -### All-in-One (AIO) Images - -**Recommended for beginners** - These images come pre-configured with models and backends, ready to use immediately. - -#### CPU Image - -```bash -docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu -# Or with Podman: -podman run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu -``` - -#### GPU Images - -**NVIDIA CUDA 13:** -```bash -docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-13 -# Or with Podman: -podman run -ti --name local-ai -p 8080:8080 --device nvidia.com/gpu=all localai/localai:latest-aio-gpu-nvidia-cuda-13 -``` - -**NVIDIA CUDA 12:** -```bash -docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12 -# Or with Podman: -podman run -ti --name local-ai -p 8080:8080 --device nvidia.com/gpu=all localai/localai:latest-aio-gpu-nvidia-cuda-12 -``` - -**AMD GPU (ROCm):** -```bash -docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas -# Or with Podman: -podman run -ti --name local-ai -p 8080:8080 --device rocm.com/gpu=all localai/localai:latest-aio-gpu-hipblas -``` - -**Intel GPU:** -```bash -docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel -# Or with Podman: -podman run -ti --name local-ai -p 8080:8080 --device gpu.intel.com/all localai/localai:latest-aio-gpu-intel -``` - ## Using Compose For a more manageable setup, especially with persistent volumes, use Docker Compose or Podman Compose: @@ -147,8 +105,8 @@ The CDI approach is recommended for newer versions of the NVIDIA Container Toolk version: "3.9" services: api: - image: localai/localai:latest-aio-gpu-nvidia-cuda-12 - # For CUDA 13, use: localai/localai:latest-aio-gpu-nvidia-cuda-13 + image: localai/localai:latest-gpu-nvidia-cuda-12 + # For CUDA 13, use: localai/localai:latest-gpu-nvidia-cuda-13 healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"] interval: 1m @@ -187,8 +145,8 @@ If you are using an older version of the NVIDIA Container Toolkit (before 1.14), version: "3.9" services: api: - image: localai/localai:latest-aio-gpu-nvidia-cuda-12 - # For CUDA 13, use: localai/localai:latest-aio-gpu-nvidia-cuda-13 + image: localai/localai:latest-gpu-nvidia-cuda-12 + # For CUDA 13, use: localai/localai:latest-gpu-nvidia-cuda-13 healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"] interval: 1m @@ -227,12 +185,12 @@ To persist models and data, mount volumes: docker run -ti --name local-ai -p 8080:8080 \ -v $PWD/models:/models \ -v $PWD/data:/data \ - localai/localai:latest-aio-cpu + localai/localai:latest # Or with Podman: podman run -ti --name local-ai -p 8080:8080 \ -v $PWD/models:/models \ -v $PWD/data:/data \ - localai/localai:latest-aio-cpu + localai/localai:latest ``` Or use named volumes: @@ -243,29 +201,16 @@ docker volume create localai-data docker run -ti --name local-ai -p 8080:8080 \ -v localai-models:/models \ -v localai-data:/data \ - localai/localai:latest-aio-cpu + localai/localai:latest # Or with Podman: podman volume create localai-models podman volume create localai-data podman run -ti --name local-ai -p 8080:8080 \ -v localai-models:/models \ -v localai-data:/data \ - localai/localai:latest-aio-cpu + localai/localai:latest ``` -## What's Included in AIO Images - -All-in-One images come pre-configured with: - -- **Text Generation**: LLM models for chat and completion -- **Image Generation**: Stable Diffusion models -- **Text to Speech**: TTS models -- **Speech to Text**: Whisper models -- **Embeddings**: Vector embedding models -- **Function Calling**: Support for OpenAI-compatible function calling - -The AIO images use OpenAI-compatible model names (like `gpt-4`, `gpt-4-vision-preview`) but are backed by open-source models. See the [container images documentation](/getting-started/container-images/#all-in-one-images) for the complete mapping. - ## Next Steps After installation: diff --git a/tests/e2e-aio/models/embeddings.yaml b/tests/e2e-aio/models/embeddings.yaml new file mode 100644 index 000000000..8613f2c33 --- /dev/null +++ b/tests/e2e-aio/models/embeddings.yaml @@ -0,0 +1,5 @@ +embeddings: true +name: text-embedding-ada-002 +backend: llama-cpp +parameters: + model: huggingface://bartowski/granite-embedding-107m-multilingual-GGUF/granite-embedding-107m-multilingual-f16.gguf diff --git a/aio/cpu/image-gen.yaml b/tests/e2e-aio/models/image-gen.yaml similarity index 62% rename from aio/cpu/image-gen.yaml rename to tests/e2e-aio/models/image-gen.yaml index ef3745726..9fcb8e1c8 100644 --- a/aio/cpu/image-gen.yaml +++ b/tests/e2e-aio/models/image-gen.yaml @@ -12,12 +12,3 @@ download_files: - filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf" sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f" uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf" - -usage: | - curl http://localhost:8080/v1/images/generations \ - -H "Content-Type: application/json" \ - -d '{ - "prompt": "|", - "step": 25, - "size": "512x512" - }' \ No newline at end of file diff --git a/tests/e2e-aio/models/rerank.yaml b/tests/e2e-aio/models/rerank.yaml new file mode 100644 index 000000000..c97540dbf --- /dev/null +++ b/tests/e2e-aio/models/rerank.yaml @@ -0,0 +1,10 @@ +name: jina-reranker-v1-base-en +reranking: true +f16: true +parameters: + model: jina-reranker-v1-tiny-en.f16.gguf +backend: llama-cpp +download_files: + - filename: jina-reranker-v1-tiny-en.f16.gguf + sha256: 5f696cf0d0f3d347c4a279eee8270e5918554cdac0ed1f632f2619e4e8341407 + uri: huggingface://mradermacher/jina-reranker-v1-tiny-en-GGUF/jina-reranker-v1-tiny-en.f16.gguf diff --git a/tests/e2e-aio/models/speech-to-text.yaml b/tests/e2e-aio/models/speech-to-text.yaml new file mode 100644 index 000000000..b187c309d --- /dev/null +++ b/tests/e2e-aio/models/speech-to-text.yaml @@ -0,0 +1,9 @@ +name: whisper-1 +backend: whisper +parameters: + model: ggml-whisper-base.bin + +download_files: +- filename: "ggml-whisper-base.bin" + sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe" + uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin" diff --git a/tests/e2e-aio/models/text-to-speech.yaml b/tests/e2e-aio/models/text-to-speech.yaml new file mode 100644 index 000000000..613301b66 --- /dev/null +++ b/tests/e2e-aio/models/text-to-speech.yaml @@ -0,0 +1,7 @@ +name: tts-1 +download_files: + - filename: voice-en-us-amy-low.tar.gz + uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz +backend: piper +parameters: + model: en-us-amy-low.onnx diff --git a/aio/cpu/text-to-text.yaml b/tests/e2e-aio/models/text-to-text.yaml similarity index 98% rename from aio/cpu/text-to-text.yaml rename to tests/e2e-aio/models/text-to-text.yaml index 19ed1f440..321c83c13 100644 --- a/aio/cpu/text-to-text.yaml +++ b/tests/e2e-aio/models/text-to-text.yaml @@ -55,4 +55,4 @@ template: download_files: - filename: Hermes-3-Llama-3.2-3B-Q4_K_M.gguf sha256: 2e220a14ba4328fee38cf36c2c068261560f999fadb5725ce5c6d977cb5126b5 - uri: huggingface://bartowski/Hermes-3-Llama-3.2-3B-GGUF/Hermes-3-Llama-3.2-3B-Q4_K_M.gguf \ No newline at end of file + uri: huggingface://bartowski/Hermes-3-Llama-3.2-3B-GGUF/Hermes-3-Llama-3.2-3B-Q4_K_M.gguf diff --git a/aio/intel/vad.yaml b/tests/e2e-aio/models/vad.yaml similarity index 94% rename from aio/intel/vad.yaml rename to tests/e2e-aio/models/vad.yaml index b0dc70d75..858b65c0a 100644 --- a/aio/intel/vad.yaml +++ b/tests/e2e-aio/models/vad.yaml @@ -1,8 +1,8 @@ -backend: silero-vad -name: silero-vad -parameters: - model: silero-vad.onnx -download_files: -- filename: silero-vad.onnx - uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx - sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808 \ No newline at end of file +backend: silero-vad +name: silero-vad +parameters: + model: silero-vad.onnx +download_files: +- filename: silero-vad.onnx + uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx + sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808 diff --git a/aio/intel/vision.yaml b/tests/e2e-aio/models/vision.yaml similarity index 99% rename from aio/intel/vision.yaml rename to tests/e2e-aio/models/vision.yaml index 00b8c0680..487f4663c 100644 --- a/aio/intel/vision.yaml +++ b/tests/e2e-aio/models/vision.yaml @@ -1,6 +1,6 @@ context_size: 4096 -backend: llama-cpp f16: true +backend: llama-cpp mmap: true mmproj: minicpm-v-4_5-mmproj-f16.gguf name: gpt-4o @@ -41,11 +41,10 @@ template: {{.Input -}} <|im_start|>assistant - download_files: - filename: minicpm-v-4_5-Q4_K_M.gguf sha256: c1c3c33100b15b4caf7319acce4e23c0eb0ce1cbd12f70e8d24f05aa67b7512f uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/ggml-model-Q4_K_M.gguf - filename: minicpm-v-4_5-mmproj-f16.gguf uri: huggingface://openbmb/MiniCPM-V-4_5-gguf/mmproj-model-f16.gguf - sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8 \ No newline at end of file + sha256: 7a7225a32e8d453aaa3d22d8c579b5bf833c253f784cdb05c99c9a76fd616df8