From e58410fa99996d9927b06d5d1cab0e072486edac Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Tue, 26 Mar 2024 18:45:25 +0100
Subject: [PATCH] feat(aio): add intel profile (#1901)

* feat(aio): add intel profile

* docs: clarify AIO images features
---
 README.md                                 |  4 ++
 aio/cpu/embeddings.yaml                   |  8 +---
 aio/cpu/image-gen.yaml                    | 11 ++++-
 aio/cpu/vision.yaml                       |  9 ----
 aio/entrypoint.sh                         |  4 +-
 aio/gpu-8g/embeddings.yaml                |  1 -
 aio/gpu-8g/image-gen.yaml                 |  1 -
 aio/gpu-8g/vision.yaml                    |  2 -
 aio/intel/embeddings.yaml                 | 12 ++++++
 aio/intel/image-gen.yaml                  | 20 +++++++++
 aio/intel/speech-to-text.yaml             | 18 ++++++++
 aio/intel/text-to-speech.yaml             | 15 +++++++
 aio/intel/text-to-text.yaml               | 51 +++++++++++++++++++++++
 aio/intel/vision.yaml                     | 35 ++++++++++++++++
 docs/content/docs/overview.md             |  1 -
 docs/content/docs/reference/aio-images.md |  9 ++++
 16 files changed, 178 insertions(+), 23 deletions(-)
 create mode 100644 aio/intel/embeddings.yaml
 create mode 100644 aio/intel/image-gen.yaml
 create mode 100644 aio/intel/speech-to-text.yaml
 create mode 100644 aio/intel/text-to-speech.yaml
 create mode 100644 aio/intel/text-to-text.yaml
 create mode 100644 aio/intel/vision.yaml
diff --git a/README.md b/README.md
index 7ba96ad5a..8cf15d5ab 100644
--- a/README.md
+++ b/README.md
@@ -38,6 +38,10 @@
 </a>
 </p>
 
+> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
+>
+> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
+
 [![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml)[![Build and Release](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/release.yaml)[![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)[![Bump dependencies](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/bump_deps.yaml)[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/localai)](https://artifacthub.io/packages/search?repo=localai)
 
 **LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that’s compatible with OpenAI (Elevenlabs, Anthropic... ) API specifications for local AI inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families. Does not require GPU.
diff --git a/aio/cpu/embeddings.yaml b/aio/cpu/embeddings.yaml
index bdee079c9..8576746f1 100644
--- a/aio/cpu/embeddings.yaml
+++ b/aio/cpu/embeddings.yaml
@@ -1,11 +1,5 @@
-backend: bert-embeddings
-embeddings: true
-f16: true
-
-gpu_layers: 90
-mmap: true
 name: text-embedding-ada-002
-
+backend: bert-embeddings
 parameters:
   model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
 
diff --git a/aio/cpu/image-gen.yaml b/aio/cpu/image-gen.yaml
index 3b9c2eec1..9de88a3f3 100644
--- a/aio/cpu/image-gen.yaml
+++ b/aio/cpu/image-gen.yaml
@@ -50,4 +50,13 @@ download_files:
   uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
 - filename: "stablediffusion_assets/vocab.txt"
   sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
\ No newline at end of file
+  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
+
+usage: |
+        curl http://localhost:8080/v1/images/generations \
+          -H "Content-Type: application/json" \
+          -d '{
+            "prompt": "<positive prompt>|<negative prompt>",
+            "step": 25,
+            "size": "512x512"
+          }'
\ No newline at end of file
diff --git a/aio/cpu/vision.yaml b/aio/cpu/vision.yaml
index 0777f715d..3b466d377 100644
--- a/aio/cpu/vision.yaml
+++ b/aio/cpu/vision.yaml
@@ -1,8 +1,6 @@
 backend: llama-cpp
 context_size: 4096
 f16: true
-
-gpu_layers: 90
 mmap: true
 name: gpt-4-vision-preview
 
@@ -14,13 +12,6 @@ roles:
 mmproj: bakllava-mmproj.gguf
 parameters:
   model: bakllava.gguf
-  temperature: 0.2
-  top_k: 40
-  top_p: 0.95
-  seed: -1
-mirostat: 2
-mirostat_eta: 1.0
-mirostat_tau: 1.0
 
 template:
   chat: |
diff --git a/aio/entrypoint.sh b/aio/entrypoint.sh
index 795cb86a1..d04e5642f 100755
--- a/aio/entrypoint.sh
+++ b/aio/entrypoint.sh
@@ -30,6 +30,7 @@ function detect_gpu() {
                 echo "Intel GPU detected"
                 if [ -d /opt/intel ]; then
                     GPU_ACCELERATION=true
+                    GPU_VENDOR=intel
                 else
                     echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
                 fi
@@ -75,7 +76,8 @@ function detect_gpu_size() {
             echo "Unable to determine NVIDIA GPU memory size. Falling back to CPU."
             GPU_SIZE=gpu-8g
         fi
-
+    elif [ "$GPU_ACCELERATION" = true ] && [ "$GPU_VENDOR" = "intel" ]; then
+        GPU_SIZE=intel
     # Default to a generic GPU size until we implement GPU size detection for non NVIDIA GPUs
     elif [ "$GPU_ACCELERATION" = true ]; then
         echo "Non-NVIDIA GPU detected. Specific GPU memory size detection is not implemented."
diff --git a/aio/gpu-8g/embeddings.yaml b/aio/gpu-8g/embeddings.yaml
index 98b519d5a..99a74ef74 100644
--- a/aio/gpu-8g/embeddings.yaml
+++ b/aio/gpu-8g/embeddings.yaml
@@ -1,6 +1,5 @@
 name: text-embedding-ada-002
 backend: sentencetransformers
-embeddings: true
 parameters:
   model: all-MiniLM-L6-v2
 
diff --git a/aio/gpu-8g/image-gen.yaml b/aio/gpu-8g/image-gen.yaml
index 53994ebbb..0074aaf0e 100644
--- a/aio/gpu-8g/image-gen.yaml
+++ b/aio/gpu-8g/image-gen.yaml
@@ -20,7 +20,6 @@ usage: |
           -H "Content-Type: application/json" \
           -d '{
             "prompt": "<positive prompt>|<negative prompt>",
-            "model": "dreamshaper",
             "step": 25,
             "size": "512x512"
           }'
\ No newline at end of file
diff --git a/aio/gpu-8g/vision.yaml b/aio/gpu-8g/vision.yaml
index 025425032..db0392794 100644
--- a/aio/gpu-8g/vision.yaml
+++ b/aio/gpu-8g/vision.yaml
@@ -1,8 +1,6 @@
 backend: llama-cpp
 context_size: 4096
 f16: true
-
-gpu_layers: 90
 mmap: true
 name: gpt-4-vision-preview
 
diff --git a/aio/intel/embeddings.yaml b/aio/intel/embeddings.yaml
new file mode 100644
index 000000000..99a74ef74
--- /dev/null
+++ b/aio/intel/embeddings.yaml
@@ -0,0 +1,12 @@
+name: text-embedding-ada-002
+backend: sentencetransformers
+parameters:
+  model: all-MiniLM-L6-v2
+
+usage: |
+    You can test this model with curl like this:
+
+    curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
+      "input": "Your text string goes here",
+      "model": "text-embedding-ada-002"
+    }'
\ No newline at end of file
diff --git a/aio/intel/image-gen.yaml b/aio/intel/image-gen.yaml
new file mode 100644
index 000000000..eb724c927
--- /dev/null
+++ b/aio/intel/image-gen.yaml
@@ -0,0 +1,20 @@
+name: stablediffusion
+parameters:
+  model: runwayml/stable-diffusion-v1-5
+backend: diffusers
+step: 25
+f16: true
+diffusers:
+  pipeline_type: StableDiffusionPipeline
+  cuda: true  # NOTE(review): CUDA flag inside the Intel profile; confirm the diffusers backend maps this to the Intel GPU
+  enable_parameters: "negative_prompt,num_inference_steps"
+  scheduler_type: "k_dpmpp_2m"
+
+usage: |
+        curl http://localhost:8080/v1/images/generations \
+          -H "Content-Type: application/json" \
+          -d '{
+            "prompt": "<positive prompt>|<negative prompt>",
+            "step": 25,
+            "size": "512x512"
+          }'
\ No newline at end of file
diff --git a/aio/intel/speech-to-text.yaml b/aio/intel/speech-to-text.yaml
new file mode 100644
index 000000000..77850d791
--- /dev/null
+++ b/aio/intel/speech-to-text.yaml
@@ -0,0 +1,18 @@
+name: whisper-1
+backend: whisper
+parameters:
+  model: ggml-whisper-base.bin
+
+usage: |
+    ## example audio file
+    wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
+
+    ## Send the example audio file to the transcriptions endpoint
+    curl http://localhost:8080/v1/audio/transcriptions \
+         -H "Content-Type: multipart/form-data" \
+         -F file="@$PWD/gb1.ogg" -F model="whisper-1"
+
+download_files:
+- filename: "ggml-whisper-base.bin"
+  sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
+  uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
\ No newline at end of file
diff --git a/aio/intel/text-to-speech.yaml b/aio/intel/text-to-speech.yaml
new file mode 100644
index 000000000..8d875a299
--- /dev/null
+++ b/aio/intel/text-to-speech.yaml
@@ -0,0 +1,15 @@
+name: tts-1
+download_files:
+  - filename: voice-en-us-amy-low.tar.gz
+    uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
+
+parameters:
+  model: en-us-amy-low.onnx
+
+usage: |
+    To test if this model works as expected, you can use the following curl command:
+
+    curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
+      "model":"tts-1",
+      "input": "Hi, this is a test."
+    }'
\ No newline at end of file
diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml
new file mode 100644
index 000000000..ef36b562d
--- /dev/null
+++ b/aio/intel/text-to-text.yaml
@@ -0,0 +1,51 @@
+name: gpt-4
+mmap: false
+f16: false
+parameters:
+  model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
+
+roles:
+  assistant_function_call: assistant
+  function: tool
+template:
+  chat_message: |
+    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}}
+    {{ if eq .RoleName "assistant_function_call" }}<tool_call>{{end}}
+    {{ if eq .RoleName "function" }}<tool_result>{{end}}
+    {{if .Content}}{{.Content}}{{end}}
+    {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
+    {{ if eq .RoleName "assistant_function_call" }}</tool_call>{{end}}
+    {{ if eq .RoleName "function" }}</tool_result>{{end}}
+    <|im_end|>
+  # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
+  function: |
+    <|im_start|>system
+    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: 
+    <tools>
+    {{range .Functions}}
+    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+    {{end}}
+    </tools> 
+    Use the following pydantic model json schema for each tool call you will make: 
+    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} 
+    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
+    <tool_call>
+    {'arguments': <args-dict>, 'name': <function-name>}
+    </tool_call><|im_end|>
+    {{.Input}}
+    <|im_start|>assistant
+    <tool_call>
+  chat: |
+    {{.Input}}
+    <|im_start|>assistant
+  completion: |
+    {{.Input}}
+context_size: 4096
+stopwords:
+- <|im_end|>
+- <dummy32000>
+usage: |  # example request; "temperature" is a top-level request field, not part of a message
+      curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+          "model": "gpt-4",
+          "messages": [{"role": "user", "content": "How are you doing?"}], "temperature": 0.1
+      }'
diff --git a/aio/intel/vision.yaml b/aio/intel/vision.yaml
new file mode 100644
index 000000000..528431626
--- /dev/null
+++ b/aio/intel/vision.yaml
@@ -0,0 +1,35 @@
+backend: llama-cpp
+context_size: 4096
+mmap: false
+f16: false
+name: gpt-4-vision-preview
+
+roles:
+  user: "USER:"
+  assistant: "ASSISTANT:"
+  system: "SYSTEM:"
+
+mmproj: llava-v1.6-7b-mmproj-f16.gguf
+parameters:
+  model: llava-v1.6-mistral-7b.Q5_K_M.gguf
+  temperature: 0.2
+  top_k: 40
+  top_p: 0.95
+  seed: -1
+
+template:
+  chat: |
+    A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
+    {{.Input}}
+    ASSISTANT:
+
+download_files:
+- filename: llava-v1.6-mistral-7b.Q5_K_M.gguf
+  uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf
+- filename: llava-v1.6-7b-mmproj-f16.gguf
+  uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
+
+usage: |  # example request; "temperature" is a top-level request field, not part of a message
+    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+        "model": "gpt-4-vision-preview",
+        "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}]}], "temperature": 0.9}'
diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md
index 40ec9e4f7..3c3a397d1 100644
--- a/docs/content/docs/overview.md
+++ b/docs/content/docs/overview.md
@@ -49,7 +49,6 @@ icon = "info"
 </a>
 </p>
 
-
 > 💡 Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [💭Discord](https://discord.gg/uJAeKSAGDy)
 >
 > [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
diff --git a/docs/content/docs/reference/aio-images.md b/docs/content/docs/reference/aio-images.md
index 9c569fb5d..331892e92 100644
--- a/docs/content/docs/reference/aio-images.md
+++ b/docs/content/docs/reference/aio-images.md
@@ -7,6 +7,15 @@ weight = 26
 
 All-In-One images are images that come pre-configured with a set of models and backends to fully leverage almost all the LocalAI featureset. These images are available for both CPU and GPU environments. The AIO images are designed to be easy to use and requires no configuration. Models configuration can be found [here](https://github.com/mudler/LocalAI/tree/master/aio) separated by size.
 
+What you can find configured out of the box:
+
+- Image generation
+- Text generation
+- Text to audio
+- Audio transcription
+- Embeddings
+- GPT Vision
+
 
 | Description | Quay | Docker Hub                                   |
 | --- | --- |-----------------------------------------------|