Compare commits

...

12 Commits

Author SHA1 Message Date
Ettore Di Giacinto
9ae47d37e9 pin go-rwkv
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-21 08:42:40 +01:00
Ettore Di Giacinto
2b3ad7f41c Revert "⬆️ Update donomii/go-rwkv.cpp" (#1474)
Revert "⬆️ Update donomii/go-rwkv.cpp (#1470)"

This reverts commit 51db10b18f.
2023-12-21 08:38:50 +01:00
LocalAI [bot]
51db10b18f ⬆️ Update donomii/go-rwkv.cpp (#1470)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-21 08:35:31 +01:00
Ettore Di Giacinto
b4b21a446b feat(conda): share envs with transformer-based backends (#1465)
* feat(conda): share env between diffusers and bark

* Detect if env already exists

* share diffusers and petals

* tests: add petals

* Use smaller model for tests with petals

* test only model load on petals

* tests(petals): run only load model tests

* Revert "test only model load on petals"

This reverts commit 111cfa97f1.

* move transformers and sentencetransformers to common env

* Share also transformers-musicgen
2023-12-21 08:35:15 +01:00
LocalAI [bot]
23eced1644 ⬆️ Update ggerganov/llama.cpp (#1461)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-20 18:02:52 +01:00
LocalAI [bot]
7741a6e75d ⬆️ Update ggerganov/whisper.cpp (#1462)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-20 00:21:49 +00:00
LocalAI [bot]
d4210db0c9 ⬆️ Update ggerganov/llama.cpp (#1457)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-19 00:42:19 +01:00
lunamidori5
17dde75107 How To (Updates and Fixes) (#1456)
* Update easy-setup-embeddings.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker-cpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker-gpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update and rename easy-setup-docker-cpu.md to easy-setup-docker.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update _index.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-docker.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Delete docs/content/howtos/easy-setup-docker-gpu.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update _index.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-sd.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* Update easy-setup-sd.md

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

---------

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>
2023-12-18 18:59:08 +01:00
Ettore Di Giacinto
1fc3a375df feat: inline templates and accept URLs in models (#1452)
* feat: Allow inline templates

* feat: Allow to specify url in model config files

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>

* feat: support 'huggingface://' format

* style: reuse-code from gallery

---------

Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-12-18 18:58:44 +01:00
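For illustration, here is a hedged sketch of the `huggingface://` mapping this commit introduces, mirroring the `ConvertURL` helper added in `pkg/utils` (shown in full near the bottom of this compare); the example URL comes from the code comment there:

```go
// Minimal sketch of the huggingface:// URL scheme from this commit,
// mirroring pkg/utils.ConvertURL (see the diff further down).
// Illustrative only, not the exact implementation.
package main

import (
	"fmt"
	"strings"
)

func convertHuggingFaceURL(s string) string {
	repository := strings.TrimPrefix(s, "huggingface://")
	parts := strings.Split(repository, "/") // owner/repo/file[@branch]
	owner, repo, file := parts[0], parts[1], parts[2]
	branch := "main"
	if i := strings.Index(file, "@"); i >= 0 {
		file, branch = file[:i], file[i+1:]
	}
	return fmt.Sprintf("https://huggingface.co/%s/%s/resolve/%s/%s", owner, repo, branch, file)
}

func main() {
	fmt.Println(convertHuggingFaceURL("huggingface://TheBloke/Mixtral-8x7B-v0.1-GGUF/mixtral-8x7b-v0.1.Q2_K.gguf@main"))
	// -> https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q2_K.gguf
}
```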
LocalAI [bot]
64a8471dd5 ⬆️ Update ggerganov/llama.cpp (#1455)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-18 08:55:29 +01:00
LocalAI [bot]
86a8df1c8b ⬆️ Update ggerganov/llama.cpp (#1450)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-12-17 19:02:28 +01:00
Ettore Di Giacinto
2eeed2287b docs: automatically track latest versions (#1451) 2023-12-17 19:02:13 +01:00
43 changed files with 579 additions and 597 deletions

7
.github/bump_docs.sh vendored Executable file
View File

@@ -0,0 +1,7 @@
#!/bin/bash
set -xe
REPO=$1
LATEST_TAG=$(curl -s "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.name')
cat <<< $(jq ".version = \"$LATEST_TAG\"" docs/data/version.json) > docs/data/version.json
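For reference, a hedged Go equivalent of what this script does — query the GitHub releases API and print the latest release name (the endpoint is the one the script curls; error handling and names are illustrative):

```go
// Rough Go equivalent of .github/bump_docs.sh: fetch the latest release
// name for a repository and print it. The script then writes this value
// into docs/data/version.json via jq.
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"os"
)

func main() {
	repo := os.Args[1] // e.g. "mudler/LocalAI"
	resp, err := http.Get("https://api.github.com/repos/" + repo + "/releases/latest")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	var release struct {
		Name string `json:"name"` // the field the script reads with `jq -r '.name'`
	}
	if err := json.NewDecoder(resp.Body).Decode(&release); err != nil {
		panic(err)
	}
	fmt.Println(release.Name)
}
```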

31
.github/workflows/bump_docs.yaml vendored Normal file
View File

@@ -0,0 +1,31 @@
name: Bump dependencies
on:
schedule:
- cron: 0 20 * * *
workflow_dispatch:
jobs:
bump:
strategy:
fail-fast: false
matrix:
include:
- repository: "mudler/LocalAI"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Bump dependencies 🔧
run: |
bash .github/bump_docs.sh ${{ matrix.repository }}
- name: Create Pull Request
uses: peter-evans/create-pull-request@v5
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Update docs version ${{ matrix.repository }}'
title: ':arrow_up: Update docs version ${{ matrix.repository }}'
branch: "update/docs"
body: Bump of ${{ matrix.repository }} version inside docs
signoff: true

View File

@@ -133,6 +133,37 @@ jobs:
tests-petals:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
sudo apt-get update && \
sudo apt-get install -y conda
sudo apt-get install -y ca-certificates cmake curl patch
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
- name: Test petals
run: |
export PATH=$PATH:/opt/conda/bin
make -C backend/python/petals
make -C backend/python/petals test
tests-bark:
runs-on: ubuntu-latest
steps:

View File

@@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0
GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
CPPLLAMA_VERSION?=88ae8952b65cbf32eb1f5703681ea592e510e570
CPPLLAMA_VERSION?=328b83de23b33240e28f4e74900d1d06726f5eb1
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -19,10 +19,10 @@ GOGGMLTRANSFORMERS_VERSION?=ffb09d7dd71e2cbc6c5d7d05357d230eea6f369a
# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=c898cd0f62df8f2a7830e53d1d513bef4f6f792b
RWKV_VERSION?=8f6d062fa80ed4ac4a00d1ac53aa4de54183fffe
# whisper.cpp version
WHISPER_CPP_VERSION?=940de9dbe9c90624dc99521cb34c8a97b86d543c
WHISPER_CPP_VERSION?=9286d3f584240ba58bd44a1bd1e85141579c78d4
# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d

View File

@@ -47,6 +47,10 @@ func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader,
}
}
if err := cl.Preload(options.Loader.ModelPath); err != nil {
log.Error().Msgf("error downloading models: %s", err.Error())
}
if options.Debug {
for _, v := range cl.ListConfigs() {
cfg, _ := cl.GetConfig(v)

View File

@@ -294,7 +294,7 @@ var _ = Describe("API test", func() {
Expect(content["backend"]).To(Equal("bert-embeddings"))
})
It("runs openllama", Label("llama"), func() {
It("runs openllama(llama-ggml backend)", Label("llama"), func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
@@ -362,9 +362,10 @@ var _ = Describe("API test", func() {
Expect(res["location"]).To(Equal("San Francisco, California, United States"), fmt.Sprint(res))
Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
})
It("runs openllama gguf", Label("llama-gguf"), func() {
It("runs openllama gguf(llama-cpp)", Label("llama-gguf"), func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}

View File

@@ -8,6 +8,8 @@ import (
"strings"
"sync"
"github.com/go-skynet/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v3"
)
@@ -264,6 +266,36 @@ func (cm *ConfigLoader) ListConfigs() []string {
return res
}
func (cm *ConfigLoader) Preload(modelPath string) error {
cm.Lock()
defer cm.Unlock()
for i, config := range cm.configs {
modelURL := config.PredictionOptions.Model
modelURL = utils.ConvertURL(modelURL)
if strings.HasPrefix(modelURL, "http://") || strings.HasPrefix(modelURL, "https://") {
// md5 of model name
md5Name := utils.MD5(modelURL)
// check if file exists
if _, err := os.Stat(filepath.Join(modelPath, md5Name)); os.IsNotExist(err) {
err := utils.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", func(fileName, current, total string, percent float64) {
log.Info().Msgf("Downloading %s: %s/%s (%.2f%%)", fileName, current, total, percent)
})
if err != nil {
return err
}
}
cc := cm.configs[i]
c := &cc
c.PredictionOptions.Model = md5Name
cm.configs[i] = *c
}
}
return nil
}
func (cm *ConfigLoader) LoadConfigs(path string) error {
cm.Lock()
defer cm.Unlock()
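To make the renaming step in `Preload` concrete, a small hedged sketch of how a URL-backed model gets its on-disk name — the hex MD5 of the (converted) model URL, per the `utils.MD5` helper added later in this compare; the path and URL below are illustrative:

```go
// Sketch: Preload stores URL-backed models under the hex MD5 of the
// model URL. The /models path and URL are placeholders.
package main

import (
	"crypto/md5"
	"fmt"
	"path/filepath"
)

func main() {
	modelURL := "https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q2_K.gguf"
	md5Name := fmt.Sprintf("%x", md5.Sum([]byte(modelURL)))
	fmt.Println(filepath.Join("/models", md5Name)) // e.g. /models/<32 hex chars>
}
```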

View File

@@ -219,7 +219,12 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
c.Set("Transfer-Encoding", "chunked")
}
templateFile := config.Model
templateFile := ""
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
if o.Loader.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
templateFile = config.Model
}
if config.TemplateConfig.Chat != "" && !processFunctions {
templateFile = config.TemplateConfig.Chat
@@ -229,18 +234,19 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
templateFile = config.TemplateConfig.Functions
}
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
SystemPrompt: config.SystemPrompt,
SuppressSystemPrompt: suppressConfigSystemPrompt,
Input: predInput,
Functions: funcs,
})
if err == nil {
predInput = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", predInput)
} else {
log.Debug().Msgf("Template failed loading: %s", err.Error())
if templateFile != "" {
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
SystemPrompt: config.SystemPrompt,
SuppressSystemPrompt: suppressConfigSystemPrompt,
Input: predInput,
Functions: funcs,
})
if err == nil {
predInput = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", predInput)
} else {
log.Debug().Msgf("Template failed loading: %s", err.Error())
}
}
log.Debug().Msgf("Prompt (after templating): %s", predInput)

View File

@@ -81,7 +81,12 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
c.Set("Transfer-Encoding", "chunked")
}
templateFile := config.Model
templateFile := ""
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
if o.Loader.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
templateFile = config.Model
}
if config.TemplateConfig.Completion != "" {
templateFile = config.TemplateConfig.Completion
@@ -94,13 +99,14 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
predInput := config.PromptStrings[0]
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
Input: predInput,
})
if err == nil {
predInput = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", predInput)
if templateFile != "" {
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
Input: predInput,
})
if err == nil {
predInput = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", predInput)
}
}
responses := make(chan schema.OpenAIResponse)
@@ -145,14 +151,16 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
totalTokenUsage := backend.TokenUsage{}
for k, i := range config.PromptStrings {
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
SystemPrompt: config.SystemPrompt,
Input: i,
})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
if templateFile != "" {
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{
SystemPrompt: config.SystemPrompt,
Input: i,
})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
}
}
r, tokenUsage, err := ComputeChoices(

View File

@@ -30,7 +30,12 @@ func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
log.Debug().Msgf("Parameter Config: %+v", config)
templateFile := config.Model
templateFile := ""
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
if o.Loader.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
templateFile = config.Model
}
if config.TemplateConfig.Edit != "" {
templateFile = config.TemplateConfig.Edit
@@ -40,15 +45,16 @@ func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
totalTokenUsage := backend.TokenUsage{}
for _, i := range config.InputStrings {
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{
Input: i,
Instruction: input.Instruction,
SystemPrompt: config.SystemPrompt,
})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
if templateFile != "" {
templatedInput, err := o.Loader.EvaluateTemplateForPrompt(model.EditPromptTemplate, templateFile, model.PromptTemplateData{
Input: i,
Instruction: input.Instruction,
SystemPrompt: config.SystemPrompt,
})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
}
}
r, tokenUsage, err := ComputeChoices(input, i, config, o, o.Loader, func(s string, c *[]schema.Choice) {

View File

@@ -1,8 +1,6 @@
.PHONY: ttsbark
ttsbark:
@echo "Creating virtual environment..."
@conda env create --name ttsbark --file ttsbark.yml
@echo "Virtual environment created."
$(MAKE) -C ../common-env/transformers
.PHONY: run
run:

View File

@@ -6,7 +6,7 @@
export PATH=$PATH:/opt/conda/bin
# Activate conda environment
source activate ttsbark
source activate transformers
# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

View File

@@ -3,7 +3,7 @@
## A bash script wrapper that runs the bark server with conda
# Activate conda environment
source activate ttsbark
source activate transformers
# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

View File

@@ -0,0 +1,10 @@
CONDA_ENV_PATH = "transformers.yml"
ifeq ($(BUILD_TYPE), cublas)
CONDA_ENV_PATH = "transformers-nvidia.yml"
endif
.PHONY: transformers
transformers:
@echo "Installing $(CONDA_ENV_PATH)..."
bash install.sh $(CONDA_ENV_PATH)

View File

@@ -0,0 +1,15 @@
#!/bin/bash
set -ex
# Check if the conda environment exists.
# Note: returns success (0) when the env does NOT exist, since `conda list` fails for a missing env.
conda_env_exists(){
! conda list --name "${@}" >/dev/null 2>/dev/null
}
if conda_env_exists "transformers" ; then
echo "Creating virtual environment..."
conda env create --name transformers --file $1
echo "Virtual environment created."
else
echo "Virtual environment already exists."
fi

View File

@@ -1,4 +1,4 @@
name: bark
name: transformers
channels:
- defaults
dependencies:
@@ -35,6 +35,8 @@ dependencies:
- certifi==2023.7.22
- charset-normalizer==3.3.0
- datasets==2.14.5
- sentence-transformers==2.2.2
- sentencepiece==0.1.99
- dill==0.3.7
- einops==0.7.0
- encodec==0.1.1
@@ -68,6 +70,7 @@ dependencies:
- packaging==23.2
- pandas==2.1.1
- peft==0.5.0
- git+https://github.com/bigscience-workshop/petals
- protobuf==4.24.4
- psutil==5.9.5
- pyarrow==13.0.0
@@ -93,4 +96,4 @@ dependencies:
- urllib3==1.26.17
- xxhash==3.4.1
- yarl==1.9.2
prefix: /opt/conda/envs/bark
prefix: /opt/conda/envs/transformers

View File

@@ -20,58 +20,68 @@ dependencies:
- setuptools=68.0.0=py311h06a4308_0
- sqlite=3.41.2=h5eee18b_0
- tk=8.6.12=h1ccaba5_0
- tzdata=2023c=h04d1e81_0
- wheel=0.41.2=py311h06a4308_0
- xz=5.4.2=h5eee18b_0
- zlib=1.2.13=h5eee18b_0
- pip:
- accelerate==0.23.0
- aiohttp==3.8.5
- aiosignal==1.3.1
- async-timeout==4.0.3
- attrs==23.1.0
- bark==0.1.5
- boto3==1.28.61
- botocore==1.31.61
- certifi==2023.7.22
- charset-normalizer==3.3.0
- click==8.1.7
- datasets==2.14.5
- sentence-transformers==2.2.2
- sentencepiece==0.1.99
- dill==0.3.7
- einops==0.7.0
- encodec==0.1.1
- filelock==3.12.4
- fsspec==2023.9.2
- frozenlist==1.4.0
- fsspec==2023.6.0
- funcy==2.0
- grpcio==1.59.0
- huggingface-hub==0.17.3
- huggingface-hub==0.16.4
- idna==3.4
- install==1.3.5
- jinja2==3.1.2
- joblib==1.3.2
- jmespath==1.0.1
- markupsafe==2.1.3
- mpmath==1.3.0
- multidict==6.0.4
- multiprocess==0.70.15
- networkx==3.1
- nltk==3.8.1
- numpy==1.26.0
- nvidia-cublas-cu12==12.1.3.1
- nvidia-cuda-cupti-cu12==12.1.105
- nvidia-cuda-nvrtc-cu12==12.1.105
- nvidia-cuda-runtime-cu12==12.1.105
- nvidia-cudnn-cu12==8.9.2.26
- nvidia-cufft-cu12==11.0.2.54
- nvidia-curand-cu12==10.3.2.106
- nvidia-cusolver-cu12==11.4.5.107
- nvidia-cusparse-cu12==12.1.0.106
- nvidia-nccl-cu12==2.18.1
- nvidia-nvjitlink-cu12==12.2.140
- nvidia-nvtx-cu12==12.1.105
- packaging==23.2
- pillow==10.0.1
- pandas==2.1.1
- peft==0.5.0
- git+https://github.com/bigscience-workshop/petals
- protobuf==4.24.4
- psutil==5.9.5
- pyarrow==13.0.0
- python-dateutil==2.8.2
- pytz==2023.3.post1
- pyyaml==6.0.1
- regex==2023.10.3
- requests==2.31.0
- safetensors==0.4.0
- scikit-learn==1.3.1
- rouge==1.0.1
- s3transfer==0.7.0
- safetensors==0.3.3
- scipy==1.11.3
- sentence-transformers==2.2.2
- sentencepiece==0.1.99
- six==1.16.0
- sympy==1.12
- threadpoolctl==3.2.0
- tokenizers==0.14.1
- tokenizers==0.14.0
- torch==2.1.0
- torchvision==0.16.0
- torchaudio==2.1.0
- tqdm==4.66.1
- transformers==4.34.0
- triton==2.1.0
- typing-extensions==4.8.0
- urllib3==2.0.6
- tzdata==2023.3
- urllib3==1.26.17
- xxhash==3.4.1
- yarl==1.9.2
prefix: /opt/conda/envs/transformers

View File

@@ -70,4 +70,4 @@ dependencies:
- typing-extensions==4.8.0
- urllib3==2.0.6
- zipp==3.17.0
prefix: /opt/conda/envs/diffusers
prefix: /opt/conda/envs/diffusers

View File

@@ -1,11 +1,15 @@
.PHONY: petals
petals:
@echo "Creating virtual environment..."
@conda env create --name petals --file petals.yml
@echo "Virtual environment created."
$(MAKE) -C ../common-env/transformers
.PHONY: run
run:
@echo "Running petals..."
bash run.sh
@echo "petals run."
.PHONY: test
test:
@echo "Testing petals..."
bash test.sh
@echo "petals tested."

View File

@@ -9,10 +9,10 @@ export PATH=$PATH:/opt/conda/bin
# if source is available use it, or use conda
#
if [ -f /opt/conda/bin/activate ]; then
source activate petals
source activate transformers
else
eval "$(conda shell.bash hook)"
conda activate petals
conda activate transformers
fi
# get the directory where the bash script is located

View File

@@ -0,0 +1,11 @@
#!/bin/bash
##
## A bash script wrapper that runs the petals tests with conda
# Activate conda environment
source activate transformers
# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
python -m unittest $DIR/test_petals.py

View File

@@ -0,0 +1,58 @@
import unittest
import subprocess
import time

import grpc
import backend_pb2
import backend_pb2_grpc
class TestBackendServicer(unittest.TestCase):
"""
TestBackendServicer is the class that tests the gRPC service.
This class contains methods to test the startup and shutdown of the gRPC service.
"""
def setUp(self):
self.service = subprocess.Popen(["python", "backend_petals.py", "--addr", "localhost:50051"])
time.sleep(10)
def tearDown(self) -> None:
self.service.terminate()
self.service.wait()
def test_server_startup(self):
try:
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.Health(backend_pb2.HealthMessage())
self.assertEqual(response.message, b'OK')
except Exception as err:
print(err)
self.fail("Server failed to start")
finally:
self.tearDown()
def test_load_model(self):
"""
This method tests if the model is loaded successfully
"""
try:
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.LoadModel(backend_pb2.ModelOptions(Model="bigscience/bloom-560m"))
print(response)
self.assertTrue(response.success)
self.assertEqual(response.message, "Model loaded successfully")
except Exception as err:
print(err)
self.fail("LoadModel service failed")
finally:
self.tearDown()

View File

@@ -1,8 +1,7 @@
.PHONY: sentencetransformers
sentencetransformers:
@echo "Creating virtual environment..."
@conda env create --name sentencetransformers --file sentencetransformers.yml
@echo "Virtual environment created."
$(MAKE) -C ../common-env/transformers
.PHONY: run
run:

View File

@@ -6,7 +6,7 @@
export PATH=$PATH:/opt/conda/bin
# Activate conda environment
source activate sentencetransformers
source activate transformers
# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

View File

@@ -1,77 +0,0 @@
name: sentencetransformers
channels:
- defaults
dependencies:
- _libgcc_mutex=0.1=main
- _openmp_mutex=5.1=1_gnu
- bzip2=1.0.8=h7b6447c_0
- ca-certificates=2023.08.22=h06a4308_0
- ld_impl_linux-64=2.38=h1181459_1
- libffi=3.4.4=h6a678d5_0
- libgcc-ng=11.2.0=h1234567_1
- libgomp=11.2.0=h1234567_1
- libstdcxx-ng=11.2.0=h1234567_1
- libuuid=1.41.5=h5eee18b_0
- ncurses=6.4=h6a678d5_0
- openssl=3.0.11=h7f8727e_2
- pip=23.2.1=py311h06a4308_0
- python=3.11.5=h955ad1f_0
- readline=8.2=h5eee18b_0
- setuptools=68.0.0=py311h06a4308_0
- sqlite=3.41.2=h5eee18b_0
- tk=8.6.12=h1ccaba5_0
- tzdata=2023c=h04d1e81_0
- wheel=0.41.2=py311h06a4308_0
- xz=5.4.2=h5eee18b_0
- zlib=1.2.13=h5eee18b_0
- pip:
- certifi==2023.7.22
- charset-normalizer==3.3.0
- click==8.1.7
- filelock==3.12.4
- fsspec==2023.9.2
- grpcio==1.59.0
- huggingface-hub==0.17.3
- idna==3.4
- install==1.3.5
- jinja2==3.1.2
- joblib==1.3.2
- markupsafe==2.1.3
- mpmath==1.3.0
- networkx==3.1
- nltk==3.8.1
- numpy==1.26.0
- nvidia-cublas-cu12==12.1.3.1
- nvidia-cuda-cupti-cu12==12.1.105
- nvidia-cuda-nvrtc-cu12==12.1.105
- nvidia-cuda-runtime-cu12==12.1.105
- nvidia-cudnn-cu12==8.9.2.26
- nvidia-cufft-cu12==11.0.2.54
- nvidia-curand-cu12==10.3.2.106
- nvidia-cusolver-cu12==11.4.5.107
- nvidia-cusparse-cu12==12.1.0.106
- nvidia-nccl-cu12==2.18.1
- nvidia-nvjitlink-cu12==12.2.140
- nvidia-nvtx-cu12==12.1.105
- packaging==23.2
- pillow==10.0.1
- protobuf==4.24.4
- pyyaml==6.0.1
- regex==2023.10.3
- requests==2.31.0
- safetensors==0.4.0
- scikit-learn==1.3.1
- scipy==1.11.3
- sentence-transformers==2.2.2
- sentencepiece==0.1.99
- sympy==1.12
- threadpoolctl==3.2.0
- tokenizers==0.14.1
- torch==2.1.0
- torchvision==0.16.0
- tqdm==4.66.1
- transformers==4.34.0
- triton==2.1.0
- typing-extensions==4.8.0
- urllib3==2.0.6
prefix: /opt/conda/envs/sentencetransformers

View File

@@ -3,7 +3,7 @@
## A bash script wrapper that runs the sentencetransformers server with conda
# Activate conda environment
source activate sentencetransformers
source activate transformers
# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

View File

@@ -1,15 +1,7 @@
TRANSFORMERS_MUSICGEN_CONDA_PATH = "transformers-musicgen.yml"
ifeq ($(BUILD_TYPE), cublas)
TRANSFORMERS_MUSICGEN_CONDA_PATH = "transformers-musicgen-nvidia.yml"
endif
.PHONY: transformers-musicgen
transformers-musicgen:
@echo "Creating virtual environment..."
@conda env create --name transformers-musicgen --file $(TRANSFORMERS_MUSICGEN_CONDA_PATH)
@echo "Virtual environment created."
$(MAKE) -C ../common-env/transformers
.PHONY: run
run:

View File

@@ -3,7 +3,7 @@
## A bash script wrapper that runs the transformers server with conda
# Activate conda environment
source activate transformers-musicgen
source activate transformers
# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

View File

@@ -1,71 +0,0 @@
name: transformers-musicgen
channels:
- defaults
dependencies:
- bzip2=1.0.8
- ca-certificates=2023.08.22
- libffi=3.4.4
- libuuid=1.41.5
- ncurses=6.4
- openssl=3.0.11
- pip=23.2.1
- python=3.11.5
- readline=8.2
- setuptools=68.0.0
- sqlite=3.41.2
- tk=8.6.12
- tzdata=2023c
- wheel=0.41.2
- xz=5.4.2
- zlib=1.2.13
- pip:
- certifi==2023.7.22
- charset-normalizer==3.3.0
- click==8.1.7
- filelock==3.12.4
- fsspec==2023.9.2
- grpcio==1.59.0
- huggingface-hub==0.17.3
- idna==3.4
- install==1.3.5
- jinja2==3.1.2
- joblib==1.3.2
- markupsafe==2.1.3
- mpmath==1.3.0
- networkx==3.1
- nltk==3.8.1
- numpy==1.26.0
- nvidia-cublas-cu12==12.1.3.1
- nvidia-cuda-cupti-cu12==12.1.105
- nvidia-cuda-nvrtc-cu12==12.1.105
- nvidia-cuda-runtime-cu12==12.1.105
- nvidia-cudnn-cu12==8.9.2.26
- nvidia-cufft-cu12==11.0.2.54
- nvidia-curand-cu12==10.3.2.106
- nvidia-cusolver-cu12==11.4.5.107
- nvidia-cusparse-cu12==12.1.0.106
- nvidia-nccl-cu12==2.18.1
- nvidia-nvjitlink-cu12==12.2.140
- nvidia-nvtx-cu12==12.1.105
- packaging==23.2
- pillow==10.0.1
- protobuf==4.24.4
- pyyaml==6.0.1
- regex==2023.10.3
- requests==2.31.0
- safetensors==0.4.0
- scikit-learn==1.3.1
- scipy==1.11.3
- sentence-transformers==2.2.2
- sentencepiece==0.1.99
- sympy==1.12
- threadpoolctl==3.2.0
- tokenizers==0.14.1
- torch==2.1.0
- torchvision==0.16.0
- tqdm==4.66.1
- transformers==4.34.0
- triton==2.1.0
- typing-extensions==4.8.0
- urllib3==2.0.6
prefix: /opt/conda/envs/transformers-musicgen

View File

@@ -1,58 +0,0 @@
name: transformers-musicgen
channels:
- defaults
dependencies:
- bzip2=1.0.8
- ca-certificates=2023.08.22
- libffi=3.4.4
- libuuid=1.41.5
- ncurses=6.4
- openssl=3.0.11
- pip=23.2.1
- python=3.11.5
- readline=8.2
- setuptools=68.0.0
- sqlite=3.41.2
- tk=8.6.12
- tzdata=2023c
- wheel=0.41.2
- xz=5.4.2
- zlib=1.2.13
- pip:
- certifi==2023.7.22
- charset-normalizer==3.3.0
- click==8.1.7
- filelock==3.12.4
- fsspec==2023.9.2
- grpcio==1.59.0
- huggingface-hub==0.17.3
- idna==3.4
- install==1.3.5
- jinja2==3.1.2
- joblib==1.3.2
- markupsafe==2.1.3
- mpmath==1.3.0
- networkx==3.1
- nltk==3.8.1
- numpy==1.26.0
- packaging==23.2
- pillow==10.0.1
- protobuf==4.24.4
- pyyaml==6.0.1
- regex==2023.10.3
- requests==2.31.0
- safetensors==0.4.0
- scikit-learn==1.3.1
- scipy==1.11.3
- sentence-transformers==2.2.2
- sentencepiece==0.1.99
- sympy==1.12
- threadpoolctl==3.2.0
- tokenizers==0.14.1
- torch==2.1.0
- torchvision==0.16.0
- tqdm==4.66.1
- transformers==4.34.0
- typing-extensions==4.8.0
- urllib3==2.0.6
prefix: /opt/conda/envs/transformers-musicgen

View File

@@ -1,8 +1,6 @@
.PHONY: transformers
transformers:
@echo "Creating virtual environment..."
@conda env create --name transformers --file transformers.yml
@echo "Virtual environment created."
$(MAKE) -C ../common-env/transformers
.PHONY: run
run:

View File

@@ -66,7 +66,7 @@ class TestBackendServicer(unittest.TestCase):
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m"))
self.assertTrue(response.success)
req = backend_pb2.PredictOptions(prompt="The capital of France is")
req = backend_pb2.PredictOptions(Prompt="The capital of France is")
resp = stub.Predict(req)
self.assertIsNotNone(resp.message)
except Exception as err:

View File

@@ -123,9 +123,9 @@ LocalAI has a set of images to support CUDA, ffmpeg and 'vanilla' (CPU-only). Th
{{% tab name="Vanilla / CPU Images" %}}
- `master`
- `latest`
- `v2.0.0`
- `v2.0.0-ffmpeg`
- `v2.0.0-ffmpeg-core`
- `{{< version >}}`
- `{{< version >}}-ffmpeg`
- `{{< version >}}-ffmpeg-core`
Core Images - Smaller images without pre-downloaded Python dependencies
{{% /tab %}}
@@ -133,10 +133,10 @@ Core Images - Smaller images without predownload python dependencies
{{% tab name="GPU Images CUDA 11" %}}
- `master-cublas-cuda11`
- `master-cublas-cuda11-core`
- `v2.0.0-cublas-cuda11`
- `v2.0.0-cublas-cuda11-core`
- `v2.0.0-cublas-cuda11-ffmpeg`
- `v2.0.0-cublas-cuda11-ffmpeg-core`
- `{{< version >}}-cublas-cuda11`
- `{{< version >}}-cublas-cuda11-core`
- `{{< version >}}-cublas-cuda11-ffmpeg`
- `{{< version >}}-cublas-cuda11-ffmpeg-core`
Core Images - Smaller images without pre-downloaded Python dependencies
{{% /tab %}}
@@ -144,10 +144,10 @@ Core Images - Smaller images without predownload python dependencies
{{% tab name="GPU Images CUDA 12" %}}
- `master-cublas-cuda12`
- `master-cublas-cuda12-core`
- `v2.0.0-cublas-cuda12`
- `v2.0.0-cublas-cuda12-core`
- `v2.0.0-cublas-cuda12-ffmpeg`
- `v2.0.0-cublas-cuda12-ffmpeg-core`
- `{{< version >}}-cublas-cuda12`
- `{{< version >}}-cublas-cuda12-core`
- `{{< version >}}-cublas-cuda12-ffmpeg`
- `{{< version >}}-cublas-cuda12-ffmpeg-core`
Core Images - Smaller images without pre-downloaded Python dependencies
@@ -158,9 +158,9 @@ Core Images - Smaller images without predownload python dependencies
Example:
- Standard (GPT + `stablediffusion`): `quay.io/go-skynet/local-ai:latest`
- FFmpeg: `quay.io/go-skynet/local-ai:v2.0.0-ffmpeg`
- CUDA 11+FFmpeg: `quay.io/go-skynet/local-ai:v2.0.0-cublas-cuda11-ffmpeg`
- CUDA 12+FFmpeg: `quay.io/go-skynet/local-ai:v2.0.0-cublas-cuda12-ffmpeg`
- FFmpeg: `quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg`
- CUDA 11+FFmpeg: `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-ffmpeg`
- CUDA 12+FFmpeg: `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-ffmpeg`
{{% notice note %}}
Note: the binary inside the image is pre-compiled, and might not suit all CPUs.

View File

@@ -8,8 +8,7 @@ weight = 9
This section includes LocalAI end-to-end examples, tutorials, and how-tos curated by the community and maintained by [lunamidori5](https://github.com/lunamidori5).
- [Setup LocalAI with Docker on CPU]({{%relref "howtos/easy-setup-docker-cpu" %}})
- [Setup LocalAI with Docker With CUDA]({{%relref "howtos/easy-setup-docker-gpu" %}})
- [Setup LocalAI with Docker]({{%relref "howtos/easy-setup-docker" %}})
- [Setting up a Model]({{%relref "howtos/easy-model" %}})
- [Making Text / LLM requests to LocalAI]({{%relref "howtos/easy-request" %}})
- [Making Photo / SD requests to LocalAI]({{%relref "howtos/easy-setup-sd" %}})

View File

@@ -1,137 +0,0 @@
+++
disableToc = false
title = "Easy Setup - CPU Docker"
weight = 2
+++
{{% notice Note %}}
- You will need about 10gb of RAM Free
- You will need about 15gb of space free on C drive for ``Docker compose``
{{% /notice %}}
We are going to run `LocalAI` with `docker compose` for this setup.
Let's set up our folders for ``LocalAI``
{{< tabs >}}
{{% tab name="Windows (Batch)" %}}
```batch
mkdir "LocalAI"
cd LocalAI
mkdir "models"
mkdir "images"
```
{{% /tab %}}
{{% tab name="Linux (Bash / WSL)" %}}
```bash
mkdir -p "LocalAI"
cd LocalAI
mkdir -p "models"
mkdir -p "images"
```
{{% /tab %}}
{{< /tabs >}}
At this point we want to set up our `.env` file; here is a copy for you to use if you wish. Make sure it is in the ``LocalAI`` folder.
```bash
## Set number of threads.
## Note: prefer the number of physical cores. Overbooking the CPU degrades performance notably.
THREADS=2
## Specify a different bind address (defaults to ":8080")
# ADDRESS=127.0.0.1:8080
## Define galleries.
## models to install will be visible in `/models/available`
GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]
## Default path for models
MODELS_PATH=/models
## Enable debug mode
# DEBUG=true
## Disables COMPEL (lets Stable Diffusion work; uncomment if you plan on using it)
# COMPEL=0
## Enable/Disable single backend (useful if only one GPU is available)
# SINGLE_ACTIVE_BACKEND=true
## Specify a build type. Available: cublas, openblas, clblas.
BUILD_TYPE=cublas
## Uncomment and set to true to enable rebuilding from source
# REBUILD=true
## Enable go tags, available: stablediffusion, tts
## stablediffusion: image generation with stablediffusion
## tts: enables text-to-speech with go-piper
## (requires REBUILD=true)
#
#GO_TAGS=tts
## Path where to store generated images
# IMAGE_PATH=/tmp
## Specify a default upload limit in MB (whisper)
# UPLOAD_LIMIT
# HUGGINGFACEHUB_API_TOKEN=Token here
```
Now that we have the `.env` set, let's set up our `docker-compose` file.
It will use a container from [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags).
Also note this `docker-compose` file is for `CPU` only.
```docker
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:v2.0.0
tty: true # enable colorized logs
restart: always # should this be on-failure ?
ports:
- 8080:8080
env_file:
- .env
volumes:
- ./models:/models
- ./images/:/tmp/generated/images/
command: ["/usr/bin/local-ai" ]
```
Make sure to save that in the root of the `LocalAI` folder. Then let's spin up the Docker container; run this in a `CMD` or `BASH` prompt:
```bash
docker compose up -d --pull always
```
Now we are going to let that set up. Once it is done, let's check that our huggingface / localai galleries are working (wait until you see the screen below before doing this).
You should see:
```
┌───────────────────────────────────────────────────┐
│ Fiber v2.42.0 │
│ http://127.0.0.1:8080 │
│ (bound on host 0.0.0.0 and port 8080) │
│ │
│ Handlers ............. 1 Processes ........... 1 │
│ Prefork ....... Disabled PID ................. 1 │
└───────────────────────────────────────────────────┘
```
```bash
curl http://localhost:8080/models/available
```
Output will look like this:
![](https://cdn.discordapp.com/attachments/1116933141895053322/1134037542845566976/image.png)
Now that we have that set up, let's go set up a [model]({{%relref "easy-model" %}})

View File

@@ -1,7 +1,7 @@
+++
disableToc = false
title = "Easy Setup - GPU Docker"
title = "Easy Setup - Docker"
weight = 2
+++
@@ -12,26 +12,13 @@ weight = 2
We are going to run `LocalAI` with `docker compose` for this setup.
Lets Setup our folders for ``LocalAI``
{{< tabs >}}
{{% tab name="Windows (Batch)" %}}
Let's set up our folders for ``LocalAI`` (run these to create the folders for you, if you wish)
```batch
mkdir "LocalAI"
cd LocalAI
mkdir "models"
mkdir "images"
```
{{% /tab %}}
{{% tab name="Linux (Bash / WSL)" %}}
```bash
mkdir -p "LocalAI"
cd LocalAI
mkdir -p "models"
mkdir -p "images"
```
{{% /tab %}}
{{< /tabs >}}
At this point we want to set up our `.env` file; here is a copy for you to use if you wish. Make sure it is in the ``LocalAI`` folder.
@@ -51,7 +38,7 @@ GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.
MODELS_PATH=/models
## Enable debug mode
# DEBUG=true
DEBUG=true
## Disables COMPEL (Lets Stable Diffuser work, uncomment if you plan on using it)
# COMPEL=0
@@ -84,6 +71,32 @@ BUILD_TYPE=cublas
Now that we have the `.env` set, let's set up our `docker-compose` file.
It will use a container from [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags).
{{< tabs >}}
{{% tab name="CPU Only" %}}
Also note this `docker-compose` file is for `CPU` only.
```docker
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:{{< version >}}
tty: true # enable colorized logs
restart: always # should this be on-failure ?
ports:
- 8080:8080
env_file:
- .env
volumes:
- ./models:/models
- ./images/:/tmp/generated/images/
command: ["/usr/bin/local-ai" ]
```
{{% /tab %}}
{{% tab name="GPU and CPU" %}}
Also note this `docker-compose` file is for `CUDA` only.
Please change the image to what you need.
@@ -91,10 +104,10 @@ Please change the image to what you need.
{{% tab name="GPU Images CUDA 11" %}}
- `master-cublas-cuda11`
- `master-cublas-cuda11-core`
- `v2.0.0-cublas-cuda11`
- `v2.0.0-cublas-cuda11-core`
- `v2.0.0-cublas-cuda11-ffmpeg`
- `v2.0.0-cublas-cuda11-ffmpeg-core`
- `{{< version >}}-cublas-cuda11`
- `{{< version >}}-cublas-cuda11-core`
- `{{< version >}}-cublas-cuda11-ffmpeg`
- `{{< version >}}-cublas-cuda11-ffmpeg-core`
Core Images - Smaller images without pre-downloaded Python dependencies
{{% /tab %}}
@@ -102,10 +115,10 @@ Core Images - Smaller images without predownload python dependencies
{{% tab name="GPU Images CUDA 12" %}}
- `master-cublas-cuda12`
- `master-cublas-cuda12-core`
- `v2.0.0-cublas-cuda12`
- `v2.0.0-cublas-cuda12-core`
- `v2.0.0-cublas-cuda12-ffmpeg`
- `v2.0.0-cublas-cuda12-ffmpeg-core`
- `{{< version >}}-cublas-cuda12`
- `{{< version >}}-cublas-cuda12-core`
- `{{< version >}}-cublas-cuda12-ffmpeg`
- `{{< version >}}-cublas-cuda12-ffmpeg-core`
Core Images - Smaller images without pre-downloaded Python dependencies
{{% /tab %}}
@@ -135,6 +148,8 @@ services:
- ./images/:/tmp/generated/images/
command: ["/usr/bin/local-ai" ]
```
{{% /tab %}}
{{< /tabs >}}
Make sure to save that in the root of the `LocalAI` folder. Then let's spin up the Docker container; run this in a `CMD` or `BASH` prompt:

View File

@@ -12,17 +12,6 @@ curl http://localhost:8080/models/apply -H "Content-Type: application/json" -d '
}'
```
Now we need to make a ``bert.yaml`` in the models folder
```yaml
backend: bert-embeddings
embeddings: true
name: text-embedding-ada-002
parameters:
model: bert
```
**Restart LocalAI after you change a yaml file**
When you would like to request the model from the CLI, you can do:
```bash
@@ -30,7 +19,7 @@ curl http://localhost:8080/v1/embeddings \
-H "Content-Type: application/json" \
-d '{
"input": "The food was delicious and the waiter...",
"model": "text-embedding-ada-002"
"model": "bert-embeddings"
}'
```
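For completeness, a hedged Go version of the curl request above, against the same OpenAI-compatible `/v1/embeddings` endpoint; the host and payload are taken from the example, the rest is illustrative:

```go
// Go version of the embeddings request shown above. Assumes LocalAI is
// listening on localhost:8080 as in the how-to.
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	body := []byte(`{"input": "The food was delicious and the waiter...", "model": "bert-embeddings"}`)
	resp, err := http.Post("http://localhost:8080/v1/embeddings", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	b, _ := io.ReadAll(resp.Body)
	fmt.Println(string(b))
}
```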

View File

@@ -5,7 +5,7 @@ weight = 2
+++
Setting up a Stable Diffusion model is super easy.
In your models folder make a file called ``stablediffusion.yaml``, then edit that file with the following. (You can change ``Linaqruf/animagine-xl`` with what ever ``sd-lx`` model you would like.
In your ``models`` folder make a file called ``stablediffusion.yaml``, then edit that file with the following. (You can change ``Linaqruf/animagine-xl`` to whatever ``sd-lx`` model you would like.)
```yaml
name: animagine-xl
parameters:
@@ -21,8 +21,7 @@ diffusers:
If you are using Docker, you will need to run this in the LocalAI folder with the ``docker-compose.yaml`` file in it:
```bash
docker-compose down #windows
docker compose down #linux/mac
docker compose down
```
Then in your ``.env`` file uncomment this line.
@@ -32,14 +31,13 @@ COMPEL=0
After that we can recreate the LocalAI Docker container by running this in the LocalAI folder with the ``docker-compose.yaml`` file in it:
```bash
docker-compose up #windows
docker compose up #linux/mac
docker compose up -d
```
Then, to download and set up the model, just send a normal ``OpenAI`` request! LocalAI will do the rest!
```bash
curl http://localhost:8080/v1/images/generations -H "Content-Type: application/json" -d '{
"prompt": "Two Boxes, 1blue, 1red",
"size": "256x256"
"size": "1024x1024"
}'
```

3
docs/data/version.json Normal file
View File

@@ -0,0 +1,3 @@
{
"version": "v2.1.0"
}

View File

@@ -0,0 +1 @@
{{ $.Site.Data.version.version }}

View File

@@ -5,7 +5,6 @@ import (
"fmt"
"hash"
"io"
"net/http"
"os"
"path/filepath"
"strconv"
@@ -115,89 +114,8 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides
// Create file path
filePath := filepath.Join(basePath, file.Filename)
// Check if the file already exists
_, err := os.Stat(filePath)
if err == nil {
// File exists, check SHA
if file.SHA256 != "" {
// Verify SHA
calculatedSHA, err := calculateSHA(filePath)
if err != nil {
return fmt.Errorf("failed to calculate SHA for file %q: %v", file.Filename, err)
}
if calculatedSHA == file.SHA256 {
// SHA matches, skip downloading
log.Debug().Msgf("File %q already exists and matches the SHA. Skipping download", file.Filename)
continue
}
// SHA doesn't match, delete the file and download again
err = os.Remove(filePath)
if err != nil {
return fmt.Errorf("failed to remove existing file %q: %v", file.Filename, err)
}
log.Debug().Msgf("Removed %q (SHA doesn't match)", filePath)
} else {
// SHA is missing, skip downloading
log.Debug().Msgf("File %q already exists. Skipping download", file.Filename)
continue
}
} else if !os.IsNotExist(err) {
// Error occurred while checking file existence
return fmt.Errorf("failed to check file %q existence: %v", file.Filename, err)
}
log.Debug().Msgf("Downloading %q", file.URI)
// Download file
resp, err := http.Get(file.URI)
if err != nil {
return fmt.Errorf("failed to download file %q: %v", file.Filename, err)
}
defer resp.Body.Close()
// Create parent directory
err = os.MkdirAll(filepath.Dir(filePath), 0755)
if err != nil {
return fmt.Errorf("failed to create parent directory for file %q: %v", file.Filename, err)
}
// Create and write file content
outFile, err := os.Create(filePath)
if err != nil {
return fmt.Errorf("failed to create file %q: %v", file.Filename, err)
}
defer outFile.Close()
progress := &progressWriter{
fileName: file.Filename,
total: resp.ContentLength,
hash: sha256.New(),
downloadStatus: downloadStatus,
}
_, err = io.Copy(io.MultiWriter(outFile, progress), resp.Body)
if err != nil {
return fmt.Errorf("failed to write file %q: %v", file.Filename, err)
}
if file.SHA256 != "" {
// Verify SHA
calculatedSHA := fmt.Sprintf("%x", progress.hash.Sum(nil))
if calculatedSHA != file.SHA256 {
log.Debug().Msgf("SHA mismatch for file %q ( calculated: %s != metadata: %s )", file.Filename, calculatedSHA, file.SHA256)
return fmt.Errorf("SHA mismatch for file %q ( calculated: %s != metadata: %s )", file.Filename, calculatedSHA, file.SHA256)
}
} else {
log.Debug().Msgf("SHA missing for %q. Skipping validation", file.Filename)
}
log.Debug().Msgf("File %q downloaded and verified", file.Filename)
if utils.IsArchive(filePath) {
log.Debug().Msgf("File %q is an archive, uncompressing to %s", file.Filename, basePath)
if err := utils.ExtractArchive(filePath, basePath); err != nil {
log.Debug().Msgf("Failed decompressing %q: %s", file.Filename, err.Error())
return err
}
if err := utils.DownloadFile(file.URI, filePath, file.SHA256, downloadStatus); err != nil {
return err
}
}

View File

@@ -247,17 +247,19 @@ func (ml *ModelLoader) loadTemplateIfExists(templateType TemplateType, templateN
// skip any error here - we run anyway if a template does not exist
modelTemplateFile := fmt.Sprintf("%s.tmpl", templateName)
if !ml.ExistsInModelPath(modelTemplateFile) {
return nil
}
dat, err := os.ReadFile(filepath.Join(ml.ModelPath, modelTemplateFile))
if err != nil {
return err
dat := ""
if ml.ExistsInModelPath(modelTemplateFile) {
d, err := os.ReadFile(filepath.Join(ml.ModelPath, modelTemplateFile))
if err != nil {
return err
}
dat = string(d)
} else {
dat = templateName
}
// Parse the template
tmpl, err := template.New("prompt").Parse(string(dat))
tmpl, err := template.New("prompt").Parse(dat)
if err != nil {
return err
}
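A hedged sketch of the inline-template fallback implemented above: when no `<name>.tmpl` file exists in the model path, the configured template name is parsed as the template content itself. The helper name and paths here are illustrative, not LocalAI's API:

```go
// Sketch of the inline-template fallback: a missing "<name>.tmpl" means
// the name itself is the template. Function and paths are hypothetical.
package main

import (
	"os"
	"text/template"
)

func loadTemplate(modelPath, templateName string) (*template.Template, error) {
	dat := templateName // fallback: treat the name as an inline template
	if b, err := os.ReadFile(modelPath + "/" + templateName + ".tmpl"); err == nil {
		dat = string(b)
	}
	return template.New("prompt").Parse(dat)
}

func main() {
	tmpl, err := loadTemplate("/models", "{{.Input}}") // no such .tmpl file -> inline
	if err != nil {
		panic(err)
	}
	tmpl.Execute(os.Stdout, map[string]string{"Input": "Hello"}) // prints "Hello"
}
```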

View File

@@ -1,12 +1,18 @@
package utils
import (
"crypto/md5"
"crypto/sha256"
"fmt"
"hash"
"io"
"net/http"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/rs/zerolog/log"
)
const (
@@ -64,3 +70,173 @@ func GetURI(url string, f func(url string, i []byte) error) error {
// Unmarshal YAML data into a struct
return f(url, body)
}
func ConvertURL(s string) string {
switch {
case strings.HasPrefix(s, "huggingface://"):
repository := strings.Replace(s, "huggingface://", "", 1)
// convert repository to a full URL.
// e.g. TheBloke/Mixtral-8x7B-v0.1-GGUF/mixtral-8x7b-v0.1.Q2_K.gguf@main -> https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q2_K.gguf
owner := strings.Split(repository, "/")[0]
repo := strings.Split(repository, "/")[1]
branch := "main"
if strings.Contains(repo, "@") {
branch = strings.Split(repository, "@")[1]
}
filepath := strings.Split(repository, "/")[2]
if strings.Contains(filepath, "@") {
filepath = strings.Split(filepath, "@")[0]
}
return fmt.Sprintf("https://huggingface.co/%s/%s/resolve/%s/%s", owner, repo, branch, filepath)
}
return s
}
func DownloadFile(url string, filePath, sha string, downloadStatus func(string, string, string, float64)) error {
url = ConvertURL(url)
// Check if the file already exists
_, err := os.Stat(filePath)
if err == nil {
// File exists, check SHA
if sha != "" {
// Verify SHA
calculatedSHA, err := calculateSHA(filePath)
if err != nil {
return fmt.Errorf("failed to calculate SHA for file %q: %v", filePath, err)
}
if calculatedSHA == sha {
// SHA matches, skip downloading
log.Debug().Msgf("File %q already exists and matches the SHA. Skipping download", filePath)
return nil
}
// SHA doesn't match, delete the file and download again
err = os.Remove(filePath)
if err != nil {
return fmt.Errorf("failed to remove existing file %q: %v", filePath, err)
}
log.Debug().Msgf("Removed %q (SHA doesn't match)", filePath)
} else {
// SHA is missing, skip downloading
log.Debug().Msgf("File %q already exists. Skipping download", filePath)
return nil
}
} else if !os.IsNotExist(err) {
// Error occurred while checking file existence
return fmt.Errorf("failed to check file %q existence: %v", filePath, err)
}
log.Info().Msgf("Downloading %q", url)
// Download file
resp, err := http.Get(url)
if err != nil {
return fmt.Errorf("failed to download file %q: %v", filePath, err)
}
defer resp.Body.Close()
// Create parent directory
err = os.MkdirAll(filepath.Dir(filePath), 0755)
if err != nil {
return fmt.Errorf("failed to create parent directory for file %q: %v", filePath, err)
}
// Create and write file content
outFile, err := os.Create(filePath)
if err != nil {
return fmt.Errorf("failed to create file %q: %v", filePath, err)
}
defer outFile.Close()
progress := &progressWriter{
fileName: filePath,
total: resp.ContentLength,
hash: sha256.New(),
downloadStatus: downloadStatus,
}
_, err = io.Copy(io.MultiWriter(outFile, progress), resp.Body)
if err != nil {
return fmt.Errorf("failed to write file %q: %v", filePath, err)
}
if sha != "" {
// Verify SHA
calculatedSHA := fmt.Sprintf("%x", progress.hash.Sum(nil))
if calculatedSHA != sha {
log.Debug().Msgf("SHA mismatch for file %q ( calculated: %s != metadata: %s )", filePath, calculatedSHA, sha)
return fmt.Errorf("SHA mismatch for file %q ( calculated: %s != metadata: %s )", filePath, calculatedSHA, sha)
}
} else {
log.Debug().Msgf("SHA missing for %q. Skipping validation", filePath)
}
log.Info().Msgf("File %q downloaded and verified", filePath)
if IsArchive(filePath) {
basePath := filepath.Dir(filePath)
log.Info().Msgf("File %q is an archive, uncompressing to %s", filePath, basePath)
if err := ExtractArchive(filePath, basePath); err != nil {
log.Debug().Msgf("Failed decompressing %q: %s", filePath, err.Error())
return err
}
}
return nil
}
type progressWriter struct {
fileName string
total int64
written int64
downloadStatus func(string, string, string, float64)
hash hash.Hash
}
func (pw *progressWriter) Write(p []byte) (n int, err error) {
n, err = pw.hash.Write(p)
pw.written += int64(n)
if pw.total > 0 {
percentage := float64(pw.written) / float64(pw.total) * 100
//log.Debug().Msgf("Downloading %s: %s/%s (%.2f%%)", pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage)
pw.downloadStatus(pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage)
} else {
pw.downloadStatus(pw.fileName, formatBytes(pw.written), "", 0)
}
return
}
// MD5 of a string
func MD5(s string) string {
return fmt.Sprintf("%x", md5.Sum([]byte(s)))
}
func formatBytes(bytes int64) string {
const unit = 1024
if bytes < unit {
return strconv.FormatInt(bytes, 10) + " B"
}
div, exp := int64(unit), 0
for n := bytes / unit; n >= unit; n /= unit {
div *= unit
exp++
}
return fmt.Sprintf("%.1f %ciB", float64(bytes)/float64(div), "KMGTPE"[exp])
}
func calculateSHA(filePath string) (string, error) {
file, err := os.Open(filePath)
if err != nil {
return "", err
}
defer file.Close()
hash := sha256.New()
if _, err := io.Copy(hash, file); err != nil {
return "", err
}
return fmt.Sprintf("%x", hash.Sum(nil)), nil
}
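Putting the new helpers together, a hedged usage sketch mirroring how `Preload` drives them (the import path and function signatures are from this diff; the URL, destination, and callback body are illustrative):

```go
// Usage sketch for the new pkg/utils download helpers.
package main

import (
	"fmt"

	"github.com/go-skynet/LocalAI/pkg/utils"
)

func main() {
	// huggingface:// URLs are rewritten to a resolvable https URL.
	url := utils.ConvertURL("huggingface://TheBloke/Mixtral-8x7B-v0.1-GGUF/mixtral-8x7b-v0.1.Q2_K.gguf@main")
	// Files are stored under the hex MD5 of the URL, as Preload does.
	dest := "/tmp/" + utils.MD5(url)
	err := utils.DownloadFile(url, dest, "" /* no SHA: skip validation */, func(name, current, total string, percent float64) {
		fmt.Printf("Downloading %s: %s/%s (%.2f%%)\n", name, current, total, percent)
	})
	if err != nil {
		panic(err)
	}
}
```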