Update gpt4all to fix thread counts (#249 )

docs: Update README
examples: add langchain-chroma example (#248 )
2026-02-03 03:02:38 -05:00 · 2023-05-13 09:37:46 +02:00 · 2023-05-13 00:46:48 +02:00 · 2023-05-12 22:20:07 +02:00 · 2023-05-12 17:16:49 +02:00 · 2023-05-12 14:10:18 +02:00
26 changed files with 624 additions and 524 deletions
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -30,6 +30,9 @@ jobs:
          - repository: "go-skynet/bloomz.cpp"
            variable: "BLOOMZ_VERSION"
            branch: "main"
+          - repository: "go-skynet/gpt4all"
+            variable: "GPT4ALL_VERSION"
+            branch: "main"
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -21,7 +21,7 @@ jobs:
      - name: Dependencies
        run: |
          sudo apt-get update
-          sudo apt-get install build-essential
+          sudo apt-get install build-essential ffmpeg
      - name: Test
        run: |
          make test
@@ -38,7 +38,7 @@ jobs:
      - name: Dependencies
        run: |
          brew update
-          brew install sdl2
+          brew install sdl2 ffmpeg
      - name: Test
        run: |
          make test
--- a/74
+++ b/74
@@ -3,13 +3,14 @@ GOTEST=$(GOCMD) test
 GOVET=$(GOCMD) vet
 BINARY_NAME=local-ai

-GOLLAMA_VERSION?=c03e8adbc45c866e0f6d876af1887d6b01d57eb4
-GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
-GOGPT2_VERSION?=abf038a7d8efa4eefdc7c891f05ad33d4e59e49d
+GOLLAMA_VERSION?=70593fccbe4b01dedaab805b0f25cb58192c7b38
+GPT4ALL_REPO?=https://github.com/go-skynet/gpt4all
+GPT4ALL_VERSION?=a330bfe26e9e35ca402e16df18973a3b162fb4db
+GOGPT2_VERSION?=92421a8cf61ed6e03babd9067af292b094cb1307
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47
 WHISPER_CPP_VERSION?=bf2449dfae35a46b2cd92ab22661ce81a48d4993
-BERT_VERSION?=ec771ec715576ac050263bb7bb74bfd616a5ba13
+BERT_VERSION?=ac22f8f74aec5e31bc46242c17e7d511f127856b
 BLOOMZ_VERSION?=e9366e82abdfe70565644fbfae9651976714efd1


@@ -19,8 +20,8 @@ WHITE  := $(shell tput -Txterm setaf 7)
 CYAN   := $(shell tput -Txterm setaf 6)
 RESET  := $(shell tput -Txterm sgr0)

-C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
-LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
+C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
+LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz

 # Use this if you want to set the default behavior
 ifndef BUILD_TYPE
@@ -37,19 +38,26 @@ endif

 all: help

-## GPT4ALL-J
-go-gpt4all-j:
-	git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j
-	cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION) && git submodule update --init --recursive --depth 1
+## GPT4ALL
+gpt4all:
+	git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all
+	cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
 	# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
-	@find ./go-gpt4all-j -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
-	@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
-	@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
-	@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
-	@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
-	@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
-	@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
-	@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
+	@find ./gpt4all -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
+	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
+	@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
+	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
+	@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
+	@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
+	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
+	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
+	@find ./gpt4all -type f -name "*.go" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
+	@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
+	@find ./gpt4all -type f -name "*.txt" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
+	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
+	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
+	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
+	mv ./gpt4all/gpt4all-backend/llama.cpp/llama_util.h ./gpt4all/gpt4all-backend/llama.cpp/gptjllama_util.h

 ## BERT embeddings
 go-bert:
@@ -85,8 +93,8 @@ bloomz/libbloomz.a: bloomz
 go-bert/libgobert.a: go-bert
 	$(MAKE) -C go-bert libgobert.a

-go-gpt4all-j/libgptj.a: go-gpt4all-j
-	$(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a
+gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
+	$(MAKE) -C gpt4all/gpt4all-bindings/golang/ $(GENERIC_PREFIX)libgpt4all.a

 ## CEREBRAS GPT
 go-gpt2: 
@@ -96,8 +104,12 @@ go-gpt2:
 	@find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
 	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
 	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
-	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
-	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
+	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
+	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
+	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
+	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
+	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
+	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
 	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +

 go-gpt2/libgpt2.a: go-gpt2
@@ -119,20 +131,20 @@ go-llama/libbinding.a: go-llama

 replace:
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
-	$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j
+	$(GOCMD) mod edit -replace github.com/nomic/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
 	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
 	$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz

-prepare-sources: go-llama go-gpt2 go-gpt4all-j go-rwkv whisper.cpp go-bert bloomz
+prepare-sources: go-llama go-gpt2 gpt4all go-rwkv whisper.cpp go-bert bloomz replace
 	$(GOCMD) mod download

 ## GENERIC
 rebuild: ## Rebuilds the project
 	$(MAKE) -C go-llama clean
-	$(MAKE) -C go-gpt4all-j clean
+	$(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean
 	$(MAKE) -C go-gpt2 clean
 	$(MAKE) -C go-rwkv clean
 	$(MAKE) -C whisper.cpp clean
@@ -140,11 +152,11 @@ rebuild: ## Rebuilds the project
 	$(MAKE) -C bloomz clean
 	$(MAKE) build

-prepare: prepare-sources go-llama/libbinding.a go-gpt4all-j/libgptj.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a replace ## Prepares for building
+prepare: prepare-sources gpt4all/gpt4all-bindings/golang/libgpt4all.a go-llama/libbinding.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a  ## Prepares for building

 clean: ## Remove build related file
 	rm -fr ./go-llama
-	rm -rf ./go-gpt4all-j
+	rm -rf ./gpt4all
 	rm -rf ./go-gpt2
 	rm -rf ./go-rwkv
 	rm -rf ./go-bert
@@ -156,7 +168,7 @@ clean: ## Remove build related file
 build: prepare ## Build the project
 	$(info ${GREEN}I local-ai build info:${RESET})
 	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
-	C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./
+	C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -x -o $(BINARY_NAME) ./

 generic-build: ## Build the project using generic
 	BUILD_TYPE="generic" $(MAKE) build
@@ -167,12 +179,16 @@ run: prepare ## run local-ai

 test-models/testmodel:
 	mkdir test-models
+	mkdir test-dir
 	wget https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerberas-111m-q4_0.bin -O test-models/testmodel
+	wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
+	wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O test-models/bert
+	wget https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
 	cp tests/fixtures/* test-models

 test: prepare test-models/testmodel
 	cp tests/fixtures/* test-models
-	@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./api
+	@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} TEST_DIR=$(abspath ./)/test-dir/ CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./api

 ## Help:
 help: ## Show this help.
--- a/README.md
+++ b/README.md
@@ -25,7 +25,9 @@ See [examples on how to integrate LocalAI](https://github.com/go-skynet/LocalAI/

 ## News

- 10-05-2023: __1.8.0__ released! 🔥 Added support for fast and accurate embeddings with `bert.cpp` ( https://github.com/go-skynet/LocalAI/pull/222 )
+- 12-05-2023: __v1.10.0__ released! 🔥🔥 Updated `gpt4all` bindings. Added support for GPTNeoX (experimental), RedPajama (experimental), Starcoder (experimental), Replit (experimental), MosaicML MPT. The `embeddings` endpoint now also supports token arrays. See the [langchain-chroma](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-chroma) example! Note: this update does NOT include https://github.com/ggerganov/llama.cpp/pull/1405, which makes models incompatible.
+- 11-05-2023: __v1.9.0__ released! 🔥 Important whisper updates ( https://github.com/go-skynet/LocalAI/pull/233 https://github.com/go-skynet/LocalAI/pull/229 ) and extended gpt4all model families support ( https://github.com/go-skynet/LocalAI/pull/232 ). Redpajama/dolly experimental ( https://github.com/go-skynet/LocalAI/pull/214 )
+- 10-05-2023: __v1.8.0__ released! 🔥 Added support for fast and accurate embeddings with `bert.cpp` ( https://github.com/go-skynet/LocalAI/pull/222 )
 - 09-05-2023: Added experimental support for transcriptions endpoint ( https://github.com/go-skynet/LocalAI/pull/211 )
 - 08-05-2023: Support for embeddings with models using the `llama.cpp` backend ( https://github.com/go-skynet/LocalAI/pull/207 )
 - 02-05-2023: Support for `rwkv.cpp` models ( https://github.com/go-skynet/LocalAI/pull/158 ) and for `/edits` endpoint
@@ -35,7 +37,8 @@ Twitter: [@LocalAI_API](https://twitter.com/LocalAI_API) and [@mudler_it](https:

 ### Blogs and articles

- [Tutorial to use k8sgpt with LocalAI](https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65) - excellent usecase for localAI, using AI to analyse Kubernetes clusters.
+- [Question Answering on Documents locally with LangChain, LocalAI, Chroma, and GPT4All](https://mudler.pm/posts/localai-question-answering/) by Ettore Di Giacinto
+- [Tutorial to use k8sgpt with LocalAI](https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65) - excellent use case for LocalAI, using AI to analyse Kubernetes clusters, by Tyler Gillson

 ## Contribute and help

@@ -73,7 +76,7 @@ Note: You might need to convert older models to the new format, see [here](https

 A full example on how to run a rwkv model is in the [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv).

-Note: rwkv models have an associated tokenizer along that needs to be provided with it:
+Note: rwkv models need to specify the backend `rwkv` in the YAML config files, and have an associated tokenizer that needs to be provided along with them:

 ```
 36464540 -rw-r--r--  1 mudler mudler 1.2G May  3 10:51 rwkv_small
@@ -91,6 +94,30 @@ It should also be compatible with StableLM and GPTNeoX ggml models (untested).
 Depending on the model you are attempting to run might need more RAM or CPU resources. Check out also [here](https://github.com/ggerganov/llama.cpp#memorydisk-requirements) for `ggml` based backends. `rwkv` is less expensive on resources.


+### Model compatibility table
+
+<details>
+
+| Backend         | Compatible models     | Completion/Chat endpoint | Audio transcription | Embeddings support                | Token stream support | Github                                     | Bindings                                  |
+|-----------------|-----------------------|--------------------------|---------------------|-----------------------------------|----------------------|--------------------------------------------|-------------------------------------------|
+| llama           | Vicuna, Alpaca, LLaMa | yes                      | no                  | yes (doesn't seem to be accurate) | yes                  | https://github.com/ggerganov/llama.cpp     | https://github.com/go-skynet/go-llama.cpp |
+| gpt4all-llama   | Vicuna, Alpaca, LLaMa | yes                      | no                  | no                                | yes                  | https://github.com/nomic-ai/gpt4all        | https://github.com/go-skynet/gpt4all      |
+| gpt4all-mpt     | MPT                   | yes                      | no                  | no                                | yes                  | https://github.com/nomic-ai/gpt4all        | https://github.com/go-skynet/gpt4all      |
+| gpt4all-j       | GPT4ALL-J             | yes                      | no                  | no                                | yes                  | https://github.com/nomic-ai/gpt4all        | https://github.com/go-skynet/gpt4all      |
+| gpt2            | GPT/NeoX, Cerebras    | yes                      | no                  | no                                | no                   | https://github.com/ggerganov/ggml          | https://github.com/go-skynet/go-gpt2.cpp  |
+| dolly           | Dolly                 | yes                      | no                  | no                                | no                   | https://github.com/ggerganov/ggml          | https://github.com/go-skynet/go-gpt2.cpp  |
+| redpajama       | RedPajama             | yes                      | no                  | no                                | no                   | https://github.com/ggerganov/ggml          | https://github.com/go-skynet/go-gpt2.cpp  |
+| stableLM        | StableLM GPT/NeoX     | yes                      | no                  | no                                | no                   | https://github.com/ggerganov/ggml          | https://github.com/go-skynet/go-gpt2.cpp  |
+| replit       | Replit             | yes                      | no                  | no                                | no                   | https://github.com/ggerganov/ggml          | https://github.com/go-skynet/go-gpt2.cpp  |
+| gptneox       | GPT NeoX             | yes                      | no                  | no                                | no                   | https://github.com/ggerganov/ggml          | https://github.com/go-skynet/go-gpt2.cpp  |
+| starcoder       | Starcoder             | yes                      | no                  | no                                | no                   | https://github.com/ggerganov/ggml          | https://github.com/go-skynet/go-gpt2.cpp  |
+| bloomz          | Bloom                 | yes                      | no                  | no                                | no                   | https://github.com/NouamaneTazi/bloomz.cpp | https://github.com/go-skynet/bloomz.cpp   |
+| rwkv            | RWKV                  | yes                      | no                  | no                                | yes                  | https://github.com/saharNooby/rwkv.cpp     | https://github.com/donomii/go-rwkv.cpp    |
+| bert-embeddings | bert                  | no                       | no                  | yes                               | no                   | https://github.com/skeskinen/bert.cpp      | https://github.com/go-skynet/go-bert.cpp  |
+| whisper         | whisper               | no                       | yes                 | no                                | no                   | https://github.com/ggerganov/whisper.cpp   | https://github.com/ggerganov/whisper.cpp  |
+
+</details>
+
 ## Usage

 > `LocalAI` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).
@@ -545,6 +572,7 @@ name: text-embedding-ada-002
 parameters:
  model: bert
 embeddings: true
+backend: "bert-embeddings"
 ```

 There is an example available [here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/).
@@ -563,6 +591,7 @@ Download one of the models from https://huggingface.co/ggerganov/whisper.cpp/tre

 ```yaml
 name: whisper-1
+backend: whisper
 parameters:
  model: whisper-en
 ```
--- a/api/api.go
+++ b/api/api.go
@@ -12,7 +12,7 @@ import (
 	"github.com/rs/zerolog/log"
 )

-func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App {
+func App(configFile string, loader *model.ModelLoader, uploadLimitMB, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App {
 	zerolog.SetGlobalLevel(zerolog.InfoLevel)
 	if debug {
 		zerolog.SetGlobalLevel(zerolog.DebugLevel)
@@ -20,6 +20,7 @@ func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16

 	// Return errors as JSON responses
 	app := fiber.New(fiber.Config{
+		BodyLimit:             uploadLimitMB * 1024 * 1024, // max request body size in bytes (uploadLimitMB is in MB); fiber's own default is 4MB
 		DisableStartupMessage: disableMessage,
 		// Override default error handler
 		ErrorHandler: func(ctx *fiber.Ctx, err error) error {
--- a/api/api_test.go
+++ b/api/api_test.go
@@ -3,6 +3,8 @@ package api_test
 import (
 	"context"
 	"os"
+	"path/filepath"
+	"runtime"

 	. "github.com/go-skynet/LocalAI/api"
 	"github.com/go-skynet/LocalAI/pkg/model"
@@ -23,7 +25,7 @@ var _ = Describe("API test", func() {
 	Context("API query", func() {
 		BeforeEach(func() {
 			modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
-			app = App("", modelLoader, 1, 512, false, true, true)
+			app = App("", modelLoader, 15, 1, 512, false, true, true)
 			go app.Listen("127.0.0.1:9090")

 			defaultConfig := openai.DefaultConfig("")
@@ -45,8 +47,7 @@ var _ = Describe("API test", func() {
 		It("returns the models list", func() {
 			models, err := client.ListModels(context.TODO())
 			Expect(err).ToNot(HaveOccurred())
-			Expect(len(models.Models)).To(Equal(3))
-			Expect(models.Models[0].ID).To(Equal("testmodel"))
+			Expect(len(models.Models)).To(Equal(7))
 		})
 		It("can generate completions", func() {
 			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
@@ -79,15 +80,55 @@ var _ = Describe("API test", func() {
 		It("returns errors", func() {
 			_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
 			Expect(err).To(HaveOccurred())
-			Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 9 errors occurred:"))
+			Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 12 errors occurred:"))
+		})
+		It("transcribes audio", func() {
+			if runtime.GOOS != "linux" {
+				Skip("test supported only on linux")
+			}
+			resp, err := client.CreateTranscription(
+				context.Background(),
+				openai.AudioRequest{
+					Model:    openai.Whisper1,
+					FilePath: filepath.Join(os.Getenv("TEST_DIR"), "audio.wav"),
+				},
+			)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(resp.Text).To(ContainSubstring("This is the Micro Machine Man presenting"))
 		})

+		It("calculate embeddings", func() {
+			if runtime.GOOS != "linux" {
+				Skip("test supported only on linux")
+			}
+			resp, err := client.CreateEmbeddings(
+				context.Background(),
+				openai.EmbeddingRequest{
+					Model: openai.AdaEmbeddingV2,
+					Input: []string{"sun", "cat"},
+				},
+			)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
+			Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
+
+			sunEmbedding := resp.Data[0].Embedding
+			resp2, err := client.CreateEmbeddings(
+				context.Background(),
+				openai.EmbeddingRequest{
+					Model: openai.AdaEmbeddingV2,
+					Input: []string{"sun"},
+				},
+			)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding))
+		})
 	})

 	Context("Config file", func() {
 		BeforeEach(func() {
 			modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
-			app = App(os.Getenv("CONFIG_FILE"), modelLoader, 1, 512, false, true, true)
+			app = App(os.Getenv("CONFIG_FILE"), modelLoader, 5, 1, 512, false, true, true)
 			go app.Listen("127.0.0.1:9090")

 			defaultConfig := openai.DefaultConfig("")
@@ -108,8 +149,7 @@ var _ = Describe("API test", func() {

 			models, err := client.ListModels(context.TODO())
 			Expect(err).ToNot(HaveOccurred())
-			Expect(len(models.Models)).To(Equal(5))
-			Expect(models.Models[0].ID).To(Equal("testmodel"))
+			Expect(len(models.Models)).To(Equal(9))
 		})
 		It("can generate chat completions from config file", func() {
 			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
@@ -134,5 +174,6 @@ var _ = Describe("API test", func() {
 			Expect(len(resp.Choices)).To(Equal(1))
 			Expect(resp.Choices[0].Text).ToNot(BeEmpty())
 		})
+
 	})
 })
--- a/api/config.go
+++ b/api/config.go
@@ -285,5 +285,10 @@ func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug
 		}
 	}

+	// Enforce debug flag if passed from CLI
+	if debug {
+		config.Debug = true
+	}
+
 	return config, input, nil
 }
--- a/api/openai.go
+++ b/api/openai.go
@@ -12,8 +12,10 @@ import (
 	"path/filepath"
 	"strings"

+	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
 	model "github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/whisper"
+	whisperutil "github.com/go-skynet/LocalAI/pkg/whisper"
+	llama "github.com/go-skynet/go-llama.cpp"
 	"github.com/gofiber/fiber/v2"
 	"github.com/rs/zerolog/log"
 	"github.com/valyala/fasthttp"
@@ -407,14 +409,13 @@ func transcriptEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
 		// retrieve the file data from the request
 		file, err := c.FormFile("file")
 		if err != nil {
-			return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
+			return err
 		}
 		f, err := file.Open()
 		if err != nil {
-			return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
+			return err
 		}
 		defer f.Close()
-		log.Debug().Msgf("Audio file: %+v", file)

 		dir, err := os.MkdirTemp("", "whisper")

@@ -426,24 +427,33 @@ func transcriptEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
 		dst := filepath.Join(dir, path.Base(file.Filename))
 		dstFile, err := os.Create(dst)
 		if err != nil {
-			return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
+			return err
 		}

 		if _, err := io.Copy(dstFile, f); err != nil {
-			log.Debug().Msgf("Audio file %+v - %+v - err %+v", file.Filename, dst, err)
+			log.Debug().Msgf("Audio file copying error %+v - %+v - err %+v", file.Filename, dst, err)
 			return err
 		}

 		log.Debug().Msgf("Audio file copied to: %+v", dst)

-		whisperModel, err := loader.WhisperLoader("whisper", config.Model)
+		whisperModel, err := loader.BackendLoader(model.WhisperBackend, config.Model, []llama.ModelOption{}, uint32(config.Threads))
 		if err != nil {
-			return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
+			return err
 		}

-		tr, err := whisper.Transcript(whisperModel, dst, input.Language)
+		if whisperModel == nil {
+			return fmt.Errorf("could not load whisper model")
+		}
+
+		w, ok := whisperModel.(whisper.Model)
+		if !ok {
+			return fmt.Errorf("loader returned non-whisper object")
+		}
+
+		tr, err := whisperutil.Transcript(w, dst, input.Language, uint(config.Threads))
 		if err != nil {
-			return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
+			return err
 		}

 		log.Debug().Msgf("Trascribed: %+v", tr)
--- a/api/prediction.go
+++ b/api/prediction.go
@@ -11,8 +11,8 @@ import (
 	"github.com/go-skynet/bloomz.cpp"
 	bert "github.com/go-skynet/go-bert.cpp"
 	gpt2 "github.com/go-skynet/go-gpt2.cpp"
-	gptj "github.com/go-skynet/go-gpt4all-j.cpp"
 	llama "github.com/go-skynet/go-llama.cpp"
+	gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang"
 )

 // mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
@@ -68,7 +68,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config)
 	case *bert.Bert:
 		fn = func() ([]float32, error) {
 			if len(tokens) > 0 {
-				return nil, fmt.Errorf("embeddings endpoint for this model supports only string")
+				return model.TokenEmbeddings(tokens, bert.SetThreads(c.Threads))
 			}
 			return model.Embeddings(s, bert.SetThreads(c.Threads))
 		}
@@ -199,6 +199,78 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback

 			return response, nil
 		}
+	case *gpt2.GPTNeoX:
+		fn = func() (string, error) {
+			// Generate the prediction using the language model
+			predictOptions := []gpt2.PredictOption{
+				gpt2.SetTemperature(c.Temperature),
+				gpt2.SetTopP(c.TopP),
+				gpt2.SetTopK(c.TopK),
+				gpt2.SetTokens(c.Maxtokens),
+				gpt2.SetThreads(c.Threads),
+			}
+
+			if c.Batch != 0 {
+				predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
+			}
+
+			if c.Seed != 0 {
+				predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
+			}
+
+			return model.Predict(
+				s,
+				predictOptions...,
+			)
+		}
+	case *gpt2.Replit:
+		fn = func() (string, error) {
+			// Generate the prediction using the language model
+			predictOptions := []gpt2.PredictOption{
+				gpt2.SetTemperature(c.Temperature),
+				gpt2.SetTopP(c.TopP),
+				gpt2.SetTopK(c.TopK),
+				gpt2.SetTokens(c.Maxtokens),
+				gpt2.SetThreads(c.Threads),
+			}
+
+			if c.Batch != 0 {
+				predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
+			}
+
+			if c.Seed != 0 {
+				predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
+			}
+
+			return model.Predict(
+				s,
+				predictOptions...,
+			)
+		}
+	case *gpt2.Starcoder:
+		fn = func() (string, error) {
+			// Generate the prediction using the language model
+			predictOptions := []gpt2.PredictOption{
+				gpt2.SetTemperature(c.Temperature),
+				gpt2.SetTopP(c.TopP),
+				gpt2.SetTopK(c.TopK),
+				gpt2.SetTokens(c.Maxtokens),
+				gpt2.SetThreads(c.Threads),
+			}
+
+			if c.Batch != 0 {
+				predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
+			}
+
+			if c.Seed != 0 {
+				predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
+			}
+
+			return model.Predict(
+				s,
+				predictOptions...,
+			)
+		}
 	case *gpt2.RedPajama:
 		fn = func() (string, error) {
 			// Generate the prediction using the language model
@@ -315,29 +387,35 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
 				predictOptions...,
 			)
 		}
-	case *gptj.GPTJ:
+	case *gpt4all.Model:
+		supportStreams = true
+
 		fn = func() (string, error) {
+			if tokenCallback != nil {
+				model.SetTokenCallback(tokenCallback)
+			}
+
 			// Generate the prediction using the language model
-			predictOptions := []gptj.PredictOption{
-				gptj.SetTemperature(c.Temperature),
-				gptj.SetTopP(c.TopP),
-				gptj.SetTopK(c.TopK),
-				gptj.SetTokens(c.Maxtokens),
-				gptj.SetThreads(c.Threads),
+			predictOptions := []gpt4all.PredictOption{
+				gpt4all.SetTemperature(c.Temperature),
+				gpt4all.SetTopP(c.TopP),
+				gpt4all.SetTopK(c.TopK),
+				gpt4all.SetTokens(c.Maxtokens),
 			}

 			if c.Batch != 0 {
-				predictOptions = append(predictOptions, gptj.SetBatch(c.Batch))
+				predictOptions = append(predictOptions, gpt4all.SetBatch(c.Batch))
 			}

-			if c.Seed != 0 {
-				predictOptions = append(predictOptions, gptj.SetSeed(c.Seed))
-			}
-
-			return model.Predict(
+			str, er := model.Predict(
 				s,
 				predictOptions...,
 			)
+			// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
+			// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
+			// after a stream event has occurred
+			model.SetTokenCallback(nil)
+			return str, er
 		}
 	case *llama.LLama:
 		supportStreams = true
--- a/examples/README.md
+++ b/examples/README.md
@@ -65,7 +65,7 @@ Run a slack bot which lets you talk directly with a model

 [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/slack-bot/)

-### Question answering on documents
+### Question answering on documents with llama-index

 _by [@mudler](https://github.com/mudler)_

@@ -73,6 +73,14 @@ Shows how to integrate with [Llama-Index](https://gpt-index.readthedocs.io/en/st

 [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/)

+### Question answering on documents with langchain and chroma
+
+_by [@mudler](https://github.com/mudler)_
+
+Shows how to integrate with `Langchain` and `Chroma` to enable question answering on a set of documents.
+
+[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-chroma/)
+
 ### Template for Runpod.io

 _by [@fHachenberg](https://github.com/fHachenberg)_
--- a/examples/langchain-chroma/README.md
+++ b/examples/langchain-chroma/README.md
@@ -0,0 +1,54 @@
+# Data query example
+
+This example makes use of [langchain and chroma](https://blog.langchain.dev/langchain-chroma/) to enable question answering on a set of documents.
+
+## Setup
+
+Download the models and start the API:
+
+```bash
+# Clone LocalAI
+git clone https://github.com/go-skynet/LocalAI
+
+cd LocalAI/examples/query_data
+
+wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
+wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
+
+# start with docker-compose
+docker-compose up -d --build
+```
+
+### Python requirements
+
+```
+pip install -r requirements.txt
+```
+
+### Create a storage
+
+In this step we will create a local vector database from our document set, so that we can later ask the LLM questions about it.
+
+```bash
+export OPENAI_API_BASE=http://localhost:8080/v1
+export OPENAI_API_KEY=sk-
+
+wget https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt
+python store.py
+```
+
+After it finishes, a directory "db" will be created containing the persisted vector index database.
+
+## Query
+
+We can now query the dataset. 
+
+```bash
+export OPENAI_API_BASE=http://localhost:8080/v1
+export OPENAI_API_KEY=sk-
+
+python query.py
+# President Trump recently stated during a press conference regarding tax reform legislation that "we're getting rid of all these loopholes." He also mentioned that he wants to simplify the system further through changes such as increasing the standard deduction amount and making other adjustments aimed at reducing taxpayers' overall burden.    
+```
+
+Keep in mind that, for now, the quality of the answers is hit or miss!
--- a/examples/langchain-chroma/models/completion.tmpl
+++ b/examples/langchain-chroma/models/completion.tmpl
@@ -0,0 +1 @@
+{{.Input}}
--- a/examples/langchain-chroma/models/embeddings.yaml
+++ b/examples/langchain-chroma/models/embeddings.yaml
@@ -0,0 +1,5 @@
+name: text-embedding-ada-002
+parameters:
+  model: bert
+backend: bert-embeddings
+embeddings: true
--- a/examples/langchain-chroma/models/gpt-3.5-turbo.yaml
+++ b/examples/langchain-chroma/models/gpt-3.5-turbo.yaml
@@ -0,0 +1,16 @@
+name: gpt-3.5-turbo
+parameters:
+  model: ggml-gpt4all-j
+  top_k: 80
+  temperature: 0.2
+  top_p: 0.7
+context_size: 1024
+stopwords:
+- "HUMAN:"
+- "GPT:"
+roles:
+  user: " "
+  system: " "
+template:
+  completion: completion
+  chat: gpt4all
--- a/examples/langchain-chroma/models/gpt4all.tmpl
+++ b/examples/langchain-chroma/models/gpt4all.tmpl
@@ -0,0 +1,4 @@
+The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
+### Prompt:
+{{.Input}}
+### Response:
--- a/examples/langchain-chroma/query.py
+++ b/examples/langchain-chroma/query.py
@@ -0,0 +1,31 @@
+
+import os
+from langchain.vectorstores import Chroma
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter,CharacterTextSplitter
+from langchain.llms import OpenAI
+from langchain.chains import VectorDBQA
+from langchain.document_loaders import TextLoader
+
+base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
+
+# Load and process the text
+loader = TextLoader('state_of_the_union.txt')
+documents = loader.load()
+
+text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=70)
+texts = text_splitter.split_documents(documents)
+
+# Embed and store the texts
+# Supplying a persist_directory will store the embeddings on disk
+persist_directory = 'db'
+
+embedding = OpenAIEmbeddings()
+
+# Now we can load the persisted database from disk, and use it as normal. 
+vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)
+qa = VectorDBQA.from_chain_type(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path), chain_type="stuff", vectorstore=vectordb)
+
+query = "What the president said about taxes ?"
+print(qa.run(query))
+
--- a/examples/langchain-chroma/requirements.txt
+++ b/examples/langchain-chroma/requirements.txt
@@ -0,0 +1,4 @@
+langchain==0.0.160
+openai==0.27.6
+chromadb==0.3.21
+llama-index==0.6.2
--- a/examples/langchain-chroma/store.py
+++ b/examples/langchain-chroma/store.py
@@ -0,0 +1,28 @@
+
+import os
+from langchain.vectorstores import Chroma
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter,TokenTextSplitter,CharacterTextSplitter
+from langchain.llms import OpenAI
+from langchain.chains import VectorDBQA
+from langchain.document_loaders import TextLoader
+
+base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
+
+# Load and process the text
+loader = TextLoader('state_of_the_union.txt')
+documents = loader.load()
+
+text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=70)
+#text_splitter = TokenTextSplitter()
+texts = text_splitter.split_documents(documents)
+
+# Embed and store the texts
+# Supplying a persist_directory will store the embeddings on disk
+persist_directory = 'db'
+
+embedding = OpenAIEmbeddings(model="text-embedding-ada-002")
+vectordb = Chroma.from_documents(documents=texts, embedding=embedding, persist_directory=persist_directory)
+
+vectordb.persist()
+vectordb = None
--- a/go.mod
+++ b/go.mod
@@ -3,11 +3,11 @@ module github.com/go-skynet/LocalAI
 go 1.19

 require (
-	github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be
+	github.com/donomii/go-rwkv.cpp v0.0.0-20230510174014-07166da10cb2
 	github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230509153812-1d17cd5bb37a
 	github.com/go-audio/wav v1.1.0
 	github.com/go-skynet/bloomz.cpp v0.0.0-20230510195113-ad7e89a0885f
-	github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea
+	github.com/go-skynet/go-bert.cpp v0.0.0-20230510124618-ec771ec71557
 	github.com/go-skynet/go-gpt2.cpp v0.0.0-20230509180201-d49823284cc6
 	github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
 	github.com/go-skynet/go-llama.cpp v0.0.0-20230510072905-70593fccbe4b
@@ -18,7 +18,7 @@ require (
 	github.com/otiai10/copy v1.11.0
 	github.com/otiai10/openaigo v1.1.0
 	github.com/rs/zerolog v1.29.1
-	github.com/sashabaranov/go-openai v1.9.3
+	github.com/sashabaranov/go-openai v1.9.4
 	github.com/swaggo/swag v1.16.1
 	github.com/urfave/cli/v2 v2.25.3
 	github.com/valyala/fasthttp v1.47.0
@@ -49,6 +49,7 @@ require (
 	github.com/mattn/go-colorable v0.1.13 // indirect
 	github.com/mattn/go-isatty v0.0.18 // indirect
 	github.com/mattn/go-runewidth v0.0.14 // indirect
+	github.com/nomic/gpt4all/gpt4all-bindings/golang v0.0.0-00010101000000-000000000000 // indirect
 	github.com/philhofer/fwd v1.1.2 // indirect
 	github.com/rivo/uniseg v0.2.0 // indirect
 	github.com/russross/blackfriday/v2 v2.1.0 // indirect
--- a/go.sum
+++ b/go.sum
@@ -18,6 +18,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be h1:3Hic97PY6hcw/SY44RuR7kyONkxd744RFeRrqckzwNQ=
 github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
+github.com/donomii/go-rwkv.cpp v0.0.0-20230510174014-07166da10cb2 h1:YNbUAyIRtaLODitigJU1EM5ubmMu5FmHtYAayJD6Vbg=
+github.com/donomii/go-rwkv.cpp v0.0.0-20230510174014-07166da10cb2/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35 h1:sMg/SgnMPS/HNUO/2kGm72vl8R9TmNIwgLFr2TNwR3g=
 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230509153812-1d17cd5bb37a h1:MlyiDLNCM/wjbv8U5Elj18NvaAgl61SGiRUpqQz5dfs=
@@ -44,6 +46,8 @@ github.com/go-skynet/bloomz.cpp v0.0.0-20230510195113-ad7e89a0885f h1:GW8RQa1RVe
 github.com/go-skynet/bloomz.cpp v0.0.0-20230510195113-ad7e89a0885f/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA=
 github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea h1:8Isk9D+Auth5OuXVAQPC3MO+5zF/2S7mvs2JZLw6a+8=
 github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea/go.mod h1:NHwIVvsg7Jh6p0M4uBLVmSMEaPUia6O6yjXUpLWVJmQ=
+github.com/go-skynet/go-bert.cpp v0.0.0-20230510124618-ec771ec71557 h1:LD66fKtvP2lmyuuKL8pBat/pVTKUbLs3L5fM/5lyi4w=
+github.com/go-skynet/go-bert.cpp v0.0.0-20230510124618-ec771ec71557/go.mod h1:NHwIVvsg7Jh6p0M4uBLVmSMEaPUia6O6yjXUpLWVJmQ=
 github.com/go-skynet/go-gpt2.cpp v0.0.0-20230509180201-d49823284cc6 h1:XshpypO6ekU09CI19vuzke2a1Es1lV5ZaxA7CUehu0E=
 github.com/go-skynet/go-gpt2.cpp v0.0.0-20230509180201-d49823284cc6/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
 github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c h1:48I7jpLNGiQeBmF0SFVVbREh8vlG0zN13v9LH5ctXis=
@@ -115,6 +119,8 @@ github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/sashabaranov/go-openai v1.9.3 h1:uNak3Rn5pPsKRs9bdT7RqRZEyej/zdZOEI2/8wvrFtM=
 github.com/sashabaranov/go-openai v1.9.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
+github.com/sashabaranov/go-openai v1.9.4 h1:KanoCEoowAI45jVXlenMCckutSRr39qOmSi9MyPBfZM=
+github.com/sashabaranov/go-openai v1.9.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
 github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
 github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
 github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4=
--- a/main.go
+++ b/main.go
@@ -62,6 +62,12 @@ func main() {
 				EnvVars:     []string{"CONTEXT_SIZE"},
 				Value:       512,
 			},
+			&cli.IntFlag{
+				Name:        "upload-limit",
+				DefaultText: "Default upload-limit. MB",
+				EnvVars:     []string{"UPLOAD_LIMIT"},
+				Value:       15,
+			},
 		},
 		Description: `
 LocalAI is a drop-in replacement OpenAI API which runs inference locally.
@@ -81,7 +87,7 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
 		Copyright: "go-skynet authors",
 		Action: func(ctx *cli.Context) error {
 			fmt.Printf("Starting LocalAI using %d threads, with models path: %s\n", ctx.Int("threads"), ctx.String("models-path"))
-			return api.App(ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
+			return api.App(ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("upload-limit"), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
 		},
 	}

--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -0,0 +1,182 @@
+package model
+
+import (
+	"fmt"
+	"strings"
+
+	rwkv "github.com/donomii/go-rwkv.cpp"
+	whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
+	bloomz "github.com/go-skynet/bloomz.cpp"
+	bert "github.com/go-skynet/go-bert.cpp"
+	gpt2 "github.com/go-skynet/go-gpt2.cpp"
+	llama "github.com/go-skynet/go-llama.cpp"
+	"github.com/hashicorp/go-multierror"
+	gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang"
+	"github.com/rs/zerolog/log"
+)
+
+const tokenizerSuffix = ".tokenizer.json"
+
+const (
+	LlamaBackend          = "llama"
+	BloomzBackend         = "bloomz"
+	StarcoderBackend      = "starcoder"
+	StableLMBackend       = "stablelm"
+	DollyBackend          = "dolly"
+	RedPajamaBackend      = "redpajama"
+	GPTNeoXBackend        = "gptneox"
+	ReplitBackend         = "replit"
+	Gpt2Backend           = "gpt2"
+	Gpt4AllLlamaBackend   = "gpt4all-llama"
+	Gpt4AllMptBackend     = "gpt4all-mpt"
+	Gpt4AllJBackend       = "gpt4all-j"
+	BertEmbeddingsBackend = "bert-embeddings"
+	RwkvBackend           = "rwkv"
+	WhisperBackend        = "whisper"
+)
+
+var backends []string = []string{
+	LlamaBackend,
+	Gpt4AllLlamaBackend,
+	Gpt4AllMptBackend,
+	Gpt4AllJBackend,
+	Gpt2Backend,
+	WhisperBackend,
+	RwkvBackend,
+	BloomzBackend,
+	StableLMBackend,
+	DollyBackend,
+	RedPajamaBackend,
+	GPTNeoXBackend,
+	ReplitBackend,
+	BertEmbeddingsBackend,
+	StarcoderBackend,
+}
+
+var starCoder = func(modelFile string) (interface{}, error) {
+	return gpt2.NewStarcoder(modelFile)
+}
+
+var redPajama = func(modelFile string) (interface{}, error) {
+	return gpt2.NewRedPajama(modelFile)
+}
+
+var dolly = func(modelFile string) (interface{}, error) {
+	return gpt2.NewDolly(modelFile)
+}
+
+var gptNeoX = func(modelFile string) (interface{}, error) {
+	return gpt2.NewGPTNeoX(modelFile)
+}
+
+var replit = func(modelFile string) (interface{}, error) {
+	return gpt2.NewReplit(modelFile)
+}
+
+var stableLM = func(modelFile string) (interface{}, error) {
+	return gpt2.NewStableLM(modelFile)
+}
+
+var bertEmbeddings = func(modelFile string) (interface{}, error) {
+	return bert.New(modelFile)
+}
+
+var bloomzLM = func(modelFile string) (interface{}, error) {
+	return bloomz.New(modelFile)
+}
+var gpt2LM = func(modelFile string) (interface{}, error) {
+	return gpt2.New(modelFile)
+}
+
+var whisperModel = func(modelFile string) (interface{}, error) {
+	return whisper.New(modelFile)
+}
+
+func llamaLM(opts ...llama.ModelOption) func(string) (interface{}, error) {
+	return func(s string) (interface{}, error) {
+		return llama.New(s, opts...)
+	}
+}
+
+func gpt4allLM(opts ...gpt4all.ModelOption) func(string) (interface{}, error) {
+	return func(s string) (interface{}, error) {
+		return gpt4all.New(s, opts...)
+	}
+}
+
+func rwkvLM(tokenFile string, threads uint32) func(string) (interface{}, error) {
+	return func(s string) (interface{}, error) {
+		model := rwkv.LoadFiles(s, tokenFile, threads)
+		if model == nil {
+			return nil, fmt.Errorf("could not load model")
+		}
+		return model, nil
+	}
+}
+
+func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
+	switch strings.ToLower(backendString) {
+	case LlamaBackend:
+		return ml.LoadModel(modelFile, llamaLM(llamaOpts...))
+	case BloomzBackend:
+		return ml.LoadModel(modelFile, bloomzLM)
+	case StableLMBackend:
+		return ml.LoadModel(modelFile, stableLM)
+	case DollyBackend:
+		return ml.LoadModel(modelFile, dolly)
+	case RedPajamaBackend:
+		return ml.LoadModel(modelFile, redPajama)
+	case Gpt2Backend:
+		return ml.LoadModel(modelFile, gpt2LM)
+	case GPTNeoXBackend:
+		return ml.LoadModel(modelFile, gptNeoX)
+	case ReplitBackend:
+		return ml.LoadModel(modelFile, replit)
+	case StarcoderBackend:
+		return ml.LoadModel(modelFile, starCoder)
+	case Gpt4AllLlamaBackend:
+		return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.LLaMAType)))
+	case Gpt4AllMptBackend:
+		return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.MPTType)))
+	case Gpt4AllJBackend:
+		return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.GPTJType)))
+	case BertEmbeddingsBackend:
+		return ml.LoadModel(modelFile, bertEmbeddings)
+	case RwkvBackend:
+		return ml.LoadModel(modelFile, rwkvLM(modelFile+tokenizerSuffix, threads))
+	case WhisperBackend:
+		return ml.LoadModel(modelFile, whisperModel)
+	default:
+		return nil, fmt.Errorf("backend unsupported: %s", backendString)
+	}
+}
+
+func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32) (interface{}, error) {
+	log.Debug().Msgf("Loading models greedly")
+
+	ml.mu.Lock()
+	m, exists := ml.models[modelFile]
+	if exists {
+		ml.mu.Unlock()
+		return m, nil
+	}
+	ml.mu.Unlock()
+	var err error
+
+	for _, b := range backends {
+		if b == BloomzBackend || b == WhisperBackend || b == RwkvBackend { // do not autoload bloomz/whisper/rwkv
+			continue
+		}
+		log.Debug().Msgf("[%s] Attempting to load", b)
+		model, modelerr := ml.BackendLoader(b, modelFile, llamaOpts, threads)
+		if modelerr == nil && model != nil {
+			log.Debug().Msgf("[%s] Loads OK", b)
+			return model, nil
+		} else if modelerr != nil {
+			err = multierror.Append(err, modelerr)
+			log.Debug().Msgf("[%s] Fails: %s", b, modelerr.Error())
+		}
+	}
+
+	return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
+}
--- a/pkg/model/loader.go
+++ b/pkg/model/loader.go
@@ -10,14 +10,6 @@ import (
 	"sync"
 	"text/template"

-	rwkv "github.com/donomii/go-rwkv.cpp"
-	whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
-	bloomz "github.com/go-skynet/bloomz.cpp"
-	bert "github.com/go-skynet/go-bert.cpp"
-	gpt2 "github.com/go-skynet/go-gpt2.cpp"
-	gptj "github.com/go-skynet/go-gpt4all-j.cpp"
-	llama "github.com/go-skynet/go-llama.cpp"
-	"github.com/hashicorp/go-multierror"
 	"github.com/rs/zerolog/log"
 )

@@ -25,33 +17,15 @@ type ModelLoader struct {
 	ModelPath string
 	mu        sync.Mutex
 	// TODO: this needs generics
-	models            map[string]*llama.LLama
-	gptmodels         map[string]*gptj.GPTJ
-	gpt2models        map[string]*gpt2.GPT2
-	gptstablelmmodels map[string]*gpt2.StableLM
-	dollymodels       map[string]*gpt2.Dolly
-	redpajama         map[string]*gpt2.RedPajama
-	rwkv              map[string]*rwkv.RwkvState
-	bloomz            map[string]*bloomz.Bloomz
-	bert              map[string]*bert.Bert
-	promptsTemplates  map[string]*template.Template
-	whisperModels     map[string]whisper.Model
+	models           map[string]interface{}
+	promptsTemplates map[string]*template.Template
 }

 func NewModelLoader(modelPath string) *ModelLoader {
 	return &ModelLoader{
-		ModelPath:         modelPath,
-		gpt2models:        make(map[string]*gpt2.GPT2),
-		gptmodels:         make(map[string]*gptj.GPTJ),
-		gptstablelmmodels: make(map[string]*gpt2.StableLM),
-		dollymodels:       make(map[string]*gpt2.Dolly),
-		redpajama:         make(map[string]*gpt2.RedPajama),
-		models:            make(map[string]*llama.LLama),
-		rwkv:              make(map[string]*rwkv.RwkvState),
-		bloomz:            make(map[string]*bloomz.Bloomz),
-		bert:              make(map[string]*bert.Bert),
-		promptsTemplates:  make(map[string]*template.Template),
-		whisperModels:     make(map[string]whisper.Model),
+		ModelPath:        modelPath,
+		models:           make(map[string]interface{}),
+		promptsTemplates: make(map[string]*template.Template),
 	}
 }

@@ -136,271 +110,11 @@ func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
 	return nil
 }

-func (ml *ModelLoader) LoadRedPajama(modelName string) (*gpt2.RedPajama, error) {
+func (ml *ModelLoader) LoadModel(modelName string, loader func(string) (interface{}, error)) (interface{}, error) {
 	ml.mu.Lock()
 	defer ml.mu.Unlock()

 	// Check if we already have a loaded model
-	if !ml.ExistsInModelPath(modelName) {
-		return nil, fmt.Errorf("model does not exist")
-	}
-
-	if m, ok := ml.redpajama[modelName]; ok {
-		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
-		return m, nil
-	}
-
-	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.ModelPath, modelName)
-	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
-
-	model, err := gpt2.NewRedPajama(modelFile)
-	if err != nil {
-		return nil, err
-	}
-
-	// If there is a prompt template, load it
-	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
-		return nil, err
-	}
-
-	ml.redpajama[modelName] = model
-	return model, err
-}
-
-func (ml *ModelLoader) LoadDollyModel(modelName string) (*gpt2.Dolly, error) {
-	ml.mu.Lock()
-	defer ml.mu.Unlock()
-
-	// Check if we already have a loaded model
-	if !ml.ExistsInModelPath(modelName) {
-		return nil, fmt.Errorf("model does not exist")
-	}
-
-	if m, ok := ml.dollymodels[modelName]; ok {
-		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
-		return m, nil
-	}
-
-	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.ModelPath, modelName)
-	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
-
-	model, err := gpt2.NewDolly(modelFile)
-	if err != nil {
-		return nil, err
-	}
-
-	// If there is a prompt template, load it
-	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
-		return nil, err
-	}
-
-	ml.dollymodels[modelName] = model
-	return model, err
-}
-
-func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, error) {
-	ml.mu.Lock()
-	defer ml.mu.Unlock()
-
-	// Check if we already have a loaded model
-	if !ml.ExistsInModelPath(modelName) {
-		return nil, fmt.Errorf("model does not exist")
-	}
-
-	if m, ok := ml.gptstablelmmodels[modelName]; ok {
-		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
-		return m, nil
-	}
-
-	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.ModelPath, modelName)
-	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
-
-	model, err := gpt2.NewStableLM(modelFile)
-	if err != nil {
-		return nil, err
-	}
-
-	// If there is a prompt template, load it
-	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
-		return nil, err
-	}
-
-	ml.gptstablelmmodels[modelName] = model
-	return model, err
-}
-
-func (ml *ModelLoader) LoadBERT(modelName string) (*bert.Bert, error) {
-	ml.mu.Lock()
-	defer ml.mu.Unlock()
-
-	// Check if we already have a loaded model
-	if !ml.ExistsInModelPath(modelName) {
-		return nil, fmt.Errorf("model does not exist")
-	}
-
-	if m, ok := ml.bert[modelName]; ok {
-		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
-		return m, nil
-	}
-
-	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.ModelPath, modelName)
-	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
-
-	model, err := bert.New(modelFile)
-	if err != nil {
-		return nil, err
-	}
-
-	// If there is a prompt template, load it
-	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
-		return nil, err
-	}
-
-	ml.bert[modelName] = model
-	return model, err
-}
-
-func (ml *ModelLoader) LoadBloomz(modelName string) (*bloomz.Bloomz, error) {
-	ml.mu.Lock()
-	defer ml.mu.Unlock()
-
-	// Check if we already have a loaded model
-	if !ml.ExistsInModelPath(modelName) {
-		return nil, fmt.Errorf("model does not exist")
-	}
-
-	if m, ok := ml.bloomz[modelName]; ok {
-		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
-		return m, nil
-	}
-
-	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.ModelPath, modelName)
-	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
-
-	model, err := bloomz.New(modelFile)
-	if err != nil {
-		return nil, err
-	}
-
-	// If there is a prompt template, load it
-	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
-		return nil, err
-	}
-
-	ml.bloomz[modelName] = model
-	return model, err
-}
-
-func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
-	ml.mu.Lock()
-	defer ml.mu.Unlock()
-
-	// Check if we already have a loaded model
-	if !ml.ExistsInModelPath(modelName) {
-		return nil, fmt.Errorf("model does not exist")
-	}
-
-	if m, ok := ml.gpt2models[modelName]; ok {
-		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
-		return m, nil
-	}
-
-	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.ModelPath, modelName)
-	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
-
-	model, err := gpt2.New(modelFile)
-	if err != nil {
-		return nil, err
-	}
-
-	// If there is a prompt template, load it
-	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
-		return nil, err
-	}
-
-	ml.gpt2models[modelName] = model
-	return model, err
-}
-
-func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
-	ml.mu.Lock()
-	defer ml.mu.Unlock()
-
-	// Check if we already have a loaded model
-	if !ml.ExistsInModelPath(modelName) {
-		return nil, fmt.Errorf("model does not exist")
-	}
-
-	if m, ok := ml.gptmodels[modelName]; ok {
-		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
-		return m, nil
-	}
-
-	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.ModelPath, modelName)
-	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
-
-	model, err := gptj.New(modelFile)
-	if err != nil {
-		return nil, err
-	}
-
-	// If there is a prompt template, load it
-	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
-		return nil, err
-	}
-
-	ml.gptmodels[modelName] = model
-	return model, err
-}
-
-func (ml *ModelLoader) LoadRWKV(modelName, tokenFile string, threads uint32) (*rwkv.RwkvState, error) {
-	ml.mu.Lock()
-	defer ml.mu.Unlock()
-
-	log.Debug().Msgf("Loading model name: %s", modelName)
-
-	// Check if we already have a loaded model
-	if !ml.ExistsInModelPath(modelName) {
-		return nil, fmt.Errorf("model does not exist")
-	}
-
-	if m, ok := ml.rwkv[modelName]; ok {
-		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
-		return m, nil
-	}
-
-	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.ModelPath, modelName)
-	tokenPath := filepath.Join(ml.ModelPath, tokenFile)
-	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
-
-	model := rwkv.LoadFiles(modelFile, tokenPath, threads)
-	if model == nil {
-		return nil, fmt.Errorf("could not load model")
-	}
-
-	ml.rwkv[modelName] = model
-	return model, nil
-}
-
-func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOption) (*llama.LLama, error) {
-	ml.mu.Lock()
-	defer ml.mu.Unlock()
-
-	log.Debug().Msgf("Loading model name: %s", modelName)
-
-	// Check if we already have a loaded model
-	if !ml.ExistsInModelPath(modelName) {
-		return nil, fmt.Errorf("model does not exist")
-	}
-
 	if m, ok := ml.models[modelName]; ok {
 		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
 		return m, nil
@@ -410,7 +124,7 @@ func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOptio
 	modelFile := filepath.Join(ml.ModelPath, modelName)
 	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

-	model, err := llama.New(modelFile, opts...)
+	model, err := loader(modelFile)
 	if err != nil {
 		return nil, err
 	}
@@ -421,162 +135,5 @@ func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOptio
 	}

 	ml.models[modelName] = model
-	return model, err
-}
-
-func (ml *ModelLoader) LoadWhisperModel(modelName string) (whisper.Model, error) {
-	ml.mu.Lock()
-	defer ml.mu.Unlock()
-
-	// Check if we already have a loaded model
-	if !ml.ExistsInModelPath(modelName) {
-		return nil, fmt.Errorf("model does not exist -- %s", modelName)
-	}
-
-	if m, ok := ml.whisperModels[modelName]; ok {
-		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
-		return m, nil
-	}
-
-	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.ModelPath, modelName)
-	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
-
-	model, err := whisper.New(modelFile)
-	if err != nil {
-		return nil, err
-	}
-
-	ml.whisperModels[modelName] = model
-	return model, err
-}
-
-const tokenizerSuffix = ".tokenizer.json"
-
-var loadedModels map[string]interface{} = map[string]interface{}{}
-var muModels sync.Mutex
-
-func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
-	switch strings.ToLower(backendString) {
-	case "llama":
-		return ml.LoadLLaMAModel(modelFile, llamaOpts...)
-	case "bloomz":
-		return ml.LoadBloomz(modelFile)
-	case "stablelm":
-		return ml.LoadStableLMModel(modelFile)
-	case "dolly":
-		return ml.LoadDollyModel(modelFile)
-	case "redpajama":
-		return ml.LoadRedPajama(modelFile)
-	case "gpt2":
-		return ml.LoadGPT2Model(modelFile)
-	case "gptj":
-		return ml.LoadGPTJModel(modelFile)
-	case "bert-embeddings":
-		return ml.LoadBERT(modelFile)
-	case "rwkv":
-		return ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
-	default:
-		return nil, fmt.Errorf("backend unsupported: %s", backendString)
-	}
-}
-
-func (ml *ModelLoader) WhisperLoader(backendString string, modelFile string) (model whisper.Model, err error) {
-	//TODO expose more whisper options in next PR
-	switch strings.ToLower(backendString) {
-	case "whisper":
-		return ml.LoadWhisperModel(modelFile)
-	default:
-		return nil, fmt.Errorf("whisper backend unsupported: %s", backendString)
-	}
-}
-
-func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
-	updateModels := func(model interface{}) {
-		muModels.Lock()
-		defer muModels.Unlock()
-		loadedModels[modelFile] = model
-	}
-
-	muModels.Lock()
-	m, exists := loadedModels[modelFile]
-	if exists {
-		muModels.Unlock()
-		return m, nil
-	}
-	muModels.Unlock()
-
-	model, modelerr := ml.LoadLLaMAModel(modelFile, llamaOpts...)
-	if modelerr == nil {
-		updateModels(model)
-		return model, nil
-	} else {
-		err = multierror.Append(err, modelerr)
-	}
-
-	model, modelerr = ml.LoadGPTJModel(modelFile)
-	if modelerr == nil {
-		updateModels(model)
-		return model, nil
-	} else {
-		err = multierror.Append(err, modelerr)
-	}
-
-	model, modelerr = ml.LoadGPT2Model(modelFile)
-	if modelerr == nil {
-		updateModels(model)
-		return model, nil
-	} else {
-		err = multierror.Append(err, modelerr)
-	}
-
-	model, modelerr = ml.LoadStableLMModel(modelFile)
-	if modelerr == nil {
-		updateModels(model)
-		return model, nil
-	} else {
-		err = multierror.Append(err, modelerr)
-	}
-
-	model, modelerr = ml.LoadDollyModel(modelFile)
-	if modelerr == nil {
-		updateModels(model)
-		return model, nil
-	} else {
-		err = multierror.Append(err, modelerr)
-	}
-
-	model, modelerr = ml.LoadRedPajama(modelFile)
-	if modelerr == nil {
-		updateModels(model)
-		return model, nil
-	} else {
-		err = multierror.Append(err, modelerr)
-	}
-
-	model, modelerr = ml.LoadBloomz(modelFile)
-	if modelerr == nil {
-		updateModels(model)
-		return model, nil
-	} else {
-		err = multierror.Append(err, modelerr)
-	}
-
-	model, modelerr = ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
-	if modelerr == nil {
-		updateModels(model)
-		return model, nil
-	} else {
-		err = multierror.Append(err, modelerr)
-	}
-
-	model, modelerr = ml.LoadBERT(modelFile)
-	if modelerr == nil {
-		updateModels(model)
-		return model, nil
-	} else {
-		err = multierror.Append(err, modelerr)
-	}
-
-	return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
+	return model, nil
 }
--- a/pkg/whisper/whisper.go
+++ b/pkg/whisper/whisper.go
@@ -28,7 +28,7 @@ func audioToWav(src, dst string) error {
 	return nil
 }

-func Transcript(model whisper.Model, audiopath, language string) (string, error) {
+func Transcript(model whisper.Model, audiopath, language string, threads uint) (string, error) {

 	dir, err := os.MkdirTemp("", "whisper")
 	if err != nil {
@@ -65,8 +65,12 @@ func Transcript(model whisper.Model, audiopath, language string) (string, error)

 	}

+	context.SetThreads(threads)
+
 	if language != "" {
 		context.SetLanguage(language)
+	} else {
+		context.SetLanguage("auto")
 	}

 	if err := context.Process(data, nil); err != nil {
--- a/tests/fixtures/embeddings.yaml
+++ b/tests/fixtures/embeddings.yaml
@@ -0,0 +1,6 @@
+name: text-embedding-ada-002
+parameters:
+  model: bert
+threads: 14
+backend: bert-embeddings
+embeddings: true
--- a/tests/fixtures/whisper.yaml
+++ b/tests/fixtures/whisper.yaml
@@ -0,0 +1,4 @@
+name: whisper-1
+backend: whisper
+parameters:
+  model: whisper-en
Author	SHA1	Message	Date
Ettore Di Giacinto	de36a48861	Update gpt4all to fix thread counts (#249 )	2023-05-13 09:37:46 +02:00
mudler	961ca93219	docs: Update README	2023-05-13 00:46:48 +02:00
Ettore Di Giacinto	557ccc5ad8	examples: add langchain-chroma example (#248 )	2023-05-12 22:20:07 +02:00
Ettore Di Giacinto	2488c445b6	feat: bert.cpp token embeddings (#241 )	2023-05-12 17:16:49 +02:00
Ettore Di Giacinto	b4241d0a0d	tests: enable whisper (#239 )	2023-05-12 14:10:18 +02:00
Ettore Di Giacinto	8250391e49	Add support for gptneox/replit (#238 )	2023-05-12 11:36:35 +02:00
Ettore Di Giacinto	fd1df4e971	whisper: add tests and allow to set upload size (#237 )	2023-05-12 10:04:20 +02:00
ci-robbot [bot]	5115b2faa3	⬆️ Update go-skynet/go-llama.cpp (#219 ) Signed-off-by: GitHub <noreply@github.com> Co-authored-by: mudler <mudler@users.noreply.github.com>	2023-05-11 23:43:55 +02:00
ci-robbot [bot]	93e82a8bf4	⬆️ Update go-skynet/go-gpt2.cpp (#220 ) Signed-off-by: GitHub <noreply@github.com> Co-authored-by: mudler <mudler@users.noreply.github.com>	2023-05-11 23:43:44 +02:00
Ettore Di Giacinto	4413defca5	feat: add starcoder (#236 )	2023-05-11 20:20:07 +02:00
Ettore Di Giacinto	f359e1c6c4	fix: dolly/rp (#235 )	2023-05-11 19:38:27 +02:00
renovate[bot]	1bc87d582d	fix(deps): update module github.com/sashabaranov/go-openai to v1.9.4 (#230 ) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>	2023-05-11 18:02:19 +02:00
renovate[bot]	a86a383357	fix(deps): update github.com/donomii/go-rwkv.cpp digest to 07166da (#224 ) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>	2023-05-11 18:01:52 +02:00
renovate[bot]	16f02c7b30	fix(deps): update github.com/go-skynet/go-bert.cpp digest to ec771ec (#223 ) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>	2023-05-11 18:01:35 +02:00
Ettore Di Giacinto	fe2706890c	Update README.md	2023-05-11 17:32:13 +02:00
Ettore Di Giacinto	85f0f8227d	refactor: drop code dups (#234 )	2023-05-11 16:34:16 +02:00
Ettore Di Giacinto	59e3c02002	make use of new bindings for gpt4all (#232 )	2023-05-11 14:31:19 +02:00